## [1] "C"
##
## Call:
## glm(formula = buy ~ ., family = binomial(), data = TR[, c(2:9,
## 11)])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.7931 -0.8733 -0.6991 1.0384 1.8735
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.259e+00 1.261e-01 -9.985 < 2e-16 ***
## r -1.227e-02 8.951e-04 -13.708 < 2e-16 ***
## s 9.566e-03 9.101e-04 10.511 < 2e-16 ***
## f 2.905e-01 1.593e-02 18.233 < 2e-16 ***
## m -3.028e-05 2.777e-05 -1.090 0.27559
## rev 4.086e-05 1.940e-05 2.106 0.03521 *
## raw -2.306e-04 8.561e-05 -2.693 0.00708 **
## agea29 -4.194e-02 8.666e-02 -0.484 0.62838
## agea34 1.772e-02 7.992e-02 0.222 0.82456
## agea39 7.705e-02 7.921e-02 0.973 0.33074
## agea44 8.699e-02 8.132e-02 1.070 0.28476
## agea49 1.928e-02 8.457e-02 0.228 0.81962
## agea54 1.745e-02 9.323e-02 0.187 0.85155
## agea59 1.752e-01 1.094e-01 1.602 0.10926
## agea64 6.177e-02 1.175e-01 0.526 0.59904
## agea69 2.652e-01 1.047e-01 2.533 0.01131 *
## agea99 -1.419e-01 1.498e-01 -0.947 0.34347
## areaz106 -4.105e-02 1.321e-01 -0.311 0.75603
## areaz110 -2.075e-01 1.045e-01 -1.986 0.04703 *
## areaz114 3.801e-02 1.111e-01 0.342 0.73214
## areaz115 2.599e-01 9.682e-02 2.684 0.00727 **
## areaz221 1.817e-01 9.753e-02 1.863 0.06243 .
## areazOthers -4.677e-02 1.045e-01 -0.448 0.65435
## areazUnknown -1.695e-01 1.232e-01 -1.375 0.16912
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27629 on 20007 degrees of freedom
## Residual deviance: 23295 on 19984 degrees of freedom
## AIC: 23343
##
## Number of Fisher Scoring iterations: 5
## predict
## actual FALSE TRUE
## FALSE 3730 873
## TRUE 1700 2273
## [1] 0.5367304 0.6999767
## [,1]
## FALSE vs. TRUE 0.7556038
A2 = subset(A, A$buy) %>% mutate_at(c("m","rev","amount"), log10)
TR2 = subset(A2, spl2)
TS2 = subset(A2, !spl2)
##
## Call:
## lm(formula = amount ~ ., data = TR2[, c(2:6, 8:10)])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83304 -0.22812 0.04853 0.28096 1.64243
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1403704 0.0504979 22.583 < 2e-16 ***
## r 0.0000702 0.0003090 0.227 0.82030
## s 0.0001173 0.0003123 0.376 0.70721
## f 0.0256836 0.0017965 14.296 < 2e-16 ***
## m 0.5045943 0.0372711 13.539 < 2e-16 ***
## rev 0.0450307 0.0360945 1.248 0.21222
## agea29 0.0737926 0.0251165 2.938 0.00331 **
## agea34 0.1204660 0.0230651 5.223 1.80e-07 ***
## agea39 0.1264592 0.0227496 5.559 2.79e-08 ***
## agea44 0.1382214 0.0232522 5.944 2.87e-09 ***
## agea49 0.1085828 0.0242698 4.474 7.77e-06 ***
## agea54 0.0787808 0.0264917 2.974 0.00295 **
## agea59 0.0703242 0.0312462 2.251 0.02443 *
## agea64 0.0694822 0.0321119 2.164 0.03051 *
## agea69 -0.0284007 0.0282282 -1.006 0.31439
## agea99 0.1124434 0.0395590 2.842 0.00449 **
## areaz106 0.0789586 0.0435321 1.814 0.06974 .
## areaz110 0.0375241 0.0353641 1.061 0.28868
## areaz114 -0.0111101 0.0371762 -0.299 0.76506
## areaz115 0.0111809 0.0325803 0.343 0.73147
## areaz221 0.0147066 0.0328140 0.448 0.65403
## areazOthers 0.0249228 0.0349567 0.713 0.47589
## areazUnknown 0.0105550 0.0388962 0.271 0.78612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4216 on 9246 degrees of freedom
## Multiple R-squared: 0.291, Adjusted R-squared: 0.2893
## F-statistic: 172.5 on 22 and 9246 DF, p-value: < 2.2e-16
r2.tr = summary(lm1)$r.sq
SST = sum((TS2$amount - mean(TR2$amount))^ 2)
SSE = sum((predict(lm1, TS2) - TS2$amount)^2)
r2.ts = 1 - (SSE/SST)
c(R2train=r2.tr, R2test=r2.ts)
## R2train R2test
## 0.2909908 0.2575966
Aggregate data 2000-12-01 ~ 2001~02-28.
load("data/tf0.rdata")
d0 = max(X0$date) + 1
B = X0 %>%
filter(date >= as.Date("2000-12-01")) %>%
mutate(days = as.integer(difftime(d0, date, units="days"))) %>%
group_by(cust) %>% summarise(
r = min(days), # recency
s = max(days), # seniority
f = n(), # frquency
m = mean(total), # monetary
rev = sum(total), # total revenue contribution
raw = sum(gross), # total gross profit contribution
age = age[1], # age group
area = area[1], # area code
) %>% data.frame # 28584
nrow(B)
## [1] 28531
In B
, there is a record for each customer. B$Buy
is the probability of buying in March.
💡: 預測購買金額時要記得做指數、對數轉換!