Merge CUST_NO & VISITDATE into S
# Attach CUST_NO and VISITDATE to S from the rows of B where SQ == 1
# (presumably the first record of each session SS -- confirm against B).
# all.x = TRUE makes this a left join, so every row of S is kept.
S <- merge(
  x = S,
  y = B[B$SQ == 1, c("SS", "CUST_NO", "VISITDATE")],
  all.x = TRUE
)
# Missing values per column; a non-zero count in CUST_NO / VISITDATE would
# mean some rows of S found no match in B.
colSums(is.na(S))
SS npage seq case CUST_NO VISITDATE
0 0 0 0 0 0
Read Transaction Log
# Read the four transaction logs into a list of plain data frames, keyed by
# the short code that is later used as the column name for each log.
A <- lapply(
  c(
    CC = "data/TBN_CC_APPLY.csv",
    FX = "data/TBN_FX_TXN.csv",
    LN = "data/TBN_LN_APPLY.csv",
    WM = "data/TBN_WM_TXN.csv"
  ),
  function(path) data.frame(read_csv(path))
)
30-Day Transaction Counts as Columns in S
# Earliest (row 1) and latest (row 2) TXN_DT found in each transaction log.
sapply(A, function(log) range(log[["TXN_DT"]]))
CC FX LN WM
[1,] 9448 9448 9449 9449
[2,] 9567 9567 9567 9567
# For each transaction log in A, build a long table of forward-looking 30-day
# transaction counts: one row per (CUST_NO, DATE) whose count is non-zero, with
# the count column named after the log (CC, FX, LN, WM).
# The list is named after A so that AX$CC, AX$FX, ... resolve; lapply() over a
# plain character vector would otherwise return an unnamed list.
AX <- setNames(lapply(names(A), function(txn) {
  cat(txn, ": ")
  # Customer x date contingency table of transaction counts.
  mx <- xtabs(~ CUST_NO + TXN_DT, data = A[[txn]])
  cat(dim(mx), "; ")
  cx <- dimnames(mx)[[1]]
  dx <- as.integer(dimnames(mx)[[2]])
  # Only start dates followed by a full 30 days of log get a window.
  n_start <- sum(dx <= max(dx) - 30)
  # Column i holds each customer's count inside the calendar window
  # (dx[i], dx[i] + 30]. The window is defined on date values, not on column
  # offsets, because a log need not contain every calendar day. drop = FALSE
  # keeps the slice a matrix when the window holds zero or one date.
  mx <- vapply(seq_len(n_start), function(i) {
    in_window <- dx > dx[i] & dx <= dx[i] + 30
    rowSums(mx[, in_window, drop = FALSE])
  }, numeric(length(cx)))
  cat(dim(mx), "\n")
  # Triplet form exposes the non-zero cells as 0-based (i, j) indices plus
  # their values x, hence the 1 + offsets below.
  mx <- as(mx, "TsparseMatrix")
  df <- data.frame(
    CUST_NO = cx[1 + mx@i],
    DATE = dx[1 + mx@j],
    X = mx@x, stringsAsFactors = FALSE)
  names(df)[3] <- txn
  df
}), names(A))
CC : 44112 120 ; 44112 90
FX : 74111 120 ; 74111 90
LN : 6654 78 ; 6654 60
WM : 14004 85 ; 14004 64
# Rename the visit-date column (by name, not by position) so that S shares
# the (CUST_NO, DATE) key with the per-log tables in AX.
names(S)[names(S) == "VISITDATE"] <- "DATE"
# Left-join every table in AX onto S, in list order. Folding over the list
# avoids looking elements up by name: AX$CC is NULL when AX carries no names,
# and merging a data frame with NULL returns zero rows.
S <- Reduce(
  function(acc, txn_counts) {
    merge(acc, txn_counts, by = c("CUST_NO", "DATE"), all.x = TRUE)
  },
  AX,
  S
)
summary(S)
CUST_NO DATE SS npage seq
Length:660735 Min. :9448 Min. : 1 Min. : 1.0 Length:660735
Class :character 1st Qu.:9470 1st Qu.:165184 1st Qu.: 1.0 Class :character
Mode :character Median :9492 Median :330368 Median : 2.0 Mode :character
Mean :9497 Mean :330368 Mean : 3.3
3rd Qu.:9521 3rd Qu.:495552 3rd Qu.: 3.0
Max. :9567 Max. :660735 Max. :649.0
case CC FX LN WM
Bounce:318442 Min. : 1 Min. : 1 Min. :1 Min. : 1
Long : 86765 1st Qu.: 1 1st Qu.: 1 1st Qu.:1 1st Qu.: 1
Repeat:137792 Median : 1 Median : 2 Median :1 Median : 1
Short :117736 Mean : 1 Mean : 7 Mean :1 Mean : 2
3rd Qu.: 1 3rd Qu.: 4 3rd Qu.:1 3rd Qu.: 3
Max. :18 Max. :1319 Max. :2 Max. :57
NA's :644496 NA's :537743 NA's :657479 NA's :641607
CUST_NO DATE SS npage seq case CC FX LN WM
0 0 0 0 0 0 644496 537743 657479 641607
# Persist the raw logs (A), the 30-day tables (AX), B and S to one file.
save(list = c("A", "AX", "B", "S"), file = "data/ys2.rdata")
進站30天內購買機率
# Turn each 30-day count into a purchase flag. The counts were left-joined
# onto S, so NA means no matching (CUST_NO, DATE) row existed for that log.
# Columns are addressed by name so a change in S's column order cannot hit
# the wrong ones.
# NOTE: not idempotent -- re-running this on the already-logical columns
# marks every row TRUE.
txn_cols <- c("CC", "FX", "LN", "WM")
S[, txn_cols] <- !is.na(S[, txn_cols])
# Share of rows of S flagged TRUE, per transaction type.
colMeans(S[, txn_cols])
CC FX LN WM
0.024577 0.186144 0.004928 0.028950
Regular Mosaic Plot
# Mosaic plot of case against the CC column of S, with residual shading.
mosaic(~ case + CC, data = S, shade = TRUE)

# Association plot of case against the FX column of S, with residual shading.
assoc(~ case + FX, data = S, shade = TRUE)

Composite Mosaic Plot
# Transaction-by-pattern table: for every level of S$case, the column sums of
# the four transaction columns (counts of TRUE once they are logical flags).
# Columns are selected by name rather than by position so a change in S's
# column order cannot pick up the wrong ones.
mx <- sapply(levels(S$case), function(pattern) {
  colSums(S[S$case == pattern, c("CC", "FX", "LN", "WM")])
})
# set_varnames maps the dimension labels A and B (presumably the defaults
# given to a matrix without named dimnames -- confirm) to readable titles.
mosaic(mx, shade = TRUE, labeling_args = list(
  set_varnames = c(A = "Transaction", B = "Pattern")))

assoc(mx, shade = TRUE, labeling_args = list(
  set_varnames = c(A = "Transaction", B = "Pattern")))
