# Load previously saved objects from disk into the workspace
# (presumably S and B, which are used below -- TODO confirm).
load(file = "data/ys.rdata")
Merge CUST_NO and VISITDATE into S
# Left-join the customer number and visit date onto S, taking them from the
# rows of B where SQ == 1. The join keys are whatever columns the two tables
# have in common (merge's default).
S <- merge(
  x = S,
  y = B[B$SQ == 1, c("SS", "CUST_NO", "VISITDATE")],
  all.x = TRUE
)
# Number of missing values per column after the join.
colSums(is.na(S))
       SS     npage       seq      case   CUST_NO VISITDATE 
        0         0         0         0         0         0 
Read Transaction Log
# Read the four transaction logs into a named list of plain data frames,
# keyed by transaction type code (CC, FX, LN, WM).
A <- lapply(
  c(
    CC = "data/TBN_CC_APPLY.csv",
    FX = "data/TBN_FX_TXN.csv",
    LN = "data/TBN_LN_APPLY.csv",
    WM = "data/TBN_WM_TXN.csv"
  ),
  function(path) data.frame(read_csv(path))
)

30-Day Transaction Counts as Columns in S

# Earliest and latest transaction date (TXN_DT) in each transaction log.
sapply(
  A,
  function(txn_log) range(txn_log[["TXN_DT"]])
)
       CC   FX   LN   WM
[1,] 9448 9448 9449 9449
[2,] 9567 9567 9567 9567
# For every transaction type in A, build a long table with one row per
# (CUST_NO, DATE) pair holding the number of transactions that customer made
# in the 30 calendar days after DATE, i.e. in the window (DATE, DATE + 30].
# Only start dates that leave a full 30-day window inside the observed data
# are used. Pairs with a zero count are not stored, so they surface as NA
# when the table is later left-joined onto another data frame.
#
# Progress is printed per type: dimensions of the customer-by-date count
# table, then dimensions of the customer-by-start-date window-sum matrix.
AX <- lapply(names(A), function(txn) {
  cat(txn, ": ")
  # Customer x date contingency table of transaction counts.
  daily <- xtabs(~ CUST_NO + TXN_DT, data = A[[txn]])
  cat(dim(daily), "; ")
  cx <- dimnames(daily)[[1]]
  # Distinct transaction dates as integers; assumed to be in ascending order
  # (xtabs orders numeric factor levels), so the eligible start dates are the
  # first `n_start` entries.
  dx <- as.integer(dimnames(daily)[[2]])
  n_start <- sum(dx <= max(dx) - 30)
  # The window is bounded by date value (dx[i] + 30), not by position in dx:
  # dates with no transactions are absent from dx, so a positional offset
  # would stretch the window past 30 days and can index beyond the end of dx.
  # drop = FALSE keeps a matrix even when a single date falls in the window.
  mx <- vapply(
    seq_len(n_start),
    function(i) {
      in_window <- dx > dx[i] & dx <= dx[i] + 30
      rowSums(daily[, in_window, drop = FALSE])
    },
    numeric(length(cx))
  )
  cat(dim(mx), "\n")
  # Triplet form stores only the non-zero cells; its @i / @j slots are
  # 0-based row / column indices, hence the `1 +` below.
  mx <- as(mx, "TsparseMatrix")
  df <- data.frame(
    CUST_NO = cx[1 + mx@i],
    DATE = dx[1 + mx@j],
    X = mx@x,
    stringsAsFactors = FALSE
  )
  # Name the count column after the transaction type.
  names(df)[3] <- txn
  df
})
CC : 44112 120 ; 44112 90 
FX : 74111 120 ; 74111 90 
LN : 6654 78 ; 6654 60 
WM : 14004 85 ; 14004 64 
# Label the per-type tables in AX with their transaction type codes.
names(AX) <- names(A)
# Rename the visit-date column so it matches the DATE key of the AX tables.
# Renaming by name (not by position) keeps this correct if the column order
# changes, and makes it a no-op if the column has already been renamed.
names(S)[names(S) == "VISITDATE"] <- "DATE"
# Left-join each transaction-count table onto S by customer and date.
# Rows of S without a matching (CUST_NO, DATE) entry get NA in that column.
S <- Reduce(
  function(visits, txn_counts) {
    merge(visits, txn_counts, by = c("CUST_NO", "DATE"), all.x = TRUE)
  },
  AX[c("CC", "FX", "LN", "WM")],
  S
)
summary(S)
   CUST_NO               DATE            SS             npage           seq           
 Length:660735      Min.   :9448   Min.   :     1   Min.   :  1.0   Length:660735     
 Class :character   1st Qu.:9470   1st Qu.:165184   1st Qu.:  1.0   Class :character  
 Mode  :character   Median :9492   Median :330368   Median :  2.0   Mode  :character  
                    Mean   :9497   Mean   :330368   Mean   :  3.3                     
                    3rd Qu.:9521   3rd Qu.:495552   3rd Qu.:  3.0                     
                    Max.   :9567   Max.   :660735   Max.   :649.0                     
                                                                                      
     case              CC               FX               LN               WM        
 Bounce:318442   Min.   : 1       Min.   :   1     Min.   :1        Min.   : 1      
 Long  : 86765   1st Qu.: 1       1st Qu.:   1     1st Qu.:1        1st Qu.: 1      
 Repeat:137792   Median : 1       Median :   2     Median :1        Median : 1      
 Short :117736   Mean   : 1       Mean   :   7     Mean   :1        Mean   : 2      
                 3rd Qu.: 1       3rd Qu.:   4     3rd Qu.:1        3rd Qu.: 3      
                 Max.   :18       Max.   :1319     Max.   :2        Max.   :57      
                 NA's   :644496   NA's   :537743   NA's   :657479   NA's   :641607  
# Number of missing values in each column of S.
colSums(is.na(S))
CUST_NO    DATE      SS   npage     seq    case      CC      FX      LN      WM 
      0       0       0       0       0       0  644496  537743  657479  641607 
# Persist the transaction logs, the window-count tables, B and the enriched S.
save(list = c("A", "AX", "B", "S"), file = "data/ys2.rdata")

進站30天內購買機率 (Probability of a purchase within 30 days of a site visit)

# Turn the four transaction-count columns into logical flags: TRUE when a
# count is present, FALSE when it is NA. Columns are addressed by name
# rather than by position so a change in column order cannot silently
# convert the wrong columns.
txn_cols <- c("CC", "FX", "LN", "WM")
S[, txn_cols] <- !is.na(S[, txn_cols])
# Share of rows of S with the flag set, per transaction type.
colMeans(S[, txn_cols])
      CC       FX       LN       WM 
0.024577 0.186144 0.004928 0.028950 

Regular Mosaic Plot

# Mosaic plot of visit pattern (case) against the CC column, with shading on.
mosaic(~ case + CC, data = S, shade = TRUE)

# Association plot of visit pattern (case) against the FX column.
assoc(~ case + FX, data = S, shade = TRUE)

Composite Mosaic Plot

# Column sums of the four transaction columns within each visit pattern
# (level of S$case), giving a transaction-by-pattern matrix. When those
# columns hold logical flags, each cell is the number of rows with the
# flag set. Columns are selected by name rather than by position, and
# vapply fixes the result to one numeric vector per pattern.
txn_cols <- c("CC", "FX", "LN", "WM")
mx <- vapply(
  levels(S$case),
  function(pattern) colSums(S[S$case == pattern, txn_cols]),
  numeric(length(txn_cols))
)
# Axis titles shared by both plots: rows are transactions, columns patterns.
lab_args <- list(set_varnames = c(A="Transaction", B="Pattern"))
mosaic(mx, shade = TRUE, labeling_args = lab_args)

assoc(mx, shade = TRUE, labeling_args = lab_args)