載入套件、資料

pacman::p_load(dplyr, googleVis)
load("data/olist.rdata")
load("data/Z.rdata")

1. 匯入欄位

先將英文品類名稱、評論星等、送達日期等欄位匯入品項資料框(I):

A = left_join(P[,1:2], TPC) %>% 
  transmute(product_id, category=product_category_name_english) %>% 
  inner_join(I) %>% 
  left_join(R[,2:3]) %>% 
  left_join(O[,c(1,7)]) %>% rename(date = order_delivered_customer_date) %>% 
  filter(!is.na(category) & !is.na(date)) %>% 
  mutate(month = as.Date(cut(date, 'month'))) %>% 
  filter(month >= as.Date('2017-02-01') & month <= as.Date('2018-08-01'))
glimpse(A)
Observations: 108,566
Variables: 11
$ product_id          <chr> "1e9e8ef04dbcff4541ed26657ea517e5", "3aa071139cb16b67ca9e…
$ category            <chr> "perfumery", "art", "sports_leisure", "baby", "housewares…
$ order_id            <chr> "e17e4f88e31525f7deef66779844ddce", "5236307716393b7114b5…
$ order_item_id       <int> 1, 1, 1, 1, 1, 1, 1, 1, 3, 4, 1, 2, 1, 2, 3, 4, 5, 1, 1, …
$ seller_id           <chr> "5670f4db5b62c43d542e1b2d56b0cf7c", "b561927807645834b59e…
$ shipping_limit_date <dttm> 2018-04-30 17:33:54, 2018-02-06 19:11:15, 2018-07-11 21:…
$ price               <dbl> 10.91, 248.00, 79.80, 112.30, 37.90, 45.87, 958.00, 968.0…
$ freight_value       <dbl> 7.39, 17.99, 7.82, 9.54, 8.29, 19.95, 27.76, 44.30, 16.02…
$ review_score        <int> 5, 5, 5, 1, 5, 5, 2, 5, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 4, …
$ date                <dttm> 2018-04-27 16:42:17, 2018-02-16 13:27:08, 2018-07-11 23:…
$ month               <date> 2018-04-01, 2018-02-01, 2018-07-01, 2018-08-01, 2018-04-…


2. 時間面板資料

§ 銷售金額最高的20個品類
top20 = tapply(A$price, A$category, sum) %>% sort %>% tail(20) %>% names
§ 製作時間面板
X = A %>% 
  filter(category %in% top20) %>%            # 選擇品類
  group_by(category, month) %>% summarise(   # 製作時間面板
    ItemSold = n(),
    TotalProdRev = sum(price),
    TotalFreightRev =  sum(freight_value), 
    TotalRevenue = TotalProdRev + TotalFreightRev,
    rProdRev = TotalProdRev / TotalRevenue,
    rFreightRev = TotalFreightRev / TotalRevenue,
    AvgPrice = mean(price),
    AvgFreight = mean(freight_value),
    AvgScore = mean(review_score),
    nProduct = n_distinct(product_id),
    nSeller = n_distinct(seller_id),
    rProdSeller = nProduct/nSeller,
    rItemProd = ItemSold/nProduct,
    rRevProd = TotalProdRev/nProduct,
    rRevSeller = TotalRevenue/nSeller
    )%>% ungroup 
§ 檢查面板資料

第一個面板

X[X$month == min(X$month),] %>% View

項目的期數

table(X$category)

                 auto                  baby        bed_bath_table             computers 
                   19                    19                    19                    14 
computers_accessories            cool_stuff       furniture_decor          garden_tools 
                   19                    19                    19                    19 
        health_beauty            housewares   musical_instruments      office_furniture 
                   19                    19                    19                    19 
            perfumery              pet_shop      small_appliances        sports_leisure 
                   19                    19                    19                    19 
           stationery             telephony                  toys         watches_gifts 
                   19                    19                    19                    19 

computers缺了5期的的資料

切齊面板資料

X = filter(X, category != "computers")
table(X$category)

                 auto                  baby        bed_bath_table computers_accessories 
                   19                    19                    19                    19 
           cool_stuff       furniture_decor          garden_tools         health_beauty 
                   19                    19                    19                    19 
           housewares   musical_instruments      office_furniture             perfumery 
                   19                    19                    19                    19 
             pet_shop      small_appliances        sports_leisure            stationery 
                   19                    19                    19                    19 
            telephony                  toys         watches_gifts 
                   19                    19                    19 


3. 動態泡泡圖

op = options(gvis.plot.tag='chart')
gvisMotionChart(
  X,                                               # 時間面板資料框
  'category', 'month',                             # 項目、時間欄位
  'ItemSold','AvgPrice','AvgScore','TotalRevenue'  # X、Y、顏色、大小欄位 
  ) %>% plot


💡 學習重點:
  ■ PC端不能直接Kint動態泡泡圖
  ■ 但我們還是可以從RStudio裡面直接執行plot(gvisMotionChart())
  ■ 執行結果會送到瀏覽器
  ■ 妳需要打開瀏覽器的Flash選項才看得到動態泡泡圖