pacman::p_load(dplyr, ggplot2, readr, plotly, googleVis)
# Load the saved workspace images that the rest of the script works on.
# NOTE(review): the code below uses `segment`, `P`, `TPC`, `I`, `O` and `R`
# without defining them, so they presumably come from these two files —
# confirm which file provides which object.
load("data/olist.rdata")
load("data/Z.rdata")


A. 基本繪圖套件 ggplot

# Static bubble chart of the business segments:
#   x = log(average items sold), y = average price, colour = average score,
#   point size mapped to sqrt(number of sellers), one text label per segment.
ggplot(
  data = segment,
  mapping = aes(x = log(avgItemsSold), y = avgPrice, colour = avgScore)
) +
  geom_point(mapping = aes(size = sqrt(noSellers))) +
  geom_text(mapping = aes(label = business_segment), size = 3)


B. 互動式繪圖套件 ggplotly

# Same chart as the static version, built layer by layer and stored in `g`,
# then converted to a plotly widget. The text labels are drawn at size 0.5.
g <- ggplot(
  data = segment,
  mapping = aes(x = log(avgItemsSold), y = avgPrice, colour = avgScore)
)
g <- g + geom_point(mapping = aes(size = sqrt(noSellers)))
g <- g + geom_text(mapping = aes(label = business_segment), size = 0.5)
ggplotly(g)


C. 互動式多軸度泡泡圖 googleVis

最簡單的做法
# Set googleVis's `gvis.plot.tag` option to "chart"; options() returns the
# previous values, which are kept in `op`.
op <- options(gvis.plot.tag = "chart")
# gvisMotionChart() is given a time variable, so add a constant `year` column.
segment$year <- 2018
plot(gvisMotionChart(segment, idvar = "business_segment", timevar = "year"))


稍微複雜的案例

Merge TPC$product_category_name_english into P as P$category

# Attach the English category name from TPC to every product and expose it
# as `category`. No `by` is supplied, so dplyr joins on the column(s) the two
# tables have in common.
P <- P %>%
  left_join(TPC) %>%
  rename(category = product_category_name_english)
Joining, by = "product_category_name"

Merge P$category into I

# Copy each product's category onto the order items in `I`.
# The two lookup columns are selected by name, not by position, so this step
# does not depend on the column order of `P`.
# No `by` is supplied, so dplyr joins on the shared column (product_id).
I <- left_join(I, P[, c("product_id", "category")])
Joining, by = "product_id"

Summarise by category

# Per-category summary of the item rows that have a category:
#   row count, total and mean price, distinct products, distinct sellers.
# `dummy` is a constant column; it serves as the time variable of the
# static motion chart. Result is ordered by total revenue, highest first.
category <- I %>%
  filter(!is.na(category)) %>%
  group_by(category) %>%
  summarise(
    `總銷售量` = n(),
    `總營收` = sum(price),
    `平均價格` = mean(price),
    `產品數` = n_distinct(product_id),
    `廠商數` = n_distinct(seller_id),
    dummy = 2018
  ) %>%
  arrange(desc(`總營收`))

Top-20 categories

# Keep the 20 categories with the largest total revenue, then print them.
top20 <- top_n(category, n = 20, wt = `總營收`)
top20
# A tibble: 20 x 7
   category              總銷售量   總營收 平均價格 產品數 廠商數 dummy
   <chr>                    <int>    <dbl>    <dbl>  <int>  <int> <dbl>
 1 health_beauty             9670 1258681.    130.    2444    492  2018
 2 watches_gifts             5991 1205006.    201.    1329    101  2018
 3 bed_bath_table           11115 1036989.     93.3   3029    196  2018
 4 sports_leisure            8641  988049.    114.    2867    481  2018
 5 computers_accessories     7827  911954.    117.    1639    287  2018
 6 furniture_decor           8334  729762.     87.6   2657    370  2018
 7 cool_stuff                3796  635291.    167.     789    267  2018
 8 housewares                6964  632249.     90.8   2335    468  2018
 9 auto                      4235  592720.    140.    1900    383  2018
10 garden_tools              4347  485256.    112.     753    237  2018
11 toys                      4117  483947.    118.    1411    252  2018
12 baby                      3065  411765.    134.     919    244  2018
13 perfumery                 3419  399125.    117.     868    175  2018
14 telephony                 4545  323668.     71.2   1134    149  2018
15 office_furniture          1691  273961.    162.     309     34  2018
16 stationery                2517  230943.     91.8    849    173  2018
17 computers                  203  222963.   1098.      30      9  2018
18 pet_shop                  1947  214315.    110.     719    137  2018
19 musical_instruments        680  191499.    282.     289     70  2018
20 small_appliances           679  190649.    281.     231    105  2018

靜態多軸互動

# Motion chart over all categories; the constant `dummy` column is passed as
# the time variable.
plot(gvisMotionChart(category, idvar = "category", timevar = "dummy"))
時間面板資料 Panel Data

併入時間資料

# Build a category-by-quarter panel for the top-20 categories.
# Columns of `O` and `R` are selected by name, not by position, so the
# pipeline does not depend on the column order of those tables. Neither join
# supplies `by`, so dplyr joins on the shared column (order_id).
# NOTE(review): if `R` holds more than one row for an order_id, the first
# join repeats that order and the second join repeats its items — confirm
# `R` has at most one row per order.
# NOTE(review): right_join(I) keeps items whose order has no score, and
# mean(score) has no na.rm, so such an item makes its group's average NA —
# confirm this is intended.
X <- left_join(
  O[, c("order_id", "order_purchase_timestamp")],
  R[, c("order_id", "review_score")]
) %>%                                       # put score & timestamp side by side
  rename(
    time = order_purchase_timestamp,
    score = review_score
  ) %>%
  mutate(                                   # cut timestamp into quarter
    quarter = as.Date(cut(time, "quarter"))
  ) %>%
  right_join(I) %>%                         # merge score & quarter into 'I'
  filter(category %in% top20$category) %>%  # pick out the top20 categories
  group_by(category, quarter) %>%           # group by category & quarter
  summarise(
    `總銷售量` = n(),
    `總營收` = sum(price),
    `平均價格` = mean(price),
    `平均星等` = mean(score),
    `產品數` = n_distinct(product_id),
    `廠商數` = n_distinct(seller_id)
  ) %>%
  arrange(category, quarter)                # order by category & quarter
Joining, by = "order_id"
Joining, by = "order_id"

調整資料範圍、去除離群值

# Adjustments before plotting:
#   - keep quarters from 2017-04-01 onward,
#   - drop the "computers" and "office_furniture" categories,
#   - raise any average star rating below 3 up to 3,
# then convert to a plain data.frame.
X2 <- X %>%
  filter(
    quarter >= as.Date("2017-04-01"),
    !category %in% c("computers", "office_furniture")
  ) %>%
  mutate(`平均星等` = pmax(`平均星等`, 3)) %>%
  as.data.frame()

動態多軸互動

# Animated motion chart: one bubble per category, moving through the quarters.
plot(gvisMotionChart(X2, idvar = "category", timevar = "quarter"))