載入套件、資料
pacman::p_load(dplyr, googleVis)
load("data/olist.rdata")
load("data/Z.rdata")
先將英文品類名稱、評論星等、送達日期等欄位匯入品項資料框(I
):
TPC$product_category_name_english
匯入P$category
I$category
):將P$category
匯入I
I$review_score
):將R$review_score
匯入I
I$date
):將O$order_delivered_customer_date
匯入I$date
NA
month
A = left_join(P[,1:2], TPC) %>%
transmute(product_id, category=product_category_name_english) %>%
inner_join(I) %>%
left_join(R[,2:3]) %>%
left_join(O[,c(1,7)]) %>% rename(date = order_delivered_customer_date) %>%
filter(!is.na(category) & !is.na(date)) %>%
mutate(month = as.Date(cut(date, 'month'))) %>%
filter(month >= as.Date('2017-02-01') & month <= as.Date('2018-08-01'))
glimpse(A)
Observations: 108,566
Variables: 11
$ product_id <chr> "1e9e8ef04dbcff4541ed26657ea517e5", "3aa071139cb16b67ca9e…
$ category <chr> "perfumery", "art", "sports_leisure", "baby", "housewares…
$ order_id <chr> "e17e4f88e31525f7deef66779844ddce", "5236307716393b7114b5…
$ order_item_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 3, 4, 1, 2, 1, 2, 3, 4, 5, 1, 1, …
$ seller_id <chr> "5670f4db5b62c43d542e1b2d56b0cf7c", "b561927807645834b59e…
$ shipping_limit_date <dttm> 2018-04-30 17:33:54, 2018-02-06 19:11:15, 2018-07-11 21:…
$ price <dbl> 10.91, 248.00, 79.80, 112.30, 37.90, 45.87, 958.00, 968.0…
$ freight_value <dbl> 7.39, 17.99, 7.82, 9.54, 8.29, 19.95, 27.76, 44.30, 16.02…
$ review_score <int> 5, 5, 5, 1, 5, 5, 2, 5, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 4, …
$ date <dttm> 2018-04-27 16:42:17, 2018-02-16 13:27:08, 2018-07-11 23:…
$ month <date> 2018-04-01, 2018-02-01, 2018-07-01, 2018-08-01, 2018-04-…
top20 = tapply(A$price, A$category, sum) %>% sort %>% tail(20) %>% names
X = A %>%
filter(category %in% top20) %>% # 選擇品類
group_by(category, month) %>% summarise( # 製作時間面板
ItemSold = n(),
TotalProdRev = sum(price),
TotalFreightRev = sum(freight_value),
TotalRevenue = TotalProdRev + TotalFreightRev,
rProdRev = TotalProdRev / TotalRevenue,
rFreightRev = TotalFreightRev / TotalRevenue,
AvgPrice = mean(price),
AvgFreight = mean(freight_value),
AvgScore = mean(review_score),
nProduct = n_distinct(product_id),
nSeller = n_distinct(seller_id),
rProdSeller = nProduct/nSeller,
rItemProd = ItemSold/nProduct,
rRevProd = TotalProdRev/nProduct,
rRevSeller = TotalRevenue/nSeller
)%>% ungroup
第一個面板
X[X$month == min(X$month),] %>% View
項目的期數
table(X$category)
auto baby bed_bath_table computers
19 19 19 14
computers_accessories cool_stuff furniture_decor garden_tools
19 19 19 19
health_beauty housewares musical_instruments office_furniture
19 19 19 19
perfumery pet_shop small_appliances sports_leisure
19 19 19 19
stationery telephony toys watches_gifts
19 19 19 19
computers
缺了5期的的資料
切齊面板資料
X = filter(X, category != "computers")
table(X$category)
auto baby bed_bath_table computers_accessories
19 19 19 19
cool_stuff furniture_decor garden_tools health_beauty
19 19 19 19
housewares musical_instruments office_furniture perfumery
19 19 19 19
pet_shop small_appliances sports_leisure stationery
19 19 19 19
telephony toys watches_gifts
19 19 19
op = options(gvis.plot.tag='chart')
gvisMotionChart(
X, # 時間面板資料框
'category', 'month', # 項目、時間欄位
'ItemSold','AvgPrice','AvgScore','TotalRevenue' # X、Y、顏色、大小欄位
) %>% plot
💡 學習重點:
■ PC端不能直接Kint動態泡泡圖
■ 但我們還是可以從RStudio裡面直接執行plot(gvisMotionChart())
■ 執行結果會送到瀏覽器
■ 妳需要打開瀏覽器的Flash選項才看得到動態泡泡圖