도매시장 데이터 가공과 시각화 - 배추(2/2)
앞선 Post에서는 가락시장을 중심으로 시각화를 해보았다면 이번 Post에서는 다른 도매시장들에서의 보다 구체적인 가격정보를 시각화하는 코드를 살펴보자.
library(dplyr); library(tidyr); library(ggplot2); library(ggthemes); library(formattable); library(plotly)
필요한 패키지를 불러들인다.
date0 <- 20180613
unit_list <- c('1kg','4kg','8kg','10kg','12kg')
unit_df <- data.frame(unit2=c('1kg','4kg','8kg','10kg','12kg'))
dat <- dat %>% mutate(unit_merg=paste(round(prut,0),unit,package,sep=''), mkt_short = substr(market,1,4))
date0l7d <- as.numeric(strftime(as.Date(as.character(date0),format="%Y%m%d") - 7, format="%Y%m%d")) #7일전
date0l14d <- as.numeric(strftime(as.Date(as.character(date0),format="%Y%m%d") - 14, format="%Y%m%d")) #14일전
date0l21d <- as.numeric(strftime(as.Date(as.character(date0),format="%Y%m%d") - 21, format="%Y%m%d")) #21일전
dat_r7d <- dat %>% filter(date>=date0l7d, date<=date0) %>% mutate(unit2 = paste(round(prut,0),unit,sep=''), pri100 = round(price_per_kg,-2))
dat_l2w <- dat %>% filter(date>=date0l14d, date<date0l7d) %>% mutate(unit2 = paste(round(prut,0),unit,sep=''), pri100 = round(price_per_kg,-2))
dat_l3w <- dat %>% filter(date>=date0l21d, date<date0l14d) %>% mutate(unit2 = paste(round(prut,0),unit,sep=''), pri100 = round(price_per_kg,-2))
tmp0 <- dat_r7d %>% filter(unit2%in%unit_list) %>% group_by(unit2,mkt_short,insname) %>% summarise(amt_ton=round(sum(sales_amt,na.rm=T)/1000,0), price=round(mean(price_per_kg,na.rm=T),0)) %>% arrange(desc(amt_ton)) %>% ungroup()
tmp1 <- dat_l2w %>% filter(unit2%in%unit_list) %>% group_by(unit2,mkt_short,insname) %>% summarise(amt_ton=round(sum(sales_amt,na.rm=T)/1000,0), price=round(mean(price_per_kg,na.rm=T),0)) %>% arrange(desc(amt_ton)) %>% ungroup()
tmp2 <- dat_l3w %>% filter(unit2%in%unit_list) %>% group_by(unit2,mkt_short,insname) %>% summarise(amt_ton=round(sum(sales_amt,na.rm=T)/1000,0), price=round(mean(price_per_kg,na.rm=T),0)) %>% arrange(desc(amt_ton)) %>% ungroup()
dat2_1_raw <- merge(tmp0,tmp1[,-4],by=c('unit2','mkt_short','insname'),all.x=T)
dat2_1_raw <- merge(dat2_1_raw,tmp2[,-4],by=c('unit2','mkt_short','insname'),all.x=T)
colnames(dat2_1_raw)[5:7] <- c('price_l1w','price_l2w','price_l3w')
도매시장 리스트가 담겨있는 market_list.csv
파일은 AgData Lab AWS S3 저장소에서 다운받을 수 있다.
mkt_df <- read.csv('market_list.csv', stringsAsFactors = F)
mkt_list <- mkt_df[,-1]
tmp <- NULL
tmp2 <- NULL
tmp3 <- NULL # 관심시장을 상위로 해서 테이블을 만들어줌
unit_df <- data.frame(unit2=c('1kg','4kg','8kg','10kg','12kg'))
for(i in mkt_list){
mkt_sel <- i
dat2_1_sel <- dat2_1_raw %>% filter(mkt_short==substr(mkt_sel,1,4)) %>% arrange(unit2,desc(amt_ton))
dat2_1_unsel <- dat2_1_raw %>% filter(mkt_short!=substr(mkt_sel,1,4)) %>% arrange(unit2, desc(amt_ton)) %>% group_by(unit2) %>% slice(1:20) # %>% head(n=20)
if(nrow(dat2_1_sel)==0) {
tmp <- rbind(tmp, data.frame(mkt_sel=rep(mkt_sel,nrow(unit_df)),unit_df,mkt_short=substr(mkt_sel,1,4),insname=NA,amt_ton=NA,price_l1w=NA,price_l2w=NA,price_l3w=NA))
tmp3 <- rbind(tmp3, data.frame(mkt_sel=rep(mkt_sel,nrow(unit_df)),unit_df,mkt_short=substr(mkt_sel,1,4),insname=NA,amt_ton=NA,price_l1w=NA,price_l2w=NA,price_l3w=NA), data.frame(mkt_sel=mkt_sel, dat2_1_unsel))
} else {
tmp0 <- merge(unit_df, dat2_1_sel, by='unit2', all.x=T)
tmp0[is.na(tmp0$mkt_short),'mkt_short'] <- substr(mkt_sel,1,4)
tmp <- rbind(tmp, data.frame(mkt_sel=mkt_sel, tmp0))
tmp3 <- rbind(tmp3,data.frame(mkt_sel=mkt_sel, tmp0), data.frame(mkt_sel=mkt_sel, dat2_1_unsel))
}
tmp2 <- rbind(tmp2, data.frame(mkt_sel=mkt_sel, dat2_1_unsel))
}
dat2_1 <- tmp3
dat2_1 <- merge(dat2_1,mkt_df,by.x='mkt_sel',by.y='name',all.x=T)
dat2_1 <- dat2_1[,c(9,2:8)]
colnames(dat2_1)[1] <- 'mkt_sel'
tab_2_1 <- dat2_1 %>% filter(unit2=='10kg', mkt_sel==2) %>% select(mkt_short,insname,amt_ton,price_l1w,price_l2w,price_l3w) %>% arrange(desc(amt_ton)) %>% head(n=20)
detach("package:plotly", unload=TRUE)
formattable(tab_2_1, list(
mkt_short=formatter(
"span",
style = x ~ ifelse(x == substr(mkt_sel,1,4), style(font.weight = "bold", color = 'blue'), NA)),
amt_ton = color_tile("white", "orange"),
area(col = c(price_l1w, price_l2w, price_l3w)) ~ normalize_bar("pink", 0.2, na.rm=T)
))
mkt_short | insname | amt_ton | price_l1w | price_l2w | price_l3w |
---|---|---|---|---|---|
서울가락 | 대아청과 | 760885 | 188 | 219 | 240 |
대전오정 | 대전청과(주) | 85937 | 267 | 313 | 349 |
서울가락 | 서울청과 | 67709 | 299 | 345 | 330 |
대전노은 | 대전중앙청과 | 54281 | 277 | 290 | 330 |
대전오정 | 농협대전(공) | 50562 | 213 | 249 | 310 |
인천삼산 | 부평농산 | 50138 | 212 | 228 | 252 |
구리도매 | 인터넷청과 | 48922 | 228 | 213 | 243 |
구리도매 | 구리청과 | 47397 | 231 | 250 | 317 |
대구북부 | 대구원협(공) | 45822 | 254 | 222 | 188 |
순천도매 | 남도청과 | 33988 | 243 | 268 | 264 |
서울가락 | 한국청과 | 32726 | 208 | 222 | 280 |
순천도매 | 순천원협(공) | 30521 | 332 | 366 | 274 |
광주각화 | 광주중앙청과 | 29587 | 309 | 272 | 323 |
인천구월 | 대인농산 | 27501 | 214 | 228 | 208 |
광주각화 | 광주청과 | 26049 | 234 | 242 | 314 |
안양도매 | 안양원협(공) | 25702 | 583 | 733 | 554 |
광주서부 | 호남청과 | 25553 | 214 | 249 | 219 |
인천삼산 | 경인농산 | 24362 | 164 | 228 | 181 |
부산엄궁 | 항도청과 | 20556 | 237 | 392 | 400 |
대구북부 | 효성청과 | 19266 | 219 | 250 | 208 |
tmp4 <- NULL
tmp5 <- NULL
mkt_amt_top7 <- dat_r7d %>% filter(unit2 %in% unit_list) %>% group_by(unit2,market) %>% summarise(amount=sum(sales_amt,na.rm=T)) %>% arrange(unit2, desc(amount)) %>% group_by(unit2) %>% slice(1:7) %>% ungroup
# i <- '강릉도매시장'; j <- '1kg'
for(i in mkt_list){
mkt_sel <- i
for(j in unit_list){
mkt_top7_tmp <- mkt_amt_top7[mkt_amt_top7$unit2==j,'market']
if(sum(as.integer(mkt_top7_tmp$market %in% mkt_sel))==1 ){
mkt_lst_top7 <- c(mkt_sel,mkt_top7_tmp$market[1:7])
mkt_lst_top7 <- unique(mkt_lst_top7)
} else {
mkt_lst_top7 <- c(mkt_sel,mkt_top7_tmp$market[1:6])
}
mkt_pri_dist <- dat_r7d %>% filter(market %in% mkt_lst_top7, unit2==j, pri100 <= 5000) %>% group_by(market,pri100,aucCodeName) %>%
summarise(amt_ton=sum(sales_amt,na.rm=T)/1000) %>% mutate(mkt_short=substr(market,1,4)) # %>% spread(pri100,amount,fill=0)
if(nrow(mkt_pri_dist)==0) {
tmp4 <- rbind(tmp4, data.frame(mkt_sel=i, unit2=j, market=NA,pri100=NA,aucCodeName=NA,amt_ton=NA,mkt_short=NA))
} else {
if(nrow(mkt_pri_dist[mkt_pri_dist$market==i,])==0) {
tmp4 <- rbind(tmp4, data.frame(mkt_sel=i, unit2=j, market=i,pri100=NA,aucCodeName=NA,amt_ton=NA,mkt_short=NA),data.frame(mkt_sel=i, unit2=j, mkt_pri_dist))
} else {
tmp4 <- rbind(tmp4, data.frame(mkt_sel=i, unit2=j, mkt_pri_dist))
}
}
tmp5 <- rbind(tmp5, data.frame(mkt_sel=i, unit2=j, mkt_lst_top7))
}
}
mkt_pri_dist <- tmp4
mkt_pri_dist <- merge(mkt_pri_dist,mkt_df,by.x='mkt_sel',by.y='name',all.x=T)
mkt_pri_dist <- mkt_pri_dist[,c(8,2:7)]
colnames(mkt_pri_dist)[1] <- 'mkt_sel'
dat_fil <- mkt_pri_dist %>% filter(mkt_sel==2, unit2=='10kg')
mkt_lst_top7 <- dat_fil %>% filter(market!=dat_fil[1,'market']) %>% group_by(market) %>% summarize(amt_ton=sum(amt_ton,na.rm=T)) %>% arrange(desc(amt_ton))
mkt_lst_top7 <- c(as.character(dat_fil[1,'market']), as.character(mkt_lst_top7$market))
mkt_lst_top7 <- substr(mkt_lst_top7,1,4)
dat_fil$pri100 <- as.numeric(dat_fil$pri100)
if(max(dat_fil$pri100,na.rm=T)<=2000) { breaks0 <- seq(0,max(dat_fil$pri100,na.rm=T),200)
} else if(max(dat_fil$pri100,na.rm=T)>=6000){ breaks0 <- seq(0,max(dat_fil$pri100,na.rm=T),1000)
} else { breaks0 <- seq(0,max(dat_fil$pri100,na.rm=T),500) }
g <- ggplot(data=dat_fil) + geom_bar(aes(x=pri100, y=amt_ton, fill=aucCodeName, color=aucCodeName),stat="identity",position='stack') +
scale_fill_manual(values=c("#f28e2b","#4e79a7")) + scale_color_manual(values=c("#f28e2b","#4e79a7")) +
scale_x_continuous(breaks=breaks0, labels=breaks0) + facet_grid(factor(substr(dat_fil$market,1,4), levels=mkt_lst_top7) ~ ., scales = "free", drop = F)
ggplotly(g, tooltip = c('x',"y"), height = 600)