도매시장 데이터 가공과 시각화 - 배추(2/2)

앞선 Post에서는 가락시장을 중심으로 시각화를 해보았다면 이번 Post에서는 다른 도매시장들에서의 보다 구체적인 가격정보를 시각화하는 코드를 살펴보자.

  library(dplyr); library(tidyr); library(ggplot2); library(ggthemes); library(formattable);  library(plotly)

필요한 패키지를 불러들인다.

date0 <- 20180613
unit_list <- c('1kg','4kg','8kg','10kg','12kg')
unit_df <- data.frame(unit2=c('1kg','4kg','8kg','10kg','12kg'))

dat <- dat %>% mutate(unit_merg=paste(round(prut,0),unit,package,sep=''), mkt_short = substr(market,1,4)) 

date0l7d <- as.numeric(strftime(as.Date(as.character(date0),format="%Y%m%d") - 7, format="%Y%m%d")) #7일전
date0l14d <- as.numeric(strftime(as.Date(as.character(date0),format="%Y%m%d") - 14, format="%Y%m%d")) #14일전
date0l21d <- as.numeric(strftime(as.Date(as.character(date0),format="%Y%m%d") - 21, format="%Y%m%d")) #21일전
dat_r7d <- dat %>% filter(date>=date0l7d, date<=date0) %>% mutate(unit2 = paste(round(prut,0),unit,sep=''), pri100 = round(price_per_kg,-2))
dat_l2w <- dat %>% filter(date>=date0l14d, date<date0l7d) %>% mutate(unit2 = paste(round(prut,0),unit,sep=''), pri100 = round(price_per_kg,-2))
dat_l3w <- dat %>% filter(date>=date0l21d, date<date0l14d) %>% mutate(unit2 = paste(round(prut,0),unit,sep=''), pri100 = round(price_per_kg,-2))

tmp0 <- dat_r7d %>% filter(unit2%in%unit_list) %>% group_by(unit2,mkt_short,insname) %>% summarise(amt_ton=round(sum(sales_amt,na.rm=T)/1000,0), price=round(mean(price_per_kg,na.rm=T),0)) %>% arrange(desc(amt_ton)) %>% ungroup()
tmp1 <- dat_l2w %>% filter(unit2%in%unit_list) %>% group_by(unit2,mkt_short,insname) %>% summarise(amt_ton=round(sum(sales_amt,na.rm=T)/1000,0), price=round(mean(price_per_kg,na.rm=T),0)) %>% arrange(desc(amt_ton)) %>% ungroup()
tmp2 <- dat_l3w %>% filter(unit2%in%unit_list) %>% group_by(unit2,mkt_short,insname) %>% summarise(amt_ton=round(sum(sales_amt,na.rm=T)/1000,0), price=round(mean(price_per_kg,na.rm=T),0)) %>% arrange(desc(amt_ton)) %>% ungroup()

dat2_1_raw <- merge(tmp0,tmp1[,-4],by=c('unit2','mkt_short','insname'),all.x=T)
dat2_1_raw <- merge(dat2_1_raw,tmp2[,-4],by=c('unit2','mkt_short','insname'),all.x=T)
colnames(dat2_1_raw)[5:7] <- c('price_l1w','price_l2w','price_l3w')

도매시장 리스트가 담겨있는 market_list.csv 파일은 AgData Lab AWS S3 저장소에서 다운받을 수 있다.

mkt_df <- read.csv('market_list.csv', stringsAsFactors = F)
mkt_list <- mkt_df[,-1]
tmp <- NULL
tmp2 <- NULL
tmp3 <- NULL # 관심시장을 상위로 해서 테이블을 만들어줌
unit_df <- data.frame(unit2=c('1kg','4kg','8kg','10kg','12kg'))
for(i in mkt_list){
  mkt_sel <- i
  dat2_1_sel <- dat2_1_raw %>% filter(mkt_short==substr(mkt_sel,1,4)) %>% arrange(unit2,desc(amt_ton))
  dat2_1_unsel <- dat2_1_raw %>% filter(mkt_short!=substr(mkt_sel,1,4)) %>% arrange(unit2, desc(amt_ton)) %>% group_by(unit2) %>% slice(1:20) # %>% head(n=20)
  if(nrow(dat2_1_sel)==0) {
    tmp <- rbind(tmp, data.frame(mkt_sel=rep(mkt_sel,nrow(unit_df)),unit_df,mkt_short=substr(mkt_sel,1,4),insname=NA,amt_ton=NA,price_l1w=NA,price_l2w=NA,price_l3w=NA))
    tmp3 <- rbind(tmp3, data.frame(mkt_sel=rep(mkt_sel,nrow(unit_df)),unit_df,mkt_short=substr(mkt_sel,1,4),insname=NA,amt_ton=NA,price_l1w=NA,price_l2w=NA,price_l3w=NA), data.frame(mkt_sel=mkt_sel, dat2_1_unsel))
  } else {
    tmp0 <- merge(unit_df, dat2_1_sel, by='unit2', all.x=T)
    tmp0[is.na(tmp0$mkt_short),'mkt_short'] <- substr(mkt_sel,1,4)
    tmp <- rbind(tmp, data.frame(mkt_sel=mkt_sel, tmp0))
    tmp3 <- rbind(tmp3,data.frame(mkt_sel=mkt_sel, tmp0), data.frame(mkt_sel=mkt_sel, dat2_1_unsel))
  }
  tmp2 <- rbind(tmp2, data.frame(mkt_sel=mkt_sel, dat2_1_unsel))  
}
dat2_1 <- tmp3
dat2_1 <- merge(dat2_1,mkt_df,by.x='mkt_sel',by.y='name',all.x=T)
dat2_1 <- dat2_1[,c(9,2:8)]
colnames(dat2_1)[1] <- 'mkt_sel'
tab_2_1 <- dat2_1 %>% filter(unit2=='10kg', mkt_sel==2) %>% select(mkt_short,insname,amt_ton,price_l1w,price_l2w,price_l3w) %>% arrange(desc(amt_ton)) %>% head(n=20)
detach("package:plotly", unload=TRUE)
formattable(tab_2_1, list(
  mkt_short=formatter(
    "span",
    style = x ~ ifelse(x == substr(mkt_sel,1,4), style(font.weight = "bold", color = 'blue'), NA)),
  amt_ton = color_tile("white", "orange"),
  area(col = c(price_l1w, price_l2w, price_l3w)) ~ normalize_bar("pink", 0.2, na.rm=T)
  )) 
mkt_short insname amt_ton price_l1w price_l2w price_l3w
서울가락 대아청과 760885 188 219 240
대전오정 대전청과(주) 85937 267 313 349
서울가락 서울청과 67709 299 345 330
대전노은 대전중앙청과 54281 277 290 330
대전오정 농협대전(공) 50562 213 249 310
인천삼산 부평농산 50138 212 228 252
구리도매 인터넷청과 48922 228 213 243
구리도매 구리청과 47397 231 250 317
대구북부 대구원협(공) 45822 254 222 188
순천도매 남도청과 33988 243 268 264
서울가락 한국청과 32726 208 222 280
순천도매 순천원협(공) 30521 332 366 274
광주각화 광주중앙청과 29587 309 272 323
인천구월 대인농산 27501 214 228 208
광주각화 광주청과 26049 234 242 314
안양도매 안양원협(공) 25702 583 733 554
광주서부 호남청과 25553 214 249 219
인천삼산 경인농산 24362 164 228 181
부산엄궁 항도청과 20556 237 392 400
대구북부 효성청과 19266 219 250 208
tmp4 <- NULL
tmp5 <- NULL
mkt_amt_top7 <- dat_r7d %>% filter(unit2 %in% unit_list) %>% group_by(unit2,market) %>% summarise(amount=sum(sales_amt,na.rm=T)) %>% arrange(unit2, desc(amount)) %>% group_by(unit2) %>% slice(1:7) %>% ungroup
# i <- '강릉도매시장'; j <- '1kg'
for(i in mkt_list){
  mkt_sel <- i 
  for(j in unit_list){
   mkt_top7_tmp <- mkt_amt_top7[mkt_amt_top7$unit2==j,'market']
    if(sum(as.integer(mkt_top7_tmp$market %in% mkt_sel))==1 ){
      mkt_lst_top7 <- c(mkt_sel,mkt_top7_tmp$market[1:7])
      mkt_lst_top7 <- unique(mkt_lst_top7)
    } else {
      mkt_lst_top7 <- c(mkt_sel,mkt_top7_tmp$market[1:6])
    }
    mkt_pri_dist <- dat_r7d %>% filter(market %in% mkt_lst_top7, unit2==j, pri100 <= 5000) %>% group_by(market,pri100,aucCodeName) %>% 
      summarise(amt_ton=sum(sales_amt,na.rm=T)/1000) %>% mutate(mkt_short=substr(market,1,4)) # %>% spread(pri100,amount,fill=0)
    if(nrow(mkt_pri_dist)==0) {
      tmp4 <- rbind(tmp4, data.frame(mkt_sel=i, unit2=j, market=NA,pri100=NA,aucCodeName=NA,amt_ton=NA,mkt_short=NA))
    } else {
      if(nrow(mkt_pri_dist[mkt_pri_dist$market==i,])==0) {
        tmp4 <- rbind(tmp4, data.frame(mkt_sel=i, unit2=j, market=i,pri100=NA,aucCodeName=NA,amt_ton=NA,mkt_short=NA),data.frame(mkt_sel=i, unit2=j, mkt_pri_dist))  
      } else {
        tmp4 <- rbind(tmp4, data.frame(mkt_sel=i, unit2=j, mkt_pri_dist))  
      }
    }
    tmp5 <- rbind(tmp5, data.frame(mkt_sel=i, unit2=j, mkt_lst_top7))
  }
}

mkt_pri_dist <- tmp4
mkt_pri_dist <- merge(mkt_pri_dist,mkt_df,by.x='mkt_sel',by.y='name',all.x=T)
mkt_pri_dist <- mkt_pri_dist[,c(8,2:7)]
colnames(mkt_pri_dist)[1] <- 'mkt_sel'

dat_fil <- mkt_pri_dist %>% filter(mkt_sel==2, unit2=='10kg')
mkt_lst_top7 <- dat_fil %>% filter(market!=dat_fil[1,'market']) %>% group_by(market) %>% summarize(amt_ton=sum(amt_ton,na.rm=T)) %>% arrange(desc(amt_ton))
mkt_lst_top7 <- c(as.character(dat_fil[1,'market']), as.character(mkt_lst_top7$market))
mkt_lst_top7 <- substr(mkt_lst_top7,1,4)
dat_fil$pri100 <- as.numeric(dat_fil$pri100)

if(max(dat_fil$pri100,na.rm=T)<=2000) { breaks0 <- seq(0,max(dat_fil$pri100,na.rm=T),200)
} else if(max(dat_fil$pri100,na.rm=T)>=6000){ breaks0 <- seq(0,max(dat_fil$pri100,na.rm=T),1000) 
} else { breaks0 <- seq(0,max(dat_fil$pri100,na.rm=T),500) }
g <- ggplot(data=dat_fil) + geom_bar(aes(x=pri100, y=amt_ton, fill=aucCodeName, color=aucCodeName),stat="identity",position='stack') +
  scale_fill_manual(values=c("#f28e2b","#4e79a7")) + scale_color_manual(values=c("#f28e2b","#4e79a7")) +
  scale_x_continuous(breaks=breaks0, labels=breaks0) + facet_grid(factor(substr(dat_fil$market,1,4), levels=mkt_lst_top7) ~ ., scales = "free", drop = F) 
ggplotly(g, tooltip = c('x',"y"), height = 600)     
장박 avatar
About 장박
장박은 농업정보시스템 연구실에서 오랜시간 숙성되고 있는 농업데이터사이언티스트이다.
comments powered by Disqus