library(httr)
library(rvest)
library(dplyr)
# Base address of the board listing; the page number is appended to it.
basic_url <- "http://www.etoland.co.kr/bbs/board.php?bo_table=etoboard01&page="
# Listing-page URLs for pages 1 through 6, built in one vectorized call
# instead of growing the vector inside a loop.
urls <- paste0(basic_url, 1:6)
urls
# Explore a single listing page: fetch the first page and list the hrefs of
# the anchors found under the subject elements.
html <- read_html(urls[1])
# html_nodes() (plural) is used for the subject class so that every matching
# element contributes its anchors; html_node() would keep only the first match.
# NOTE(review): assumes the post rows live inside `#mw_basic` and each carries
# the class `mw_basic_list_subject` -- confirm with the browser's inspector.
links <- html %>%
  html_node("#mw_basic") %>%
  html_nodes(".mw_basic_list_subject") %>%
  html_nodes("a") %>%
  html_attr("href") %>%
  unique()
links
# Drop links that contain "pdf". A logical mask is used because
# `x[-grep(...)]` returns an empty vector when nothing matches.
links <- links[!grepl("pdf", links)]
links
# First pass over every listing page: gather the anchor hrefs of each page.
# NOTE(review): `links` is reset and rebuilt again further down, so this pass
# looks redundant -- confirm whether it is still needed.
links <- NULL
for (page_url in urls) {
  # Read the page of the current iteration (not always the first one).
  html <- read_html(page_url)
  # html_nodes() on the subject class keeps every matching element rather
  # than only the first.
  page_links <- html %>%
    html_node("#mw_basic") %>%
    html_nodes(".mw_basic_list_subject") %>%
    html_nodes("a") %>%
    html_attr("href") %>%
    unique()
  links <- c(links, page_links)
}
# Rebuild the listing-page URLs for pages 1 through 6 in a single vectorized
# call; the page numbers are converted to text before being appended.
urls <- paste0(basic_url, as.character(seq_len(6)))
urls
# Collect the post links from every listing page.
#
# Question left by the original author (translated): "I do not know the page
# structure at this step. I understand html_node() is the function used to
# reach into the page, but where in the page should the selection start?"
# NOTE(review): the selectors below assume the post rows live inside
# `#mw_basic` and each carries the class `mw_basic_list_subject` -- confirm
# with the browser's inspector.
links <- unlist(lapply(urls, function(page_url) {
  # Read the page of the current iteration (not always the first one).
  hrefs <- read_html(page_url) %>%
    html_node("#mw_basic") %>%
    html_nodes(".mw_basic_list_subject") %>%
    html_nodes("a") %>%
    html_attr("href")
  # Anchors without an href yield NA; they cannot be fetched later.
  hrefs <- hrefs[!is.na(hrefs)]
  if (length(hrefs) == 0) {
    return(character(0))
  }
  # Resolve relative hrefs against the page they came from so that every
  # link can be passed to read_html() directly; absolute URLs are unchanged.
  xml2::url_absolute(hrefs, page_url)
}))
# The same post may be linked from more than one page.
links <- unique(links)
# Show which positions contain "pdf" before removing them.
grep("pdf", links)
# A logical mask is used because `x[-grep(...)]` returns an empty vector
# when nothing matches.
links <- links[!grepl("pdf", links)]
links
# Goal (translated from the original author's note): collect the posts as text.
#
# Fetch each link one at a time and extract the text of the first `.js` node.
# The result has one entry per link; a page that cannot be read, or that has
# no matching node, contributes NA instead of aborting the whole run.
# NOTE(review): `.js` is kept from the original -- confirm it is the selector
# of the post body.
txts <- vapply(links, function(link) {
  tryCatch(
    read_html(link) %>%
      html_node(".js") %>%
      html_text(),
    error = function(e) {
      warning("Failed to read ", link, ": ", conditionMessage(e), call. = FALSE)
      NA_character_
    }
  )
}, character(1), USE.NAMES = FALSE)
txts
write.csv(txts, "text.csv")