네이버 책 검색기
In [1]:
%%html
<style type='text/css'>
.CodeMirror{ font-size: 14px; font-family: callable}
</style>
In [2]:
# 라이브러리
import requests
from bs4 import BeautifulSoup
In [3]:
# Target address of the book search page and the parameters sent along with it.
url = 'https://book.naver.com/search/search.nhn'
params = dict(
    sm='sta_hty.book',
    sug=' ',
    where='nexearch',
    query='bigdata',
)
In [4]:
# Send the GET request and keep the response body for parsing.
# The timeout stops the cell from waiting indefinitely on a stalled connection.
response = requests.get(url, params=params, timeout=10)
status_code = response.status_code
print(status_code)
# Fail right here on anything other than 200 OK; otherwise `text` would be
# left undefined (or stale from an earlier run) and the failure would only
# surface later, far from its cause.
if status_code != 200:
    raise RuntimeError('Unexpected HTTP status: {}'.format(status_code))
text = response.text
In [5]:
# Convert the HTML string into a BeautifulSoup tree.
# The parser is named explicitly: without it bs4 picks whichever parser is
# installed (and warns), so the resulting tree could differ between machines.
soup = BeautifulSoup(text, 'html.parser')
In [6]:
# Fetch the element holding all book information (selector found with the
# Chrome developer tools): the <ul> whose id is "searchBiblioList".
# Similar lookups by id alone: soup.find(id='searchBiblioList') or
# soup.find(attrs={'id': 'searchBiblioList'})
book_all = soup.select_one('ul#searchBiblioList')
#book_all
In [7]:
# Pull the <li> entries out of the result list:
# every matching <li> as a list, and the first match on its own.
book_all_li_all = book_all.select('li')
book_all_li_one = book_all.select_one('li')
In [8]:
# Get the bid of a single book: take the href of the entry's first <a> and
# keep the segment that follows the first '='.
first_link = book_all_li_one.select_one('a')
bid_one = first_link['href'].split('=')[1]
bid_one
Out[8]:
In [9]:
# Get the bid of every book: same href extraction as for a single entry,
# applied to each <li> in turn.
bid_list = [
    entry.select_one('a')['href'].split('=')[1]
    for entry in book_all_li_all
]
print(bid_list)
In [10]:
# Get the title of a single book: it is read from the `alt` attribute of the
# entry's first <img>.
book_name_one = book_all_li_one.select_one('img')['alt']
book_name_one
Out[10]:
In [11]:
# Get the title of every book from the `alt` attribute of each entry's first <img>.
title_list = [entry.select_one('img')['alt'] for entry in book_all_li_all]
#title_list
In [12]:
# Author, publisher and publication date of a single book.
# The <dd class="txt_block"> text is '|'-separated; whitespace-like characters
# are replaced by plain spaces before splitting.
book_info = book_all_li_one.select_one('dd.txt_block')
book_text = book_info.text.replace('\n', ' ').replace('\xa0', ' ')
# Continue from the already-cleaned string; restarting from book_info.text
# here would throw away the '\n' and '\xa0' replacements made above.
book_text = book_text.replace('\t', ' ').replace('\r', ' ')
book_text_list = book_text.split('|')
book_author = book_text_list[0]
book_publish = book_text_list[1]
book_pubdate = book_text_list[2]
print(book_author, book_publish, book_pubdate)
In [13]:
# Author, publisher and publication date for every book.
author_list = []
publish_list = []
pubdate_list = []
# Deletes newline, non-breaking space, carriage return and tab in one pass.
strip_table = str.maketrans('', '', '\n\xa0\r\t')
for entry in book_all_li_all:
    fields = entry.select_one('dd.txt_block').text.translate(strip_table).split('|')
    if len(fields) == 4:
        # Four fields: the first two are joined to form the author entry.
        author = fields[0] + fields[1]
        publisher, pubdate = fields[2], fields[3]
    else:
        author, publisher, pubdate = fields[0], fields[1], fields[2]
    author_list.append(author)
    publish_list.append(publisher)
    pubdate_list.append(pubdate)
print(author_list)
print(publish_list)
print(pubdate_list)
In [14]:
# Prices of a single book (the first entry): the struck-through price and the
# <em class="price"> price, each cut off at the first '원'.
first_desc = book_all_li_all[0].select_one('dd.txt_desc')
price_old = first_desc.select_one('strike').text.partition('원')[0]
price_new = first_desc.select_one('em.price').text.partition('원')[0]
print(price_old, price_new)
In [15]:
# Check the handling of missing prices, using the fifth entry.
# select_one() returns None when the tag is absent, so each price falls back
# to 0 in that case; otherwise the tag's text is cut off at the first '원'.
book_txt_desc = book_all_li_all[4].select_one('dd.txt_desc')
price_old = book_txt_desc.select_one('strike')
price_new = book_txt_desc.select_one('em.price')
if price_old is None:
    price_old = 0
else:
    # Read the tag's text and use str.split; the original `.splite` typo on
    # the tag itself fails as soon as a price is actually present.
    price_old = price_old.text.split('원')[0]
if price_new is None:
    price_new = 0
else:
    price_new = price_new.text.split('원')[0]
print(price_old, price_new)
In [16]:
# Prices of every book as (old price, new price) tuples.
# A missing tag yields 0; a present tag yields its text up to the first '원'.
price_list = []
for item in book_all_li_all:
    book_txt_desc = item.select_one('dd.txt_desc')
    strike_tag = book_txt_desc.select_one('strike')
    price_tag = book_txt_desc.select_one('em.price')
    # Identity check against None, as recommended for singletons.
    price_old = 0 if strike_tag is None else strike_tag.text.split('원')[0]
    price_new = 0 if price_tag is None else price_tag.text.split('원')[0]
    price_list.append((price_old, price_new))
price_list
Out[16]:
In [17]:
# Combine the per-field lists into one dict per book, in bid_list order.
# The other lists are indexed by the same position as the bid.
book_info_list = [
    {
        'bid': bid,
        'title': title_list[idx],
        'author': author_list[idx],
        'publish': publish_list[idx],
        'pubdate': pubdate_list[idx],
        'price_old_new': price_list[idx],
    }
    for idx, bid in enumerate(bid_list)
]
#book_info_list
In [18]:
# Display the record of the second book (index 1) as a spot check.
book_info_list[1]
Out[18]:
'학원 > Python' 카테고리의 다른 글
데이터 수집 - 다음뉴스 (0) | 2020.06.08 |
---|---|
데이터 수집 - 네이버 영화 순위 (0) | 2020.06.08 |
스크래핑과 크롤링 (0) | 2020.06.02 |
tinyDB 설치와 조작 (0) | 2020.06.02 |
로또 번호 가져오기 (0) | 2020.06.02 |