|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
"""
1、分析网站结构
"""
import requests
import csv
from bs4 import BeautifulSoup
# Search-results page on search.dangdang.com that the script scrapes.
# NOTE(review): the ``key`` query value appears to be a percent-encoded
# (likely GBK) Chinese search phrase, roughly "must-read extracurricular
# books for third grade" -- TODO confirm.
url = 'http://search.dangdang.com/?key=%C8%FD%C4%EA%BC%B6%BF%CE%CD%E2%D4%C4%B6%C1%B1%D8%B6%C1%CA%E9&act=input'
# 1>获取网络返回的html数据
def get_requests_data(url, timeout=10):
    """Download a page and return its body as decoded text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx status (via ``raise_for_status``).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of handing an error page to the parser.
    response.raise_for_status()
    # When the headers carry no charset, requests falls back to ISO-8859-1
    # for text responses, which garbles non-Latin pages; use the encoding
    # detected from the body in that case.
    if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding
    return response.text
#2>数据的解析
def parser_content(content):
    """Extract book entries from a search-result page and append them to a CSV.

    Every ``<a class="pic">`` anchor in *content* is treated as one book. For
    each anchor a row holding the anchor's ``title``, its ``href`` and the
    cover image address is appended to the file named by the module-level
    ``fileName``. A header row is written first when that file is empty.

    Args:
        content: HTML text of the search-result page.

    Returns:
        None. The result is the rows appended to the CSV file; the matched
        anchors and each image tag's attributes are also printed.
    """
    soup = BeautifulSoup(content, 'lxml')
    # ``find_all`` is the search method; ``soup.findall`` would be looked up
    # as a <findall> tag, yield None, and fail when called.
    books = soup.find_all('a', class_='pic')
    print(books)

    # 3> Save the data.
    file_names = ['书名', '页面地址', '图片地址']
    # newline='' lets the csv module control line endings itself.
    with open(fileName, 'a+', encoding='utf_8', newline='') as file:
        writer = csv.DictWriter(file, file_names)
        # Only emit the header when nothing has been written to the file yet.
        file.seek(0, 2)
        if file.tell() == 0:
            writer.writeheader()
        for book in books:
            img = ''
            img_tag = book.find('img')
            if img_tag is not None:
                print(img_tag.attrs)
                # Prefer ``data-original`` when present and fall back to
                # ``src`` otherwise (presumably lazy-loaded images keep the
                # real address in ``data-original`` -- TODO confirm).
                img = img_tag.get('data-original') or img_tag.get('src', '')
            row = {
                file_names[0]: book.get('title', ''),
                file_names[1]: book.get('href', ''),
                file_names[2]: img,
            }
            writer.writerow(row)
# Path of the CSV output file; parser_content reads this module-level name.
fileName = 'dd_book.csv'

# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    req = get_requests_data(url)
    parser_content(req)
|