import requests
from lxml import etree
def _parse_movie(li):
    """Extract ``[name, rank, score, info]`` from one ``<li>`` list entry."""
    m_name = li.xpath('.//div[@class="hd"]/a/span[1]/text()')[0]
    m_rank = li.xpath('.//div[@class="pic"]/em/text()')[0]
    m_score = li.xpath('.//div[@class="star"]/span[2]/text()')[0]
    # normalize-space() stringifies only the first matched text node and
    # collapses its runs of whitespace into single spaces.
    m_info = li.xpath('normalize-space(.//div[@class="bd"]/p/text())')
    return [m_name, m_rank, m_score, m_info]


def main():
    """Scrape the Douban Top 250 listing and print the collected rows.

    Ten pages are requested. From every ``<li>`` under
    ``<ol class="grid_view">`` a ``[name, rank, score, info]`` list is
    built, and the accumulated list of rows is printed once at the end.

    Raises:
        requests.RequestException: if a request times out, fails, or the
            server answers with an HTTP error status.
        IndexError: if a list entry lacks the name, rank or score node.
    """
    url = 'https://movie.douban.com/top250'
    headers = {'user-agent': 'firefox'}
    # 250 entries spread over 10 pages -> 25 entries per page.
    page_size = 25
    movies = []
    for page in range(10):
        # `start` is an entry offset, not a page index: sending 0, 1, 2, ...
        # re-fetches nearly the same entries on every request.
        params = {'start': page * page_size}
        # Without a timeout a stalled connection would block forever.
        r = requests.get(url, headers=headers, params=params, timeout=10)
        # Fail loudly instead of parsing an error page as a listing.
        r.raise_for_status()
        html = etree.HTML(r.text)
        for li in html.xpath('//ol[@class="grid_view"]/li'):
            movies.append(_parse_movie(li))
    print(movies)
# Run the scraper only when this file is executed as a script; importing the
# module must not trigger any network requests.
if __name__ == '__main__':
    main()
|