|
发表于 2020-11-28 17:14:01
|
显示全部楼层
本帖最后由 suchocolate 于 2020-11-28 20:48 编辑
import csv
from urllib import request

from lxml import etree


def main():
    """Download the Douban movie chart page and save matched span texts as CSV.

    Requests https://movie.douban.com/chart with a browser-like User-Agent,
    collects the text of every ``<span style="font-size:13px;">`` element,
    prints the resulting list, and writes it as a single row to
    ``movies.csv`` in the current working directory.

    Raises:
        urllib.error.URLError: if the request fails or the server answers
            with an HTTP error status (``HTTPError`` is a subclass).
        TimeoutError: if the server does not respond within the timeout.
    """
    headers = {'User-Agent': 'Firefox'}
    req = request.Request('https://movie.douban.com/chart', headers=headers)
    # Use the response as a context manager so the connection is always
    # closed, and bound the wait so a stalled server cannot hang the script.
    with request.urlopen(req, timeout=10) as response:
        page = response.read().decode('utf-8')

    html = etree.HTML(page)
    result = html.xpath('//span[@style="font-size:13px;"]/text()')
    print(result)

    # newline='' is required by the csv module; without it every row is
    # followed by an extra blank line on Windows.
    with open('movies.csv', 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # All matched texts go into one row, as in the original snippet.
        writer.writerow(result)
    print('done')


if __name__ == '__main__':
    main()
复制代码
import csv

import requests
from lxml import etree


def main():
    """Download the Douban movie chart page and save matched span texts as CSV.

    Same job as the urllib-based variant, but using ``requests``: fetches
    https://movie.douban.com/chart, collects the text of every
    ``<span style="font-size:13px;">`` element, prints the resulting list,
    and writes it as a single row to ``movies.csv`` in the current working
    directory.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://movie.douban.com/chart'
    headers = {'user-agent': 'firefox'}
    # requests never times out by default; bound the wait explicitly.
    r = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page and writing
    # an empty CSV (matches the behaviour of the urllib variant).
    r.raise_for_status()

    html = etree.HTML(r.text)
    result = html.xpath('//span[@style="font-size:13px;"]/text()')
    print(result)

    # newline='' is required by the csv module; without it every row is
    # followed by an extra blank line on Windows.
    with open('movies.csv', 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # All matched texts go into one row, as in the original snippet.
        writer.writerow(result)
    print('done')


if __name__ == '__main__':
    main()
复制代码 |
|