|  | 
 
 发表于 2020-11-28 17:14:01
|
显示全部楼层 
| 本帖最后由 suchocolate 于 2020-11-28 20:48 编辑 
# Version 1: standard-library urllib
from urllib import request
from lxml import etree
import csv
def main():
    """Scrape the Douban movie chart page and save the extracted text to CSV.

    Requests https://movie.douban.com/chart with a ``User-Agent`` header,
    collects the text of every ``<span style="font-size:13px;">`` element,
    prints the resulting list, and writes it as a single row to
    ``movies.csv`` (UTF-8, relative to the current working directory).

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for an
            HTTP error status).
        TimeoutError: If the server stops responding while the body is read.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    headers = {'User-Agent': 'Firefox'}
    req = request.Request('https://movie.douban.com/chart', headers=headers)
    # Close the response deterministically and bound the wait so a stalled
    # server cannot hang the script forever.
    with request.urlopen(req, timeout=10) as r:
        page = r.read().decode('utf-8')
    html = etree.HTML(page)
    result = html.xpath('//span[@style="font-size:13px;"]/text()')
    print(result)
    # newline='' hands line-ending control to the csv module; without it an
    # extra '\r' is written per row on platforms that use '\r\n'.
    with open('movies.csv', 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(result)
    print('done')
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
 
# Version 2: third-party requests library
import requests
from lxml import etree
import csv
def main():
    """Scrape the Douban movie chart page and save the extracted text to CSV.

    Requests https://movie.douban.com/chart with a ``user-agent`` header,
    collects the text of every ``<span style="font-size:13px;">`` element,
    prints the resulting list, and writes it as a single row to
    ``movies.csv`` (UTF-8, relative to the current working directory).

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
    """
    url = 'https://movie.douban.com/chart'
    headers = {'user-agent': 'firefox'}
    # requests never times out by default; bound the wait so a stalled
    # server cannot hang the script forever.
    r = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page and writing an
    # empty CSV row.
    r.raise_for_status()
    html = etree.HTML(r.text)
    result = html.xpath('//span[@style="font-size:13px;"]/text()')
    print(result)
    # newline='' hands line-ending control to the csv module; without it an
    # extra '\r' is written per row on platforms that use '\r\n'.
    with open('movies.csv', 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(result)
    print('done')
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
 | 
 |