爬取豆瓣某一页的电影名称并保存为csv文件
采用urllib爬取https://movie.douban.com/chart 页面下的电影名称,将其保存在movie.csv文件中 本帖最后由 suchocolate 于 2020-11-28 20:48 编辑
from urllib import request
from lxml import etree
import csv
def main():
    """Fetch the Douban chart page with urllib and save span texts to movies.csv.

    Requests https://movie.douban.com/chart with a custom User-Agent header,
    parses the HTML with lxml, collects the text of every
    ``<span style="font-size:13px;">`` element (used in this thread as the
    movie titles), prints the list, and writes it to ``movies.csv`` as one
    single CSV row.
    """
    headers = {'User-Agent': 'Firefox'}
    req = request.Request('https://movie.douban.com/chart', headers=headers)
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read.
    with request.urlopen(req) as r:
        html = etree.HTML(r.read().decode('utf-8'))
    result = html.xpath('//span[@style="font-size:13px;"]/text()')
    print(result)
    # The csv module expects files opened with newline=''; without it,
    # extra blank lines can appear between rows on Windows.
    with open('movies.csv', 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # All texts go into a single row (one column per text).
        writer.writerow(result)
    print('done')


if __name__ == '__main__':
    main()
import requests
from lxml import etree
import csv
def main():
    """Fetch the Douban chart page with requests and save span texts to movies.csv.

    Requests https://movie.douban.com/chart with a custom user-agent header,
    parses the HTML with lxml, collects the text of every
    ``<span style="font-size:13px;">`` element (used in this thread as the
    movie titles), prints the list, and writes it to ``movies.csv`` as one
    single CSV row.
    """
    url = 'https://movie.douban.com/chart'
    headers = {'user-agent': 'firefox'}
    r = requests.get(url, headers=headers)
    html = etree.HTML(r.text)
    result = html.xpath('//span[@style="font-size:13px;"]/text()')
    print(result)
    # The csv module expects files opened with newline=''; without it,
    # extra blank lines can appear between rows on Windows.
    with open('movies.csv', 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # All texts go into a single row (one column per text).
        writer.writerow(result)
    print('done')


if __name__ == '__main__':
    main()
suchocolate 发表于 2020-11-28 17:14
竖着·排列怎么变 suchocolate 发表于 2020-11-28 17:14
那个正则表达式怎么写的,最后那个两行代码是什么意思呢 ,大佬 本帖最后由 suchocolate 于 2020-11-29 19:37 编辑
私はり 发表于 2020-11-29 19:24
竖着·排列怎么变
竖着排列
import requests
from lxml import etree
import csv
def main():
    """Fetch the Douban chart page and save each span text on its own CSV row.

    Requests https://movie.douban.com/chart with a custom user-agent header,
    parses the HTML with lxml, collects the text of every
    ``<span style="font-size:13px;">`` element (used in this thread as the
    movie titles), prints the list, and writes ``movies.csv`` with one text
    per row, i.e. arranged vertically.
    """
    url = 'https://movie.douban.com/chart'
    headers = {'user-agent': 'firefox'}
    r = requests.get(url, headers=headers)
    html = etree.HTML(r.text)
    result = html.xpath('//span[@style="font-size:13px;"]/text()')
    print(result)
    # Note the extra newline parameter: it prevents blank lines from
    # appearing between rows.
    with open('movies.csv', 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for item in result:
            # writerow() requires a row argument. Wrap the string in a list
            # so it becomes one cell instead of being split per character.
            writer.writerow([item])
    print('done')


if __name__ == '__main__':
    main()
私はり 发表于 2020-11-29 19:26
那个正则表达式怎么写的,最后那个两行代码是什么意思呢 ,大佬
那个是xpath,是用来解析html的,你网上搜一下就知道了。
if __name__ == '__main__'当模块被直接运行时,以下代码块将被运行,当模块是被导入时,代码块不被运行。
习惯性写法,养成好习惯。
suchocolate 发表于 2020-11-29 19:41
那个是xpath,是用来解析html的,你网上搜一下就知道了。
if __name__ == '__main__'当模块被直接运行时 ...
好的 多谢 suchocolate 发表于 2020-11-28 17:14
import requests
from bs4 import BeautifulSoup
# Script: download the Douban chart page and print the alt text of every
# <img> element that has one.
url2 = "https://movie.douban.com/chart"
hs = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',}
res = requests.get(url2, headers=hs)
print(res.status_code)
text = res.text
# print(text)
soup = BeautifulSoup(text, 'html.parser')
# Calling the soup object directly is shorthand for soup.find_all('img').
tags = soup('img')
for li in tags:
    # Indexing a tag by a missing attribute raises KeyError, so only read
    # 'alt' from <img> elements that actually carry it.
    if li.has_attr('alt'):
        result = li['alt']
        print(result)
我这个代码 的 alt那几行代码是干什么的呀
tags = soup('img')
for li in tags:
result=li['alt']
print(result)
私はり 发表于 2020-11-29 20:19
import requests
from bs4 import BeautifulSoup
url2="https://movie.douban.com/chart"
img元素的alt属性的内容,以后不知道内容多打印一下就知道了,学会自己分析。
页:
[1]