|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import time
import re
import pymongo
from lxml import etree
import requests
client=pymongo.MongoClient('localhost',27017)
mydb=client['mydb']
musictop=mydb['muisctop']
headers={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"}
def get_url_muisc(url):
html=requests.get(url,headers=headers)
selector=etree.HTML(html.text)
muisc_hrefs=selector.xpath('//div[@class="p12"]/@href')
for muisc_href in muisc_hrefs:
get_muisc_info(muisc_href)
def get_muisc_info(url):
html=requests.get(url,headers=headers)
selector=etree.HTML(html.text)
name=selector.xpath('//*[@id="wrapper"]/h1/span/text()')[0]
author=selector.xpath('//*[@id="info"]/span[2]/text()')[0]
styles=selector.xpath('//*[@id="info"]/span[3]/text()')[0]
time=selector.xpath('//*[@id="info"]/span[5]/text()')[0]
publishers=selector.xpath('//*[@id="info"]/span[6]/text()')[0]
score=selector.xpath('//*[@id="interest_sectl"]/div/div[2]/strong/text')[0]
print(name,author,time,publishers,score)
info={
'name':name,
'author':author,
'styles':styles,
'time':time,
'publishers':publishers,
'score':score
}
musictop.insert_one(info)
if __name__=='__main__':
urls=['https://music.douban.com/top250?start={}'.format(str(i)) for i in range(0,250,25)]
for url in urls:
get_url_muisc(url)
time.sleep(2)
上面是准确的代码 为什么在robomongo看不到数据?? |
|