# Scrape game-console listings from 58.com (58同城) and store them in a MongoDB database.
import time

import pymongo
import requests
from bs4 import BeautifulSoup
# Connect to the MongoDB server on localhost, port 27017.
client=pymongo.MongoClient("localhost",27017)
# The 'ceshi' database holds both collections used by this script.
ceshi=client['ceshi']
# Collection that get_list() inserts item links into.
url_list=ceshi['url_list']
# Collection that get_info() inserts scraped listing records into.
url_info=ceshi['url_info']
def get_list(channel_url, page):
    """Collect item links from one listing page and store them in ``url_list``.

    Requests ``<channel_url>/pn<page>``, finds every ``td.t a.t`` anchor and
    inserts one ``{'url': <link>}`` document per anchor, where ``<link>`` is
    the anchor's ``href`` with any query string removed. Each stored link is
    also printed.

    Args:
        channel_url: Base URL of the channel, e.g.
            ``'http://bj.58.com/youxiji/'``. A trailing slash is tolerated.
        page: Page number, appended to the URL as ``pn<page>``.
    """
    # Strip a trailing '/' so the formatted URL does not contain '//pn<page>'.
    url = '{}/pn{}'.format(channel_url.rstrip('/'), str(page))
    # A timeout keeps the crawler from blocking forever on a stalled connection.
    wb_data = requests.get(url, timeout=10)
    time.sleep(1)  # wait one second after each request
    soup = BeautifulSoup(wb_data.text, 'lxml')
    # select() returns an empty list when the page has no listings, so no
    # separate "does td.t exist" check is required.
    for link in soup.select('td.t a.t'):
        href = link.get('href')
        if not href:
            # Anchor without an href: nothing to store.
            continue
        # Keep only the part before '?'; split() on its own returns a list,
        # which would be stored instead of the URL string.
        item_link = href.split('?')[0]
        url_list.insert_one({'url': item_link})
        print(item_link)
# get_list('http://bj.58.com/qiulei/',4)
def get_info(url, n):
    """Scrape listing records from ``url`` and store them in ``url_info``.

    Downloads the page once, reads the title, price, description and area
    elements, and inserts one document per listing with the keys ``title``,
    ``data``, ``price`` and ``area``. Each stored record is also printed.

    Args:
        url: Address of the listing page to scrape.
        n: Upper bound carried over from the original ``range(1, n)`` loop:
            at most ``n - 1`` records are stored.
    """
    # A timeout keeps the crawler from blocking forever on a stalled connection.
    wb_data = requests.get(url, timeout=10)
    # Only one request is made, so a single pause after it is enough.
    time.sleep(1)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    # select() returns a list of tags; ``.text`` must be read per element,
    # not on the list itself.
    titles = soup.select('td.t > a')
    prices = soup.select('span.price')
    descs = soup.select('span.desc')
    areas = soup.select('td.t > span.fl > span:nth-of-type(1)')

    # NOTE(review): zip() pairs the matches by position and stops at the
    # shortest list; this assumes every listing yields exactly one match per
    # selector -- confirm against the live page markup.
    limit = max(n - 1, 0)
    rows = list(zip(titles, prices, descs, areas))[:limit]

    for title, price, desc, area in rows:
        record = {
            "title": title.text,
            "data": desc.text,
            "price": price.text,
            "area": area.text,
        }
        # insert_one() adds an '_id' key to the dict it is given; pass a copy
        # so the printed record contains only the scraped fields.
        url_info.insert_one(dict(record))
        print(record)
# Store the item links found on page 4 of the 'youxiji' channel.
get_list('http://bj.58.com/youxiji/',4)
# Scrape listing records from the channel URL itself, with n=25.
get_info('http://bj.58.com/youxiji/',25)
# 页: [1]  (pagination footer left over from the source web page; not part of the script)