luroot153 发表于 2018-4-7 12:22:19

爬取58同城游戏机数据，并存入 MongoDB 数据库

import time

import pymongo
import requests
from bs4 import BeautifulSoup

# MongoDB handles shared by the scraping functions below.
# The client targets a server on localhost, port 27017.
client = pymongo.MongoClient(host="localhost", port=27017)
# Database 'ceshi' holds both collections used by this script.
ceshi = client.get_database('ceshi')
# get_list() writes item links here.
url_list = ceshi.get_collection('url_list')
# get_info() writes item details here.
url_info = ceshi.get_collection('url_info')


def get_list(channel_url, page):
    """Scrape one listing page of a channel and store its item links.

    Fetches ``<channel_url>/pn<page>``, and for every ``td.t a.t`` anchor
    that has an ``href`` inserts ``{'url': <href without query string>}``
    into the ``url_list`` collection and prints the link.

    Args:
        channel_url: Base URL of the channel; a trailing ``/`` is tolerated.
        page: Page number, appended to the URL as ``pn<page>``.

    Returns:
        None. If the page contains no ``<td class="t">`` element, nothing
        is stored.
    """
    # Strip a trailing slash so the joined URL does not contain '//pn<page>'.
    url = '{}/pn{}'.format(channel_url.rstrip('/'), page)
    # requests never times out by default; bound the wait so one stalled
    # connection cannot hang the whole crawl.
    wb_data = requests.get(url, timeout=10)
    time.sleep(1)  # pause after each page fetch
    soup = BeautifulSoup(wb_data.text, 'lxml')

    # No listing cells on this page: nothing to collect.
    if not soup.find('td', 't'):
        return

    for link in soup.select('td.t a.t'):
        href = link.get('href')
        if not href:
            # Anchor without an href; .split() on None would raise.
            continue
        # Keep only the part before '?'. The original stored the whole
        # list returned by split() instead of the URL string.
        item_link = href.split('?')[0]
        url_list.insert_one({'url': item_link})
        print(item_link)

# get_list('http://bj.58.com/qiulei/',4)


def get_info(url, n):
    """Scrape item details from a listing page and store them in MongoDB.

    Fetches ``url`` once, collects the title, price, description and area
    elements with CSS selectors, and inserts one document per item into
    the ``url_info`` collection, printing each record as it is stored.

    Args:
        url: URL of the listing page to scrape.
        n: Exclusive upper bound on the item count; at most ``n - 1``
            items are stored (the original loop was ``range(1, n)``).

    Returns:
        None.
    """
    # requests never times out by default; bound the wait so one stalled
    # connection cannot hang the whole crawl.
    wb_data = requests.get(url, timeout=10)
    time.sleep(1)  # pause once per fetch, as get_list does
    soup = BeautifulSoup(wb_data.text, 'lxml')

    # select() returns a list of tags, so .text has to be read from each
    # element; calling it on the list itself raised AttributeError.
    limit = max(n - 1, 0)
    titles = soup.select('td.t > a')[:limit]
    prices = soup.select('span.price')
    descs = soup.select('span.desc')
    areas = soup.select('td.t > span.fl > span:nth-of-type(1)')

    # NOTE(review): zip pairs the four lists by position and stops at the
    # shortest; this assumes every listing row carries all four elements.
    # Confirm against the live page markup.
    for title_tag, price_tag, desc_tag, area_tag in zip(titles, prices, descs, areas):
        record = {
            "title": title_tag.text,
            "data": desc_tag.text,
            "price": price_tag.text,
            "area": area_tag.text,
        }
        # insert_one() adds an '_id' key to the dict it receives; pass a
        # copy so the printed record shows only the scraped fields.
        url_info.insert_one(dict(record))
        print(record)


# Script entry: collect the item links from page 4 of the Beijing
# 'youxiji' channel, then store item details from the channel front page.
get_list(channel_url='http://bj.58.com/youxiji/', page=4)
get_info(url='http://bj.58.com/youxiji/', n=25)
页: [1]
查看完整版本: 爬取58同城游戏机数据,并存入mongo数据库