# 本帖最后由 xiaosi4081 于 2020-8-5 15:35 编辑
# 可能缩进乱了
from lxml import etree
import requests
import csv
import time
def writecsv(item):
    """Append one row to ``ershou.csv`` in the current working directory.

    Args:
        item: An iterable of field values forming a single CSV row.

    The write is best-effort: if the row cannot be written, ``write error!``
    is printed and the function returns normally instead of raising.
    """
    # newline='' is required by the csv module; without it the platform's
    # newline translation turns the writer's '\r\n' into '\r\r\n' on Windows,
    # which shows up as an empty row after every record.
    with open('ershou.csv', 'a', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        try:
            writer.writerow(item)
        except Exception:
            # Deliberately broad so one bad row never aborts the caller, but
            # no longer a bare ``except`` that would also swallow
            # KeyboardInterrupt / SystemExit.
            print('write error!')
if __name__ == '__main__':
    # Walk listing pages 1..75 under start_url, extract four fields from each
    # listing row and append them to the CSV file via writecsv().
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.3'}
    start_url = 'https://cd.ke.com/ershoufang/damian/pg'
    for x in range(1, 76):
        url = start_url + str(x)
        # A timeout keeps a stalled connection from hanging the crawl forever;
        # network errors still propagate, as they did before.
        html = requests.get(url, headers=headers, timeout=10)
        # Pause between requests to avoid hammering the server.
        time.sleep(3)
        selector = etree.HTML(html.text)
        # Listing rows on this page
        xiaoqulist = selector.xpath('//*[@id="beike"]/div[1]/div[4]/div[1]/div[4]/ul/li')
        for xiaoqu in xiaoqulist:
            # Property name
            mingcheng_nodes = xiaoqu.xpath('div/div[2]/div[1]/div/a/text()')
            # Property info (the second text node is the one used)
            xinxi_nodes = xiaoqu.xpath('div/div[2]/div[2]/text()')
            # Average (unit) price
            junjia_nodes = xiaoqu.xpath('div/div[2]/div[5]/div[2]/span/text()')
            # Total price. The leading '.' makes the query relative to this
            # row; a bare '//' is evaluated against the whole document and
            # would return the first listing's price for every row.
            zongjia_nodes = xiaoqu.xpath('.//div/div[5]/div[@class="totalPrice"]/span/text()')

            # Rows that lack any expected field do not have the layout these
            # paths assume; skip them rather than abort the whole crawl with
            # an IndexError.
            if (not mingcheng_nodes or len(xinxi_nodes) < 2
                    or not junjia_nodes or not zongjia_nodes):
                print('skip incomplete row on', url)
                continue

            mingcheng = mingcheng_nodes[0]
            xinxi = xinxi_nodes[1]
            junjia = junjia_nodes[0]
            zongjia = zongjia_nodes[0]
            item = [mingcheng, xinxi, junjia, zongjia]
            writecsv(item)
            print('londing:', mingcheng)
|