|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
import parsel
import csv
import time
import random
for page in range(1,56 + 1):
choice = random.randint(0,20)
time.sleep(choice)
print('======真正爬取第{}页数据====='.format(page))
url='https://sz.fang.lianjia.com/loupan/pg{}/'.format(str(page))
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'}
r=requests.get(url=url,headers=headers)
print(r.status_code)
html=r.text
print(type(html))
select = parsel.Selector(html)
lis = select.css('.resblock-list.post_ulog_exposure_scroll.has-results')
for li in lis:
title = li.css('.resblock-name a::text').get()
quyu = li.css('.resblock-location span::text').getall()
quyu = '/'.join(quyu)
dizhi = li.css('.resblock-location a::text').getall()
mianji = li.css('.resblock-area span::text').get()
fangxing = li.css('.resblock-room span::text').getall()
fangxing = '/'.join(fangxing)
junjia = li.css('.main-price span::text').get() + '元/㎡'
zongjia = li.css('.resblock-price .second::text').get()
#title_url = li.css('.title a::attr(href)').get()
print(title,quyu,dizhi,fangxing,mianji,junjia,zongjia,sep='---')
f=open('链家深圳网.csv','w',encoding='utf-8')
f.writelines('名字'+','+'区域'+','+'地址'+','+'房型'+','+'建筑面积'+','+'均价'+','+'总价'+'\n')
for i in range(len(quyu)):
f.writelines(title[i]+','+quyu[i]+','+dizhi[i]+','+fangxing[i]+','+mianji[i]+','+junjia[i]+','+zongjia[i]+'\n')
f.close()
代码是这样的 Traceback (most recent call last):
File "C:/tfm/爬虫1.py", line 34, in <module>
f.writelines(title[i]+','+quyu[i]+','+dizhi[i]+','+fangxing[i]+','+mianji[i]+','+junjia[i]+','+zongjia[i]+'\n')
IndexError: list index out of range
索引超出范围怎么解决啊 求求了 新手不会搞 |
|