|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
#coding=utf-8
import requests
from redis import StrictRedis
from selenium import webdriver
import time
from bs4 import BeautifulSoup
import random
import xlwt
# Redis client on the local default port; checkRedis pops detail-page URLs
# from the 'mylist2' set through this handle.
r = StrictRedis(host='127.0.0.1', port=6379, db=0)

# Address of a local SOCKS5 proxy.
proxy = '127.0.0.1:1080'

# requests-style proxy mapping: both schemes are routed through the same
# SOCKS5 endpoint.
# NOTE(review): nothing in the visible code passes this mapping to
# requests.get -- confirm whether requests are meant to go through the proxy.
proxies = {scheme: 'socks5://' + proxy for scheme in ('http', 'https')}
# Every element id on the detail page shares this prefix; only the suffix varies.
_SPAN_ID_PREFIX = "mainModuleContainer_1855_1856_ctl00_ctl00_p1_"

# Seconds to wait for a response before the request counts as failed.
_REQUEST_TIMEOUT_SECONDS = 30
# Pause between retries so a failing server is not hit in a tight loop.
_RETRY_DELAY_SECONDS = 2

# (info key, span id suffix) pairs, in the order the keys must appear in the
# returned dict. The order matches the spreadsheet header built by the caller.
_FIELDS_BEFORE_INSTALMENT = (
    ('行政区', 'f1_r1_c2_ctrl'),
    ('项目名称', 'f1_r17_c2_ctrl'),
    ('项目位置', 'f1_r16_c2_ctrl'),
    ('面积', 'f1_r2_c2_ctrl'),
    ('土地用途', 'f1_r3_c2_ctrl'),
    ('土地使用年限', 'f1_r19_c2_ctrl'),
    ('土地级别', 'f1_r20_c2_ctrl'),
)
_INSTALMENT_FIELDS = (
    ('分期支付约定支付期号', 'f3_r2_c1_0_ctrl'),
    ('分期支付约定约定支付日期', 'f3_r2_c2_0_ctrl'),
    ('分期支付约定约定支付金额', 'f3_r2_c3_0_ctrl'),
    ('分期支付约定备注', 'f3_r2_c4_0_ctrl'),
)
_FIELDS_AFTER_HOLDER = (
    ('约定容积率下限', 'f2_r1_c2_ctrl'),
    ('约定容积率上限', 'f2_r1_c4_ctrl'),
    ('约定开工时间', 'f1_r22_c2_ctrl'),
    ('实际开工时间', 'f1_r10_c2_ctrl'),
    ('批准单位', 'f1_r14_c2_ctrl'),
    ('电子监管号', 'f1_r1_c4_ctrl'),
    ('土地来源', 'f1_r2_c4_ctrl'),
    ('供地方式', 'f1_r3_c4_ctrl'),
    ('行业分类', 'f1_r19_c4_ctrl'),
    ('成交价格', 'f1_r20_c4_ctrl'),
    ('约定交地时间', 'f1_r21_c4_ctrl'),
    ('约定竣工时间', 'f1_r22_c4_ctrl'),
    ('实际竣工时间', 'f1_r10_c4_ctrl'),
    ('合同签订日期', 'f1_r14_c4_ctrl'),
)


def _find_span(container, suffix):
    """Return the span whose id is the shared prefix plus *suffix*, or None."""
    return container.find('span', id=_SPAN_ID_PREFIX + suffix)


def _span_text(container, suffix, default=''):
    """Return the text of the span identified by *suffix*, or *default* if absent."""
    node = _find_span(container, suffix)
    return default if node is None else node.get_text()


def _fetch_page(url, headers):
    """Request *url* until the answer is neither a 500 nor a 503 response.

    Connection errors and timeouts are retried as well. Retries are unbounded,
    but each one is preceded by a short pause.
    """
    while True:
        try:
            response = requests.get(
                url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
        except requests.exceptions.RequestException:
            response = None
        if response is not None and response.status_code not in (500, 503):
            return response
        time.sleep(_RETRY_DELAY_SECONDS)


def get_info(url, headers):
    """Download one detail page and scrape its fields into a dict.

    Args:
        url: Address of the detail page.
        headers: HTTP headers sent with the request.

    Returns:
        A dict mapping field names to the text of the matching span, with
        keys in a fixed order. A span that is missing from the page yields
        an empty string. Returns None when the main table or the
        administrative-district span is not present.
    """
    req = _fetch_page(url, headers)
    soup = BeautifulSoup(req.text, "html.parser")
    items = soup.find('table', id=_SPAN_ID_PREFIX + "f1")

    # Diagnostic output.
    print(req.status_code)
    print(req)

    # Without the main table there is nothing to scrape.
    if items is None:
        return None
    division_node = _find_span(items, 'f1_r1_c2_ctrl')
    if division_node is None:
        return None
    print(division_node)

    info = {}
    for key, suffix in _FIELDS_BEFORE_INSTALMENT:
        info[key] = _span_text(items, suffix)

    # Instalment fields stay empty unless the first instalment cell exists.
    has_instalment = _find_span(items, _INSTALMENT_FIELDS[0][1]) is not None
    for key, suffix in _INSTALMENT_FIELDS:
        info[key] = _span_text(items, suffix) if has_instalment else ''

    # Land-use-right holder: when the r23 cell is blank, use the r9 cell.
    holder = _span_text(items, 'f1_r23_c2_ctrl')
    if not holder.strip():
        holder = _span_text(items, 'f1_r9_c2_ctrl')
    info['土地使用权人'] = holder

    # Includes '土地来源', which now holds its own span's text rather than
    # the holder's name.
    for key, suffix in _FIELDS_AFTER_HOLDER:
        info[key] = _span_text(items, suffix)

    print("信息:",info['行政区'],info['项目名称'],info['项目位置'],info['面积'],info['土地用途'],info['约定开工时间'],info['约定竣工时间'],info['合同签订日期'],info['土地使用权人'],info['电子监管号'])
    return info
# Pop detail-page URLs from the redis set 'mylist2' and scrape each one with
# get_info; also prepares an xlwt workbook whose first row holds the headers.
# NOTE(review): the body appears to be cut off in this view -- the loop has no
# branch for an empty set, sleepCounter is never used, and nothing is written
# to the sheet after the header row or saved. Confirm against the full script.
def checkRedis(sleepCounter, headers): # read from redis and parse the page
# If redis has no data yet, wait. Exit the program once the wait exceeds 100 seconds.
# (That wait/exit logic is not part of the visible code.)
book = xlwt.Workbook()
sheet1 = book.add_sheet('sheet1')
# Column headers; same order as the keys of the dict built by get_info.
row0 = ["行政区","项目名称","项目位置","面积(公顷)","土地用途","土地使用年限","土地级别","分期支付约定支付期号","分期支付约定约定支付日期"
,"分期支付约定约定支付金额(万元)","分期支付约定备注","土地使用权人","约定容积率下限","约定容积率上限","约定开工时间","实际开工时间"
,"批准单位","电子监管号","土地来源","供地方式","行业分类","成交价格(万元)","约定交地时间","约定竣工时间","实际竣工时间","合同签订日期"]
# Write the header row (row 0).
for i in range(0,len(row0)):
sheet1.write(0,i,row0[i])
# Row counter, initialised to 0; not updated anywhere in the visible code.
rowNo = 0
while 1:
# Only pop when the set is non-empty.
if r.scard('mylist2') != 0:
url = r.spop('mylist2')
print("二级URL:",url)
# get_info may return None when the page lacks the expected table/span.
info=get_info(url, headers)
报错(运行后没有任何输出,也没有错误信息,程序直接结束):
PS E:\qycache\xuexi\pythonProject> & C:/Users/一介书生/AppData/Local/Programs/Python/Python312/python.exe e:/qycache/xuexi/pythonProject/房地产/土地.py
PS E:\qycache\xuexi\pythonProject>
|
|