Source code below:
import urllib.request
from urllib.error import URLError, HTTPError
import pandas as pd
import datetime
import calendar
import time
import random
import json
def get_data(daylist):
    # each sub-list starts with a header string; data rows are appended after it
    rb_f_list = [["tradeday"], ["DELIVERYMONTH"], ["CLOSEPRICE"]]
    hc_f_list = [["tradeday"], ["DELIVERYMONTH"], ["CLOSEPRICE"]]
    for each_tradeday in daylist:
        # route each request through a randomly chosen HTTP proxy
        iplist = ['183.154.223.211:9000', '115.223.198.116:9000', '182.105.14.29:9000']
        proxy_support = urllib.request.ProxyHandler({'http': random.choice(iplist)})
        opener = urllib.request.build_opener(proxy_support)
        opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')]
        urllib.request.install_opener(opener)
        # build the daily-data URL from the YYYY-MM-DD trade day string
        url = ('http://www.shfe.com.cn/data/dailydata/kx/kx'
               + each_tradeday[0:4] + each_tradeday[5:7] + each_tradeday[8:10] + '.dat')
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
        try:
            # open once with an explicit timeout; the original code opened the
            # same URL twice, doubling the number of requests per day
            response = urllib.request.urlopen(req, timeout=10)
        except (HTTPError, URLError):
            # no data file for this day (e.g. a holiday) or the request failed
            continue
        html = response.read().decode('utf-8')
        response.close()
        # the .dat endpoint returns JSON; json.loads is safer than eval() on raw text
        contain = json.loads(html)
        contain1 = contain["o_curinstrument"]
        for each_price_data in contain1:
            # PRODUCTID is padded with trailing spaces in the feed, so strip() first
            if each_price_data["PRODUCTID"].strip() == "rb_f" and each_price_data["DELIVERYMONTH"] == "1804":
                rb_f_list[0].append(each_tradeday)
                rb_f_list[1].append(each_price_data["DELIVERYMONTH"])
                rb_f_list[2].append(each_price_data["CLOSEPRICE"])
                print(rb_f_list[0], rb_f_list[1], rb_f_list[2])
            if each_price_data["PRODUCTID"].strip() == "hc_f" and each_price_data["DELIVERYMONTH"] == "1804":
                hc_f_list[0].append(each_tradeday)
                hc_f_list[1].append(each_price_data["DELIVERYMONTH"])
                hc_f_list[2].append(each_price_data["CLOSEPRICE"])
                print(hc_f_list[0], hc_f_list[1], hc_f_list[2])
        time.sleep(5)  # pause between days to avoid hammering the server
    # turn each column list into a DataFrame and join them on the row index
    rb_f_data0 = pd.DataFrame(data=rb_f_list[0]).reset_index()
    rb_f_data1 = pd.DataFrame(data=rb_f_list[1]).reset_index()
    rb_f_data2 = pd.DataFrame(data=rb_f_list[2]).reset_index()
    hc_f_data0 = pd.DataFrame(data=hc_f_list[0]).reset_index()
    hc_f_data1 = pd.DataFrame(data=hc_f_list[1]).reset_index()
    hc_f_data2 = pd.DataFrame(data=hc_f_list[2]).reset_index()
    rb_f_data = pd.merge(rb_f_data0, rb_f_data1, on='index')
    rb_f_data = pd.merge(rb_f_data, rb_f_data2, on='index')
    hc_f_data = pd.merge(hc_f_data0, hc_f_data1, on='index')
    hc_f_data = pd.merge(hc_f_data, hc_f_data2, on='index')
    rb_f_data.to_csv('rebar_data.csv')        # 螺纹钢 (rebar) prices
    hc_f_data.to_csv('hot_rolled_sheet.csv')  # 热轧钢板 (hot-rolled sheet) prices
def get_trade_day(tradeday_str):
    # walk backwards from the given date, collecting 200 weekdays
    # (weekends are skipped; exchange holidays are not, so some days will 404)
    trade_day_list = []
    number_of_tradedays = 0
    while number_of_tradedays < 200:
        year = int(tradeday_str[0:4])
        month = int(tradeday_str[5:7])
        day = int(tradeday_str[8:10])
        if calendar.weekday(year, month, day) != 5 and calendar.weekday(year, month, day) != 6:
            trade_day_list.append(tradeday_str[0:10])
            number_of_tradedays += 1
        the_date = datetime.datetime(year, month, day) - datetime.timedelta(days=1)
        tradeday_str = str(the_date)
    return trade_day_list
def get_tradedays_data_of_two_things():
    riqi = input('Enter a date (format must be YYYY-MM-DD): ')
    days = get_trade_day(riqi)
    get_data(days)

if __name__ == "__main__":
    get_tradedays_data_of_two_things()
My intention was to fetch futures prices with this crawler, but I suspect I am requesting too frequently and that is what causes the errors. I already tried adding time.sleep(5) and faking a browser visit with a User-Agent header, yet it still threw error 10045. This time I went through proxies instead, and now it throws error 10060. Could anyone advise how to solve this?
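For context on the second error: Winsock error 10060 means the TCP connection timed out, and when every request is routed through a proxy that almost always means the proxy itself is dead or unreachable; free proxy IPs like the three in the script tend to go stale within hours. Below is a minimal sketch of the timeout-plus-retry pattern the question is circling around, using the same urllib stack. The helper name fetch_with_retry and the retry/backoff values are illustrative assumptions, not a verified fix for this site.

import random
import time
import urllib.request
from urllib.error import URLError, HTTPError

def fetch_with_retry(url, proxies, retries=3, timeout=10):
    # Try up to `retries` times; rotate proxies and back off after each failure.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
    for attempt in range(retries):
        proxy = random.choice(proxies) if proxies else None
        handler = urllib.request.ProxyHandler({'http': proxy} if proxy else {})
        opener = urllib.request.build_opener(handler)
        req = urllib.request.Request(url, headers=headers)
        try:
            # an explicit timeout makes a dead proxy fail fast instead of
            # hanging until the OS gives up and raises 10060
            with opener.open(req, timeout=timeout) as response:
                return response.read().decode('utf-8')
        except (HTTPError, URLError, OSError) as err:
            print('attempt %d via %s failed: %s' % (attempt + 1, proxy, err))
            time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, 4s, ...
    return None  # every attempt failed; let the caller skip this day

Inside get_data this would replace the install_opener/urlopen block: call html = fetch_with_retry(url, iplist) and move on to the next trading day when it returns None. Testing each proxy once against a known-good URL before the main loop, or simply dropping the proxies and requesting directly with longer sleeps, would also tell you quickly whether the proxies are the real problem.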