马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
import re
from bs4 import BeautifulSoup as soup
import openpyxl
def request_url(url, headers):
    """Download ``url`` and return its body parsed by BeautifulSoup.

    Args:
        url: Address of the page to fetch.
        headers: Mapping of HTTP request headers handed to ``requests.get``.

    Returns:
        The response text parsed with the ``html.parser`` backend.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with a 4xx/5xx status.
    """
    # Without a timeout, requests waits forever on a stalled server.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly instead of parsing an error page as if it held listings.
    response.raise_for_status()
    return soup(response.text, 'html.parser')
def get_data(html):
    """Collect the '·'-separated text fields of every listing block.

    Args:
        html: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        One list of strings per ``<div class="listX">`` element. Each list
        holds, in document order, the pieces obtained by splitting the text
        of every ``<p>`` inside that div on '·', with surrounding whitespace
        removed from each piece.
    """
    rows = []
    for listing in html.find_all('div', class_='listX'):
        # Strip each piece so padding around the separator does not end up
        # inside the spreadsheet cells. Empty pieces are kept so positions
        # within a row are not shifted.
        row = [
            field.strip()
            for paragraph in listing.find_all('p')
            for field in paragraph.text.split('·')
        ]
        rows.append(row)
    return rows
def save_as_excel(data, name):
    """Write a fixed header row plus every row of ``data`` to a new workbook.

    Args:
        data: Iterable of rows; each row is passed to ``Worksheet.append``.
        name: Path the workbook is saved under.
    """
    column_titles = (
        '户型', '面积', '朝向', '楼层', '装修', '地址',
        '关注人数', '近期带看', '发布时间', '总价', '单价',
    )
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    # Header cells A1..K1, one title per column letter.
    for column, title in zip('ABCDEFGHIJK', column_titles):
        sheet[column + '1'] = title
    for row in data:
        sheet.append(row)
    workbook.save(name)
def _split_range(text, label):
    """Split a 'low~high' answer into its two trimmed bounds.

    Raises:
        ValueError: If ``text`` has no '~' or either bound is empty.
    """
    low, separator, high = text.partition('~')
    low, high = low.strip(), high.strip()
    if not separator or not low or not high:
        raise ValueError(
            label + ' must be entered as low~high (e.g. 100~200), got: '
            + repr(text)
        )
    return low, high


def main():
    """Prompt for search filters, scrape the listing page and save it to Excel.

    Reads four answers from stdin (district, price range, area range, room
    count), builds the 5i5j search URL, downloads and parses the page with
    ``request_url`` / ``get_data`` and writes the rows with ``save_as_excel``.

    Raises:
        ValueError: If the price or area answer is not of the form low~high.
    """
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
    url_1 = input('输入你要搜索的杭州区的名字请输入拼音(例如 西湖区:xihuqu):')
    url_2 = input('选择你喜欢的房价的范围(例如:100~200):')
    url_3 = input('选择你喜欢的房子的面积大小m2(例如:100~200):')
    url_4 = input('选择你的户型(例如 n居室:n,6居室及以上输入9):')

    price_low, price_high = _split_range(url_2, 'price range')
    area_low, area_high = _split_range(url_3, 'area range')

    # The original built both the b/e and the h/l segment from the area
    # answer, so the price answer never reached the URL.
    # NOTE(review): assumes b<low>e<high> is the price filter and
    # h<high>l<low> the area filter -- confirm against the site's URL scheme.
    # NOTE(review): the room count (url_4) is still only used in the file
    # name; its URL segment is not known from this file.
    url = (
        'https://hz.5i5j.com/ershoufang/' + url_1 + '/'
        + 'b' + price_low + 'e' + price_high
        + 'h' + area_high + 'l' + area_low + '/'
    )
    html = request_url(url, headers)
    data = get_data(html)
    # Price is in 万元 and area in 平方; the original had the two unit labels
    # swapped in the file name.
    name = url_1 + url_2 + '万元' + url_3 + '平方' + url_4 + '居室' + '二手房信息.xlsx'
    save_as_excel(data, name)
    print('查询信息已保存!')


if __name__ == '__main__':
    main()
|