|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
爬取2017年房价排行榜数据:
- import requests
- import bs4
- import re
- import openpyxl
def open_url(url, timeout=10):
    """Download *url* while presenting a desktop-browser User-Agent.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout ``requests.get`` may block forever on an
            unresponsive host.

    Returns:
        The ``requests`` response object for the GET request.
    """
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
        ),
    }
    return requests.get(url, headers=headers, timeout=timeout)
def get_data(html):
    """Extract the ranking rows from a fetched page.

    Args:
        html: Response-like object whose ``text`` attribute holds the page
            markup.

    Returns:
        A list of rows. Each row is the list of text lines taken from one
        ``<li>`` element whose class matches ``^clearfix.+``, with the first
        entry replaced by the text captured between "2017年" and "房价".
        Items whose first line does not contain that pattern are skipped.
    """
    soup = bs4.BeautifulSoup(html.text, 'html.parser')
    # Compiled once so the pattern is not looked up again for every item.
    title_pattern = re.compile(r'2017年(.+)房价')

    rows = []
    for item in soup.find_all('li', class_=re.compile('^clearfix.+')):
        fields = item.text.strip().split('\n')
        match = title_pattern.search(fields[0])
        if match is None:
            # Not a ranking entry (e.g. an unrelated list item that shares
            # the class prefix); skip it instead of failing on ``None``.
            continue
        fields[0] = match.group(1)
        rows.append(fields)
    return rows
def save_as_excel(mylist, filename='2017全国房价排行.xlsx'):
    """Write the scraped rows to an Excel workbook.

    Args:
        mylist: Iterable of rows; each row is appended to the sheet as-is
            below the header row.
        filename: Path of the workbook to write. Defaults to the name the
            script has always used.
    """
    wb = openpyxl.Workbook()
    ws = wb.active

    # Header row: region, housing price, year-on-year change.
    ws['A1'] = '地区'
    ws['B1'] = '房价'
    ws['C1'] = '同比'

    for row in mylist:
        ws.append(row)

    wb.save(filename)
def main():
    """Fetch the 2017 ranking page, parse it, and save the rows to Excel."""
    page = open_url('https://www.anjuke.com/fangjia/quanguo2017/')
    save_as_excel(get_data(page))
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
复制代码 |
-
评分
-
查看全部评分
|