|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import json
import sys
import time

import requests
from bs4 import BeautifulSoup
# Entry page that lists all seed-variety categories.  It is fetched once at
# import time; the resulting module-level `soup` is read by get_url() and
# get_item_info() below.
url = 'http://www.chinaseed114.com/seed/pzdq/'
r = requests.get(url)
# Guess the real charset from the body (the site serves GB2312/GBK pages)
# instead of trusting the HTTP header default.
r.encoding = r.apparent_encoding
soup = BeautifulSoup(r.text,'lxml')
def get_url(url):
    """Return the href of every ``<a>`` inside a table cell on *url*.

    The original implementation ignored its parameter (it read the
    module-level ``soup``) and its loop variable shadowed ``url``; this
    version actually downloads the page it is asked about, so it works
    for any category page, not only the one fetched at import time.

    Parameters:
        url: address of a category listing page.

    Returns:
        list[str | None]: ``href`` attributes of all ``tr > td > a`` links
        (``None`` for anchors without an href, matching ``.get('href')``).
    """
    response = requests.get(url, timeout=8)
    response.encoding = response.apparent_encoding
    page = BeautifulSoup(response.text, 'lxml')
    return [anchor.get('href') for anchor in page.select('tr > td > a')]
def get_single_url(url, pages=79):
    """Collect variety names from the numbered pages under one category.

    Builds ``<url>1.html`` .. ``<url><pages>.html`` and concatenates the
    name lists scraped from each page.  ``pages`` defaults to 79, matching
    the original hard-coded ``range(1, 80)``.

    Parameters:
        url: category base URL (page number and ``.html`` are appended).
        pages: how many numbered pages to scrape.

    Returns:
        list[str]: all names found, in page order.
    """
    names = []
    for page_no in range(1, pages + 1):
        page_url = '{}{}.html'.format(url, page_no)
        # `or []` guards against get_detail_name() returning None on a
        # failed download — the original crashed with TypeError here.
        names.extend(get_detail_name(page_url) or [])
    return names
def get_detail_name(url):
    """Scrape the variety names from one listing page.

    Nursery-stock ('miaomu') pages use a different HTML layout and are
    delegated to get_miaomu_name(); that check now happens *before* the
    download, so no request is wasted on miaomu URLs (the original fetched
    the page first and threw the response away).

    Parameters:
        url: address of a single listing page.

    Returns:
        list[str]: the names found; an empty list on any error, so callers
        can ``extend()`` safely — the original bare ``except: pass``
        returned None, which crashed get_single_url().
    """
    try:
        if 'miaomu' in url:
            # Different page template for nursery-stock categories.
            return get_miaomu_name(url)
        response = requests.get(url, timeout=8)
        response.encoding = response.apparent_encoding
        page = BeautifulSoup(response.text, 'lxml')
        return [a.get_text() for a in page.select('tr > td > ul > li.t_c > a.px14')]
    except Exception:
        # Best-effort scrape: skip pages that fail to download or parse.
        return []
-
def get_miaomu_name(url):
    """Scrape variety names from a nursery-stock ('miaomu') listing page.

    Parameters:
        url: address of the miaomu listing page.

    Returns:
        list[str]: text of every ``ul > li.catlist_li > a`` link.
    """
    response = requests.get(url)
    response.encoding = response.apparent_encoding
    page = BeautifulSoup(response.text, 'lxml')
    return [link.get_text() for link in page.select('ul > li.catlist_li > a')]
def get_item_info(url):
    """Pair each category name with the variety names scraped under it.

    Prints one dict per category and also appends it as a JSON line to
    ``seed_data.txt`` — this answers the question in the original code
    ("how do I get the dict into a txt?").  The original also issued a
    ``requests.get`` per category whose result (``soup1``) was never used;
    that dead request has been removed.

    NOTE(review): category names come from the module-level ``soup``
    (the page fetched at import time), matching the original behaviour.

    Parameters:
        url: the top-level category index page.
    """
    names = soup.select('tr > td > a')
    urls = get_url(url)
    for name, detail_url in zip(names, urls):
        data = {
            '品种': name.get_text(),   # variety/category name
            '详细': get_single_url(detail_url),  # names under it
        }
        print(data)
        # One JSON object per line; ensure_ascii=False keeps the Chinese
        # keys and values human-readable in the file.
        with open('seed_data.txt', 'a', encoding='utf-8') as out:
            out.write(json.dumps(data, ensure_ascii=False) + '\n')
- get_item_info(url)
复制代码
代码内容并不重要。 |
|