|
import re

import bs4
import easygui
import requests

# Matches runs of non-whitespace characters; used to strip spaces and
# newlines out of the scraped titles and URLs.
str1 = r'\S+'

header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                        'Chrome/81.0.4044.129 Safari/537.36'}


def get_url(name):
    """Search 80s.tw for the keyword and save each result's title and detail-page URL."""
    url = 'https://www.80s.tw/search'
    data = {'keyword': name}
    res = requests.post(url, data, headers=header)
    soup = bs4.BeautifulSoup(res.text, "html.parser")
    body = soup.find('div', id='body')
    block3 = body.find('div', id='block3')
    ul = block3.find('ul', class_='clearfix search_list')
    li = ul.find_all('li')
    for each in li:
        a = each.find('a')
        # Split the anchor text on whitespace and keep the second token as the title.
        title = re.findall(str1, a.text)
        with open('name.txt', 'at', encoding='utf-8') as f:
            f.write(title[1] + '\n')
        # a['href'] is a site-relative path, so prepend the host.
        detail_url = 'www.80s.tw' + a['href']
        with open('url.txt', 'at', encoding='utf-8') as f:
            f.write(detail_url + '\n')


def get_bt():
    """Open every saved detail page and append its download links to bt.txt."""
    with open('url.txt', 'rt', encoding='utf-8') as f:
        urls = f.readlines()
    with open('name.txt', 'rt', encoding='utf-8') as f:
        names = f.readlines()
    for each_url, each_name in zip(urls, names):
        # readlines() keeps the trailing newline; the regex strips it off.
        url = re.findall(str1, each_url)
        name = re.findall(str1, each_name)
        print(name, url)
        res = requests.get('https://' + url[0], headers=header)
        soup = bs4.BeautifulSoup(res.text, "html.parser")
        # The download buttons sit inside <span class="xunlei dlbutton1"> elements.
        spans = soup.find_all('span', class_='xunlei dlbutton1')
        for span in spans:
            for a in span.find_all('a'):
                # Skip empty hrefs, then record the title followed by the link.
                if len(re.findall(str1, a['href'])) != 0:
                    with open('bt.txt', 'at', encoding='utf-8') as f:
                        f.write(name[0] + '\n')
                        f.write(a['href'] + '\n')
                    print(a['href'])


name = easygui.enterbox()
get_url(name)
get_bt()
|