| 
 | 
 
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册  
 
x
 
例如在下面的代码中,tags 似乎比正则表达式更好用: 
# Volleyball
# Data from https://en.volleyballworld.com/en/vnl/2019/women/resultsandranking/round1
#
# Scrapes the round-1 results table using BeautifulSoup tag lookups (no
# regular expressions) and builds a mapping from match number to a tuple of
# selected cell texts for that match.

from bs4 import BeautifulSoup
import requests
# import re

# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() prevents parsing an HTTP error page as if it were data.
req = requests.get(
    'https://en.volleyballworld.com/en/vnl/2019/women/resultsandranking/round1',
    timeout=30,
)
req.raise_for_status()
soup = BeautifulSoup(req.text, 'html.parser')

# All <tr> rows carrying the CSS class "group".
prePattern = soup.find_all('tr', 'group')
patternThrhd = 0  # We take pattern as prePattern[patternThrhd:].

# Setting patternThrhd: count the leading rows whose second cell has exactly
# the class 'result--highlight'; those rows are skipped below.
for item in prePattern:
    tds = item.find_all('td')
    # .get() and the length check avoid KeyError / IndexError on rows whose
    # second cell is missing or has no class attribute; such a row simply
    # ends the leading run.
    if len(tds) > 1 and tds[1].get('class') == ['result--highlight']:
        patternThrhd += 1
    else:
        break

pattern = prePattern[patternThrhd:]  # pattern is the list of matches.

# Columns of each row that are stored as the match details.
indices = [1, 4, 5, 6, 7, 8, 10, 11]

# The dictionary of matches. The keys are the match numbers (first column).
# Named `matches` rather than `dict` so the builtin is not shadowed.
matches = {}
for item in pattern:
    # get_text(strip=True) trims surrounding whitespace whatever its exact
    # length, and also works for cells with nested tags (where .string is None).
    cells = [td.get_text(strip=True) for td in item.find_all('td')]
    matches[int(cells[0])] = tuple(cells[i] for i in indices)

# NOTE(review): the original prints the raw rows, not the parsed dictionary;
# kept as-is so the output is unchanged. Print `matches` to see parsed data.
print(pattern)
 
 
  复制代码 |   
 
 
 
 |