求助大佬，这段爬虫代码有一部分看不懂,Python交流,编程语言专区,鱼C论坛

雪白七七子111 发表于 2022-4-17 10:40:21

求助大佬，这段爬虫代码有一部分看不懂

这段代码注释的地方和while循环看不太懂！求大佬解释一下

#这是一个通过关键字来爬取漏洞信息的模块
from lxml import etree
import requests
import re
import warnings
warnings.filterwarnings("ignore")             #处理错误

headerss = {
'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
}

def KeyWord():
content = input('请输入查询的关键字信息:')

#调用绿盟科技的漏洞库
print('以下是调用的绿盟数据库：')
url = 'http://www.nsfocus.net'
url1 = f'http://www.nsfocus.net/index.php?act=sec_bug&type_id=&os=&keyword={content}&page=1'
url1_re = requests.get(url=url1,headers=headerss)
url1_re.encoding = ('utf-8')
obj = re.compile(r'.*?共(?P<shuliang>.*?)条记录',re.S)#这三行看不懂！
url1_re_quantity = obj.findall(url1_re.text)                #！！
url1_re.close                                                          #！！
yeshu = url1_re_quantity
yeshu = int(yeshu)
yeshu = yeshu/15
if yeshu % 15 != 0 :
   yeshu =yeshu +1

i = 1
while i <= yeshu :
   url11 = f'http://www.nsfocus.net/index.php?act=sec_bug&type_id=&os=&keyword={content}&page={i}'
   url11_re = requests.get(url=url11,headers=headerss)
   url11_re.encoding = ('utf-8')
   url11_re_etree = etree.HTML(url11_re.text)
   url11_re_etree_title = url11_re_etree.xpath('/html/body/div/section/div/section/div/div/div/div/ul/li[*]/a/text()')
   url11_re_etree_link = url11_re_etree.xpath('/html/body/div/section/div/section/div/div/div/div/ul/li[*]/a/@href')

   j = 0
   while j < len(url11_re_etree_title) :
         print(url11_re_etree_title)
         print(url + url11_re_etree_link)
         j = j+1

   i = i+1

KeyWord()

页: [1]

鱼C论坛's Archiver

求助大佬，这段爬虫代码有一部分看不懂