|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 ink_Ocelot 于 2020-5-3 21:13 编辑
- import urllib.request
- import re
- import time
- def open_url(url):  # Fetch `url` and return the response body decoded as UTF-8. NOTE(review): body indentation was lost in this paste.
- req = urllib.request.Request(url)
- req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0')  # send a browser-like User-Agent header with the request
- page = urllib.request.urlopen(req)
- html = page.read().decode('utf-8')  # whole body is read, then decoded as UTF-8
- return html
- def get_ip(html):  # Intended to find IPv4-looking strings in `html` and write each one, newline-terminated, to `ipwrite` (a name not defined inside this function).
- #r'(([0,1]?\d?\d│2[0-4]\d│25[0-5])\.{3})(([0,1]?\d?\d│2[0-4]\d│25[0-5])'
- #r'(?:(?:[0,1]?\d?\d|2[0-4]\d|25[0-5])\.){3}(?:[0,1]?\d?\d|2[0-4]\d|25[0-5])'
- p = r'(?:(?:[0,1]?\d?\d|2[0-4]\d|25[0-5])\.){3}(?:[0,1]?\d?\d|2[0-4]\d|25[0-5])'  # NOTE(review): `[0,1]` is a character class, so it also accepts a literal comma
- iplist = re.findall(p,html)  # only non-capturing groups, so findall returns the full matched strings
- # for each in iplist:
- # print(each)
- # for each in iplist:
- # print (each)
- for each in iplist:
- exec('ipwrite.write(\'' + each + '\n\')'  # NOTE(review): the ")" closing exec( is missing -- this is the SyntaxError reported at the following `if` line; also '\n' here is a real newline inside the exec'd source, which would break the quoted literal even once the paren is added
- if __name__ == '__main__':  # Script entry: ask for a page count, then fetch each listing page and pass it to get_ip. NOTE(review): indentation was lost in this paste.
- page_num = int(input('输入获取IP数量(以页面计):')) + 1  # +1 so that range(1, page_num) below reaches the last requested page
- exec('ipwrite = open(\'IP:page1-page' + str(page_num - 1) + '.txt\',\'w\')')  # creates the `ipwrite` file object through exec; note the file name contains ":"
- for i in range(1,page_num):
- exec('url = \'https://www.xicidaili.com/nt/' + str(i) + '\'')  # assigns `url` through exec; the executed source is just a string assignment
- get_ip(open_url(url))
- ipwrite.close()
- print ('IP爬取完成!')
- time.sleep (5)  # pause 5 seconds after printing the completion message
复制代码
这个代码一直提示SyntaxError: invalid syntax
标红if __name__ == '__main__':中的':'
各位大佬知道这个问题怎么解决吗?
本帖最后由 hrp 于 2020-5-3 22:56 编辑
- import re
- import time
- import urllib.request
def open_url(url, timeout=30):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the script indefinitely.

    Returns:
        The complete response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails (including HTTP errors
            and timeouts reported by urllib).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(url)
    # Send a browser-like User-Agent header with the request.
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0'
    )
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(req, timeout=timeout) as page:
        return page.read().decode('utf-8')
def get_ip(html, out=None):
    """Extract IPv4 addresses from *html* and write them out, one per line.

    Args:
        html: Text to scan.
        out: Writable text stream that receives each address followed by a
            newline. When omitted, the module-level ``ipwrite`` file object
            (opened by the script entry point) is used.

    Returns:
        The list of addresses found, in order of appearance.
    """
    # Longest alternatives come first: with ``[01]?\d?\d`` first, a trailing
    # octet such as 254 matched only "25". ``[01]`` replaces ``[0,1]``, which
    # also accepted a literal comma.
    octet = r'(?:25[0-5]|2[0-4]\d|[01]?\d?\d)'
    # The digit lookarounds stop a match from starting or ending in the
    # middle of a longer number (e.g. "999.1.1.1" or "1.2.3.256").
    pattern = r'(?<!\d)(?:' + octet + r'\.){3}' + octet + r'(?!\d)'
    iplist = re.findall(pattern, html)

    if out is None:
        out = ipwrite  # global file object created in the __main__ section

    # The originally posted code used exec('ipwrite.write(...)' without its
    # closing parenthesis, which caused the reported SyntaxError (and had
    # other problems); a plain write is all that is needed.
    out.writelines(each + '\n' for each in iplist)
    return iplist
if __name__ == '__main__':
    # Script entry point: ask how many listing pages to fetch, then write
    # every address found on pages 1..page_num into a single text file.
    page_num = int(input('输入获取IP数量(以页面计):'))
    # ":" cannot be used in a file name on Windows, hence "IP-page1-..."
    # instead of the original "IP:page1-...".
    # `ipwrite` stays a module-level name because get_ip() writes to it;
    # the with-block closes the file even if a download raises.
    with open('IP-page1-page' + str(page_num) + '.txt', 'w') as ipwrite:
        for i in range(1, page_num + 1):
            # Plain string concatenation; no exec() is needed to build the URL.
            url = 'https://www.xicidaili.com/nt/' + str(i)
            get_ip(open_url(url))
    print('IP爬取完成!')
    time.sleep(5)  # pause 5 seconds after printing the completion message
复制代码
|
|