|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
前几天看到一位大佬写了一个爬取全部用户个人信息的程序,我就心血来潮写了一个输入 UID 查询单个用户信息的版本。我初学 Python,写得不好的地方请多指教
另外,代码里被注释掉的爬取头衔和个性签名的那部分,不知道为什么只能爬小甲鱼的,换成别的用户就会报错,报错内容:IndexError: list index out of range
下面上代码:
- import requests
- import re
- '''
- with open('fishc.txt','w') as g:
- g.write(response.text)
- '''
- z = 1
def find_name(html=None):
    """Print the profile name found in a user-space page and report success.

    The name is taken from the first ``"keywords" content="..."`` attribute
    pair in the page source.

    Args:
        html: Page source to search. When omitted, the text of the
            module-level ``response`` object is used.

    Returns:
        True when a non-empty name was found and printed, False otherwise.

    Side effects:
        Sets the module-level flag ``z`` to 0 when no user is found, so
        callers that test ``z == 0`` after the call keep working.
    """
    global z
    if html is None:
        html = response.text

    match = re.search(r'"keywords" content="(.*?)"', html)
    # A page without the pattern at all is treated like an empty name
    # instead of raising IndexError on an empty result list.
    name = match.group(1) if match else ''
    if not name:
        print('查无此人!')
        z = 0
        return False

    print('\t您现在看到的是:', name)
    return True
def find_all(html=None):
    """Print the activity counters and profile fields of a user-space page.

    First prints the seven counters (friends, records, blogs, albums,
    replies, threads, shares) that appear as ``"_blank">LABEL VALUE</a>``
    links, then every ``<li><em>KEY</em>VALUE</li>`` profile entry.

    Args:
        html: Page source to search. When omitted, the text of the
            module-level ``response`` object is used.

    Returns:
        A list of ``(label, value)`` tuples in the order they were printed.
        A counter that is not present on the page has the value ``'未知'``.
    """
    if html is None:
        html = response.text

    # NOTE: indexing ``re.findall(...)[0]`` raises IndexError whenever the
    # pattern is absent from the page. That is presumably why an earlier
    # draft that scraped the custom title / signature only worked for some
    # users: not every profile shows those fields. Every lookup below is
    # therefore guarded. Mail/video verification entries are ordinary
    # ``<li><em>..</em>..</li>`` items and are covered by the generic loop.
    stat_labels = ('好友数', '记录数', '日志数', '相册数', '回帖数', '主题数', '分享数')

    rows = []
    for label in stat_labels:
        found = re.search('"_blank">' + label + ' (.*?)</a>', html)
        value = found.group(1) if found else '未知'
        print('\t' + label + ':', value)
        rows.append((label, value))

    for key, value in re.findall(r'<li><em>(.*?)</em>(.*?)</li>', html):
        print('\t', key, ':', value)
        rows.append((key, value))

    return rows
if __name__ == "__main__":
    # Script entry: ask for a UID, fetch that user's space page and print
    # the name followed by the detailed fields.
    # Strip stray whitespace so it cannot end up inside the URL.
    uid = input('请输入你要查询用户的UID:').strip()
    if not uid.isdigit():
        # The URL scheme below only makes sense for numeric ids.
        print('UID 必须是数字!')
    else:
        url = 'https://fishc.com.cn/space-uid-' + uid + '.html'
        try:
            # Without a timeout a stalled connection would hang forever.
            # `response` stays a module-level name because find_name() and
            # find_all() read it.
            response = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            print('请求失败:', exc)
        else:
            find_name()
            # find_name() clears the module flag `z` when no user was found.
            if z == 0:
                input('任意键继续...')
            else:
                find_all()
复制代码
|
|