|
|
发表于 2019-5-6 21:53:38
|
显示全部楼层
凑活着用吧
- # py3.7 pycharm
import re

# Sample markup to scrape. Each <tr> carries its row id inside
# selectResult('...') and holds four left-aligned <td> cells; the third cell
# wraps an <a> whose full title is either the link text or, when the visible
# text is truncated, the value of its title="..." attribute.
html = '''<tr class="" onmousemove="cursorOver(this)" onmouseout="cursorOut(this)" onclick="selectResult('4512')">
<td style="width:70px;" align="left">河西</td>
<td style="width:80px;" align="left">公告</td>
<td style="width:280px;" align="left">
<a href="#this">监控类型</a>
</td>
<td style="width:100px" align="left">2018-4-1</td>
</tr>
<tr class="jtgs_table_td_bg" onmousemove="cursorOver(this)" onmouseout="cursorOut(this)" onclick="selectResult('4514')">
<td style="width:70px;" align="left">中心</td>
<td style="width:80px;" align="left">公告</td>
<td style="width:280px;" align="left">
<a href="#this" title="比选类型">监控数据模型...</a>
</td>
<td style="width:100px" align="left">2018-4-3</td>
</tr>
'''

# Each match yields a 6-tuple:
#   (row id, first cell, second cell, link text, title attribute, last cell)
# Only one of the two title alternatives can match, so the other is ''.
# The alternation is wrapped in a NON-capturing group: a capturing wrapper
# would return the matched title a second time and duplicate it in the output.
# re.S lets '.' cross the newlines between cells; re.M is not needed because
# the pattern contains no ^ or $ anchors.
results = re.findall(
    r"'(\d+)'"                     # row id from selectResult('...')
    r'.*?left">(.*?)</td>'         # first cell
    r'.*?left">(.*?)</td>'         # second cell
    r'.*?(?:(?<=this">)(.*?)(?=<)|(?<=title=")(.*?)(?="))'  # link text | title attr
    r'.*?left">(.*?)</td>',        # last cell
    html,
    re.S,
)

# Iterating an empty list is a no-op, so no emptiness guard is required.
for result in results:
    # Drop empty captures (the title alternative that did not match).
    print([field.strip() for field in result if field])
# ['4512', '河西', '公告', '监控类型', '2018-4-1']
# ['4514', '中心', '公告', '比选类型', '2018-4-3']
复制代码 |
|