| 
 | 
 
 
发表于 2018-4-3 16:21:43
|
显示全部楼层
 
 
 
似乎有一点懂非捕获组的意思了 
>>> searchVsFindallStr = """ 
pic url test 1 http://1821.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35f9d5g213.jpg[/img] 
pic url test 2 http://1881.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35ee46g213.jpg[/img] 
pic url test 2 http://1802.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae361ac6g213.jpg[/img] 
""" 
>>> singlep_nogroup='http://\w+\.\w+\.\w+.+?/\w+?.jpg' 
>>>  re.findall(singlep_nogroup, searchVsFindallStr) 
['http://1821.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35f9d5g213.jpg', 'http://1881.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35ee46g213.jpg', 'http://1802.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae361ac6g213.jpg'] 
 
>>> singlep_nogroup='http://\w+.+?/\w+?.jpg' 
>>> re.findall(singlep_nogroup, searchVsFindallStr) 
['http://1821.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35f9d5g213.jpg', 'http://1881.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35ee46g213.jpg', 'http://1802.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae361ac6g213.jpg'] 
>>> singlep_noncapture='http://(?:\w+)\.(?:\w+)\.(?:\w+).+?/(?:\w+?).jpg' 
#全都是非捕获组,则返回完整匹配的内容;返回的内容不是包含子组内容的元组 
 
>>> re.findall(singlep_noncapture, searchVsFindallStr) 
['http://1821.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35f9d5g213.jpg', 'http://1881.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35ee46g213.jpg', 'http://1802.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae361ac6g213.jpg'] 
>>> singlep_noncapture='http://(\w+)\.(?:\w+)\.(?:\w+).+?/(?:\w+?).jpg' 
#有一个是捕获组,则结果只返回捕获组的内容 
 
>>> singlep_group='(http://)(\w+\.)(\w+\.)(\w+)(.+?/)(\w+?.)jpg' 
>>> re.findall(singlep_group, searchVsFindallStr) 
[('http://', '1821.', 'img.', 'pp', '.sohu.com.cn/images/blog/2012/3/7/23/28/', 'u121516081_136ae35f9d5g213.'), ('http://', '1881.', 'img.', 'pp', '.sohu.com.cn/images/blog/2012/3/7/23/28/', 'u121516081_136ae35ee46g213.'), ('http://', '1802.', 'img.', 'pp', '.sohu.com.cn/images/blog/2012/3/7/23/28/', 'u121516081_136ae361ac6g213.')] 
#有多个捕获组,则返回的每一项是由各子组内容组成的元组 
 
>>> re.findall(singlep_noncapture, searchVsFindallStr) 
['1821', '1881', '1802'] 
 
>>> p=re.findall(r'(\d+)(?:\.?)(?:\d+)([$¥])$','1000.56$') 
>>> p 
[('1000', '$')] 
>>> p=re.findall(r'(?:\d+)\.(?:\d+)([$¥])$','1000.56$') 
>>> p 
['$'] 
>>> p=re.findall(r'(?:\d+)\.(?:\d+)(?:[$¥])$','1000.56$') 
>>> p 
['1000.56$'] 
 
 |   
 
 
 
 |