|
发表于 2018-4-3 16:21:43
|
显示全部楼层
似乎有一点懂非捕获组的意思了
>>> searchVsFindallStr = """
pic url test 1 http://1821.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35f9d5g213.jpg[/img]
pic url test 2 http://1881.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35ee46g213.jpg[/img]
pic url test 3 http://1802.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae361ac6g213.jpg[/img]
"""
>>> singlep_nogroup='http://\w+\.\w+\.\w+.+?/\w+?.jpg'
>>> re.findall(singlep_nogroup, searchVsFindallStr)
['http://1821.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35f9d5g213.jpg', 'http://1881.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35ee46g213.jpg', 'http://1802.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae361ac6g213.jpg']
>>> singlep_nogroup='http://\w+.+?/\w+?.jpg'
>>> re.findall(singlep_nogroup, searchVsFindallStr)
['http://1821.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35f9d5g213.jpg', 'http://1881.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35ee46g213.jpg', 'http://1802.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae361ac6g213.jpg']
>>> singlep_noncapture='http://(?:\w+)\.(?:\w+)\.(?:\w+).+?/(?:\w+?).jpg'
#全都是非捕获组,则返回整个匹配到的字符串;返回的内容不是包含子组内容的元组
>>> re.findall(singlep_noncapture, searchVsFindallStr)
['http://1821.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35f9d5g213.jpg', 'http://1881.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae35ee46g213.jpg', 'http://1802.img.pp.sohu.com.cn/images/blog/2012/3/7/23/28/u121516081_136ae361ac6g213.jpg']
>>> singlep_noncapture='http://(\w+)\.(?:\w+)\.(?:\w+).+?/(?:\w+?).jpg'
#有一个是捕获组,则结果只返回捕获组的内容(见下面 re.findall(singlep_noncapture, searchVsFindallStr) 的输出)
>>> singlep_group='(http://)(\w+\.)(\w+\.)(\w+)(.+?/)(\w+?.)jpg'
>>> re.findall(singlep_group, searchVsFindallStr)
[('http://', '1821.', 'img.', 'pp', '.sohu.com.cn/images/blog/2012/3/7/23/28/', 'u121516081_136ae35f9d5g213.'), ('http://', '1881.', 'img.', 'pp', '.sohu.com.cn/images/blog/2012/3/7/23/28/', 'u121516081_136ae35ee46g213.'), ('http://', '1802.', 'img.', 'pp', '.sohu.com.cn/images/blog/2012/3/7/23/28/', 'u121516081_136ae361ac6g213.')]
#有多个捕获子组,则返回的是元组组成的列表,每个元组包含一次匹配中各子组的内容
>>> re.findall(singlep_noncapture, searchVsFindallStr)
['1821', '1881', '1802']
>>> p=re.findall(r'(\d+)(?:\.?)(?:\d+)([$¥])$','1000.56$')
>>> p
[('1000', '$')]
>>> p=re.findall(r'(?:\d+)\.(?:\d+)([$¥])$','1000.56$')
>>> p
['$']
>>> p=re.findall(r'(?:\d+)\.(?:\d+)(?:[$¥])$','1000.56$')
>>> p
['1000.56$']
|
|