# 本帖最后由 fdfanmo 于 2022-9-20 15:27 编辑
import urllib.request
import re
import sys
# Page whose HTML is scanned for download links.
url="https://pornchil.com/after-hours-exposed-siterip/#more-98114"
# Browser-like User-Agent header sent with the request.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
}
# Build the request object carrying the custom headers.
request = urllib.request.Request(url=url, headers=headers)
# Fetch the page. The context manager closes the HTTP response even if
# reading or decoding fails; the timeout keeps a stalled server from
# hanging the script forever.
with urllib.request.urlopen(request, timeout=30) as response:
    # Read the raw bytes and decode them into the page source text.
    source_code = response.read().decode("utf-8")
print(source_code)
# Read the list of names, one per line (each entry keeps its trailing
# newline). The file is closed as soon as the lines are read.
# NOTE(review): the text encoding is left at the platform default because
# the file's actual encoding is not visible here -- confirm and pass
# encoding=... explicitly.
with open("G:\\after-hours-exposed-siterip.txt", "r") as f:
    movie_name = f.readlines()
#after-hours-exposed-siterip.txt内读取到的内容
after-hours-exposed-siterip=[
20171011_public_rooftop_blowjob_in_old_town_riga_latvia,
20200923_double_teen_blowjob_doing_makeup_then_cumblast_croatia_vacation_1,
20200429_pov_dildoing_and_pussy_eating_vanessa_klein_pov_misspussycat_1,
20200318_19yo_pretty_blonde_mia_back_for_a_nice_afternoon_blowjob,
20200115_19yo_mia_sucking_me_off_and_2_private_sex_tapes_from_her_phone,
20190619_teen_jete_first_forest_blowjob_and_mouth_cum_drool,
20190227_barely_18_alina_huge_tits_and_sucking_me_off_titty_fuck_until_mouth_cum,
20190206_nervous_18yo_alina_blowjob_handjob_combo_huge_tits_cumed_and_glaz,
20190213_dream_night_with_my_18yo_blonde_latvian_dream_girl_one_night_on_a_cruis,
20171011_public_rooftop_blowjob_in_old_town_riga_latvia,
20210825_real_highschool_cheerleader_nervously_gives_perfect_nice.blowjob_pov_di,
20210818_super_double_blowjob_miss_pussycat_and_spinner_blake,
20210428_pov_lesbian_miss_pussycat_ice_and_poprocks_pussy_licking,
20210210_big_boobed_paula_giving_sexy_pov_blowjob,
20210127_new_girl_18yo_kelly_anne_pov_pussy_licking_striptease_with_miss_pussyca,
]
# For every name read from the text file, look for the full download URL
# that ends in "<name>.mp4.html" inside the fetched page source.
for value in movie_name:
    # readlines() keeps the trailing newline; an unstripped name can never
    # match text inside a URL.
    name = value.strip()
    if not name:
        # Skip blank lines in the list file.
        continue
    # Interpolate the loop variable (the original placeholder held a
    # hard-coded, unquoted title, which is not valid Python). re.escape()
    # makes characters such as "." in the name match literally, and the
    # character class keeps the match inside a single URL.
    url_link1 = re.search(
        rf"https://[^\s\"'<>]+/{re.escape(name)}\.mp4\.html",
        source_code,
    )
    # Print the matched URL text rather than the Match object's repr.
    if url_link1 is not None:
        print(url_link1.group(0))
    else:
        print(f"no link found for {name}")
我想要取到这样的数据
利用after-hours-exposed-siterip里面的元素匹配到完整的网址
"https://rapidgator.net/file/d563e2e78556baa8f282bf97e3eb493d/20171011_public_rooftop_blowjob_in_old_town_riga_latvia.mp4.html"
表達式是 "字符串" + 变数 + "字符串" 这样的表達式
如果直接按你.rar文件里面的after-hours-exposed-siterip.txt读取的话, 最后的for循环改一下:
# Build one link per name by plain concatenation, skipping the first line
# of the list.
# NOTE(review): every generated link reuses the same hard-coded file id --
# confirm this is intended, since the id is not looked up per name.
link_prefix = 'https://rapidgator.net/file/d563e2e78556baa8f282bf97e3eb493d/'
link_suffix = '.mp4.html'
for value in movie_name[1:]:
    # strip() drops the newline that readlines() leaves on each entry.
    url_link1 = link_prefix + value.strip() + link_suffix
    print(url_link1)
|