|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
刚接触爬虫,用requests.get() 方法获取网站html,用了简单的headers,就是一个User-Agent
程序没有报错,但是返回的html信息跟网页上看到的是不一样的,我也不知道返回的是什么页面
我输入的是什么值得买的一个搜索商品页面,在电脑上访问这个页面也没有提示需要登陆,返回的html比较简短,并不是商品页面,如下;请告诉我一下这个页面代表什么呢?
<script>
var search_domain = 'https://search.smzdm.com/';
var www_domain = 'https://www.smzdm.com/';
var search_url = '';
var keyword = GetQueryString('s');
var channel = GetQueryString('c');
var direct_url = GetQueryString('url');
var order = GetQueryString('order');
var p = GetQueryString('p');
var cate_id = GetQueryString('cate_id');
var mall_id = GetQueryString('mall_id');
var brand_id = GetQueryString('brand_id');
var min_price = GetQueryString('min_price');
var max_price = GetQueryString('max_price');
var channel_map = ['home','youhui','haitao','faxian','news','zhongce','zhiyou','coupon','second_hand','post'];
if ('' == keyword && '' == direct_url) {
window.location.href = www_domain;
}
if ('' == channel) {
channel = 'home';
}
if (channel_map.indexOf(channel) == -1) {
channel = 'home';
}
search_url += search_domain+'?c='+channel;
if (direct_url) {
search_url += "&url="+direct_url;
} else {
search_url += "&s="+keyword;
}
if (order) {
search_url += "&order=" + order;
}
if (mall_id) {
search_url += "&mall_id=" + mall_id;
}
if (cate_id) {
search_url += "&cate_id=" + cate_id;
}
if (brand_id) {
search_url += "&brand_id=" + brand_id;
}
if (min_price) {
search_url += "&min_price=" + min_price;
}
if (max_price) {
search_url += "&max_price=" + max_price;
}
var search_ab = GetQueryString('ss_ab');
// 分流
var r_w_map = new Array();
for (var i=1;i<=100;i++) {
// 80% 到a %20 到b.
if (i <= 50) {
r_w_map["ss"+i] = 'a';
}else{
r_w_map["ss"+i] = 'b';
}
}
if (typeof(r_w_map[search_ab]) !== "undefined") {
setCookie('ss_ab',search_ab, 3600 * 24 * 7, '/','.smzdm.com');
search_url += "&v="+ r_w_map[search_ab];
}
window.location.href = encodeURI(search_url);
/**
* 写cookie .
*
* @param cookieName
* @param cookieValue
* @param seconds
* @param path
* @param domain
* @param secure
*/
function setCookie(cookieName, cookieValue, seconds, path, domain, secure) {
seconds = seconds ? seconds : 604800;
seconds = seconds * 1000;
var expires = new Date();
expires.setTime(expires.getTime() + seconds);
document.cookie = escape(cookieName) + '=' + escape(cookieValue)
+ (expires ? '; expires=' + expires.toGMTString() : '')
+ (path ? '; path=' + path : '/')
+ (domain ? '; domain=' + domain : '')
+ (secure ? '; secure' : '');
return ;
}
/**
* 获取url 参数值.
*
* @param name
* @returns {*}
* @constructor
*/
function GetQueryString(name)
{
var reg = new RegExp("(^|&)"+ name +"=([^&]*)(&|$)");
var r = window.location.search.substr(1).match(reg);
//console.log(r);
if(r!=null)return decodeURI(r[2]); return '';
}
</script> |
|