Java爬虫 day2
今天学习的是带参数的 GET 请求,代码如下:
package test;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
import java.net.URISyntaxException;
/**
 * Sends an HTTP GET request carrying a query parameter with Apache HttpClient
 * and prints the length of the response body.
 *
 * <p>The target address is assembled with {@link URIBuilder} from the base URL
 * {@code http://www.itcast.cn/search} plus the parameter {@code keys=Java}.
 */
public class HttpGetParamTest {

    /**
     * Builds the request, executes it, and prints either the body length (status 200)
     * or the body itself (any other status).
     *
     * @param args command-line arguments; not used
     * @throws URISyntaxException if the base URL given to {@link URIBuilder} is not a valid URI
     */
    public static void main(String[] args) throws URISyntaxException {
        // Assemble the request address: http://www.itcast.cn/search?keys=Java
        URIBuilder uriBuilder = new URIBuilder("http://www.itcast.cn/search");
        uriBuilder.setParameter("keys", "Java");

        // Create the GET request for the assembled address and set a browser-like User-Agent header.
        HttpGet httpGet = new HttpGet(uriBuilder.build());
        httpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36");
        System.out.println("发起请求的地址是:" + httpGet);

        // try-with-resources closes the response and then the client on every path.
        // The response is only closed if execute() actually produced one, so a failed
        // request can no longer cause a NullPointerException that would skip closing the client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // Parse the response; only status 200 is treated as success.
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                String content = EntityUtils.toString(httpEntity, "utf8");
                System.out.println("内容的长度是:"+content.length());
            } else {
                // Any other status (e.g. 404, page not found) should be handled as the
                // situation requires; here the body is simply printed.
                System.out.println("返回状态不是200");
                System.out.println(EntityUtils.toString(response.getEntity(), "utf-8"));
            }
        } catch (IOException e) {
            // Covers failures while executing the request, reading the body, or closing resources.
            e.printStackTrace();
        }
    }
}
个人感觉带参数的方法挺麻烦的:要先创建一个 URIBuilder 对象,还要调用方法把参数拼接进 URL 里。我觉着与其这样,还不如用字符串拼接方便,把 URL 用加号拼接好以后直接当作 HttpGet 的参数也挺香的。不过需要注意,字符串拼接不会自动对参数做 URL 编码,参数里如果有空格、中文或 & 等特殊字符,就得自己用 URLEncoder 处理,而 URIBuilder 会自动完成编码。
当然这只是一己之见,不喜勿喷{:5_109:}
页:
[1]