Java Crawler: A Simple Demo Based on HttpClient (Part 2)
Reposted from: java爬虫之基于httpclient的简单Demo(二)
Following up on Demo 1, java爬虫的2种爬取方式(HTTP||Socket)简单Demo(一), Demo 2 is ready. Anyone who wants to learn crawling can study from this network drive: https://pan.baidu.com/s/1pJJrcqJ#list/path=%2F
The course there is free and quite good. The main goal of this demo is still learning HttpClient itself; its documentation is entirely in English, which wore me out. The class below, HttpClientCrawlerImpl, implements both a GET-based crawl and a parameterized POST-based crawl4Post:
package com.simple.crawImpl;

import com.simple.Icrawl.ICrawl;
import com.simple.pojos.CrawlResultPojo;
import com.simple.pojos.UrlPojo;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

/**
 * Created by lewis on 2016/10/16.
 */
public class HttpClientCrawlerImpl implements ICrawl {

    // Build a customized HttpClient instance
    public CloseableHttpClient httpClient = HttpClients.custom().build();

    @Override
    public CrawlResultPojo crawl(UrlPojo urlPojo) {
        if (urlPojo == null) {
            return null;
        }
        CrawlResultPojo crawlResultPojo = new CrawlResultPojo();   // result holder
        CloseableHttpResponse response = null;  // full HTTP response: protocol version, status code, headers, entity
        BufferedReader br = null;
        try {
            HttpGet httpGet = new HttpGet(urlPojo.getUrl());
            response = httpClient.execute(httpGet);
            HttpEntity entity = response.getEntity();              // response body
            // Wrap the byte stream as a character stream with an explicit charset
            InputStreamReader isr = new InputStreamReader(entity.getContent(), "utf-8");
            br = new BufferedReader(isr);
            String line;
            StringBuilder context = new StringBuilder();
            while ((line = br.readLine()) != null) {
                context.append(line).append("\n");
            }
            crawlResultPojo.setSuccess(true);
            crawlResultPojo.setPageContent(context.toString());
            return crawlResultPojo;
        } catch (IOException e) {
            e.printStackTrace();
            crawlResultPojo.setSuccess(false);
        } finally {
            try {
                if (br != null) br.close();              // close the reader
                if (response != null) response.close();  // release the connection
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return crawlResultPojo;
    }

    /**
     * POST crawl for a UrlPojo that carries parameters.
     */
    public CrawlResultPojo crawl4Post(UrlPojo urlPojo) {
        if (urlPojo == null || urlPojo.getUrl() == null) {
            return null;
        }
        CrawlResultPojo crawlResultPojo = new CrawlResultPojo();
        CloseableHttpResponse response = null;
        BufferedReader br = null;
        try {
            RequestBuilder rb = RequestBuilder.post().setUri(new URI(urlPojo.getUrl()));
            Map<String, Object> parasMap = urlPojo.getParasMap();
            if (parasMap != null) {
                for (Entry<String, Object> entry : parasMap.entrySet()) {
                    rb.addParameter(entry.getKey(), entry.getValue().toString());
                }
            }
            HttpUriRequest httpUriRequest = rb.build();
            // Keep a handle on the response so it can be closed in finally
            // (the original chained execute().getEntity() and leaked the response)
            response = httpClient.execute(httpUriRequest);
            HttpEntity entity = response.getEntity();
            InputStreamReader isr = new InputStreamReader(entity.getContent(), "utf-8");
            br = new BufferedReader(isr);
            String line;
            StringBuilder stringBuilder = new StringBuilder();
            while ((line = br.readLine()) != null) {
                stringBuilder.append(line).append("\n");
            }
            crawlResultPojo.setPageContent(stringBuilder.toString());
            crawlResultPojo.setSuccess(true);
            return crawlResultPojo;
        } catch (URISyntaxException e) {
            e.printStackTrace();
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (br != null) br.close();
                if (response != null) response.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        crawlResultPojo.setSuccess(false);
        return crawlResultPojo;
    }

    public static void main(String[] args) {
        HttpClientCrawlerImpl httpClientCrawlerImpl = new HttpClientCrawlerImpl();
        String url = "http://www.wangdaizhijia.com/front_select-plat";
        UrlPojo urlPojo = new UrlPojo(url);
        Map<String, Object> parasMap = new HashMap<String, Object>();
        int max_page_number = 1000;  // declared but unused in the original demo
        parasMap.put("currPage", 30);
        parasMap.put("params", "");
        parasMap.put("sort", 0);
        urlPojo.setParasMap(parasMap);
        CrawlResultPojo resultPojo = httpClientCrawlerImpl.crawl4Post(urlPojo);
        print(resultPojo);
        resultPojo = httpClientCrawlerImpl.crawl(urlPojo);
        print(resultPojo);
    }

    public static void print(Object s) {
        System.out.println(s);
    }
}
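The class depends on three types from Demo 1 that are not shown in this post: the ICrawl interface, UrlPojo, and CrawlResultPojo. For readers who only have this post, here is a minimal sketch of what they might look like, inferred purely from how they are used above; the real versions in Demo 1 may differ.

// ICrawl.java -- crawl abstraction implemented by HttpClientCrawlerImpl
package com.simple.Icrawl;

import com.simple.pojos.CrawlResultPojo;
import com.simple.pojos.UrlPojo;

public interface ICrawl {
    CrawlResultPojo crawl(UrlPojo urlPojo);
}

// UrlPojo.java -- holds the target URL and optional POST parameters
package com.simple.pojos;

import java.util.Map;

public class UrlPojo {
    private String url;
    private Map<String, Object> parasMap;

    public UrlPojo(String url) {
        this.url = url;
    }

    public String getUrl() {
        return url;
    }

    public Map<String, Object> getParasMap() {
        return parasMap;
    }

    public void setParasMap(Map<String, Object> parasMap) {
        this.parasMap = parasMap;
    }
}

// CrawlResultPojo.java -- holds the crawl outcome: success flag plus page content
package com.simple.pojos;

public class CrawlResultPojo {
    private boolean success;
    private String pageContent;

    public void setSuccess(boolean success) {
        this.success = success;
    }

    public void setPageContent(String pageContent) {
        this.pageContent = pageContent;
    }

    @Override
    public String toString() {  // so print(resultPojo) shows something readable
        return "success=" + success + ", pageContent length="
                + (pageContent == null ? 0 : pageContent.length());
    }
}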
Summary
That is all for this simple HttpClient-based crawler demo: a GET crawl and a parameterized POST crawl, both reading the response body line by line into a string. Hopefully it helps you solve the problems you run into.
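As a closing note, HttpClient 4.x code is usually written more concisely with EntityUtils and try-with-resources, which close the client and response automatically. Below is a minimal sketch of the same GET crawl in that style; it is not part of the original demo, and the class name SimpleGetCrawl is made up for illustration.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

public class SimpleGetCrawl {
    public static void main(String[] args) throws IOException {
        // try-with-resources closes both the client and the response, even on error
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(
                     new HttpGet("http://www.wangdaizhijia.com/front_select-plat"))) {
            // EntityUtils reads the whole body and releases the underlying stream
            String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
            System.out.println(body.length() + " characters fetched");
        }
    }
}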