读书人

HttpClient使用

发布时间: 2012-09-01 09:33:02 作者: rapoo

HttpClient应用

用HttpClient爬网站时,有时会遇到这样一种现象:自己写个JSP模拟表单提交可以成功,但是用HttpClient提交却无法成功。

原因有可能是Header或者Cookies没有设置。如果用浏览器访问,这些参数都会由浏览器自动带上,不需要自己手动设置,而HttpClient不会自动添加它们。所以我们需要对HttpClient进行设置,完整的代码如下:

?

?

?

?

public static String  HttpClientCIB(){      //添加header信息             ?List <Header> headers = new ArrayList <Header>();        headers.add(new Header("Referer", "http://wap.sududa.com/default.aspx"));        headers.add(new Header("User-Agent", "http://wap.sududa.com/default.aspx"));        headers.add(new Header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));        headers.add(new Header("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)"));        headers.add(new Header("Accept-Language", "zh-cn,zh;q=0.5"));        headers.add(new Header("Host", "wap.sududa.com"));        headers.add(new Header("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7"));       HttpClient httpclient = new HttpClient();        httpclient.getHostConfiguration().getParams().setParameter("http.default-headers", headers);    httpclient.getHostConfiguration().setProxy( "202.84.17.41",8080);httpclient.getHttpConnectionManager().getParams().setConnectionTimeout(50);String result="";    PostMethod httppost = new PostMethod("http://wap.sududa.com/wap/default.aspx");           httppost.getParams().setContentCharset("GB2312");  httppost.setRequestHeader("Connection", "close");                 //添加两个Cookie信息  httpclient.getParams().setCookiePolicy(CookiePolicy.RFC_2109);//RFC_2109是支持较普遍的一个,还有其他cookie协议    HttpState initialState = new HttpState();      Cookie SUDUDA_COM_WapBalance = new Cookie();    SUDUDA_COM_WapBalance.setDomain("wap.sududa.com");    SUDUDA_COM_WapBalance.setPath("/");    SUDUDA_COM_WapBalance.setName("SUDUDA_COM_WapBalance");    SUDUDA_COM_WapBalance.setValue("0.000");      Cookie SUDUDA_COM_WapKey = new Cookie();    SUDUDA_COM_WapKey.setDomain("wap.sududa.com");    SUDUDA_COM_WapKey.setPath("/");    SUDUDA_COM_WapKey.setName("SUDUDA_COM_WapKey");    SUDUDA_COM_WapKey.setValue("56643CAF26F5A7751F1097F4B3D01AC4");          initialState.addCookie(SUDUDA_COM_WapBalance);    initialState.addCookie(SUDUDA_COM_WapKey);          
httpclient.setState(initialState);      NameValuePair[] data = { new NameValuePair("__VIEWSTATE", "/wEPDwUKMjAzOTY2ODc0NmRkkLuRhKYz6SYsEOIBQM8bHuiWt2k="),new NameValuePair(".logontest", ""),new NameValuePair("Name", "readls@163.com"),new NameValuePair("Pass","changwei"),  };        httppost.setRequestBody(data);                InputStream is = null;        try {     httpclient.executeMethod(httppost);          BufferedReader bf = new BufferedReader(new InputStreamReader(httppost.getResponseBodyAsStream()));//  result=httppost.getResponseBodyAsStream();            String line;          StringBuffer paramter= new StringBuffer();          while ((line = bf.readLine()) != null){          //    System.out.println(line);                       paramter=paramter.append(line);          }          bf.close();                    result=paramter.toString();  result=result.replaceAll("\n","");          result=result.replaceAll("\r","");          result=result.replaceAll("\t","");             //      System.out.println("||"+result+"||");  }catch( Exception e){  e.printStackTrace();  }   finally {    httppost.releaseConnection();  }  return result;}想获得该网页的Header和Cookie信息,可以通过HttpWatch工具。

读书人网 >编程

热点推荐