读书人

简单HTTP抓取网页内容

发布时间: 2012-11-12 12:31:57 作者: rapoo

简单HTTP抓取网页内容

package com.test;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
 * Minimal example that downloads a web page over HTTP and prints its text
 * to standard output.
 */
public class TestHttp {

    /** Prefix of the Content-Type parameter that names the character set. */
    private static final String CHARSET_PARAM = "charset=";

    /**
     * Issues a GET request to {@code http://www.baidu.com}, decodes the
     * response body and prints it.
     *
     * <p>The body is decoded with the charset announced in the
     * {@code Content-Type} response header, falling back to UTF-8 when the
     * header is absent or names an unknown charset. {@code readLine()} strips
     * line terminators, so the page is printed as one long line.
     *
     * @param args ignored
     * @throws Exception if the URL is malformed or the request fails
     */
    public static void main(String[] args) throws Exception {
        String urlString = "http://www.baidu.com";
        URL url = new URL(urlString);
        HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
        try {
            urlConnection.setRequestMethod("GET");
            // No setDoOutput(true): a GET request carries no request body.
            urlConnection.setUseCaches(false);

            StringBuilder temp = new StringBuilder();
            // try-with-resources closes the reader (and the wrapped stream)
            // even when reading fails part-way through.
            try (InputStream in = urlConnection.getInputStream();
                 BufferedReader bufferedReader = new BufferedReader(
                         new InputStreamReader(in, charsetOf(urlConnection.getContentType())))) {
                String line;
                while ((line = bufferedReader.readLine()) != null) {
                    temp.append(line);
                }
            }
            System.out.println(temp.toString());
        } finally {
            urlConnection.disconnect();
        }
    }

    /**
     * Extracts the charset from a {@code Content-Type} header value such as
     * {@code text/html; charset=GBK}.
     *
     * @param contentType the header value, may be {@code null}
     * @return the named charset, or UTF-8 when none is given or it is not
     *         supported by this JVM
     */
    private static Charset charsetOf(String contentType) {
        if (contentType == null) {
            return StandardCharsets.UTF_8;
        }
        for (String part : contentType.split(";")) {
            String param = part.trim();
            // Parameter names are case-insensitive.
            if (!param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                continue;
            }
            String name = param.substring(CHARSET_PARAM.length()).trim();
            // The value may be a quoted string: charset="utf-8".
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1);
            }
            try {
                return Charset.forName(name);
            } catch (IllegalArgumentException ignored) {
                // Illegal or unsupported charset name: use the default below.
                return StandardCharsets.UTF_8;
            }
        }
        return StandardCharsets.UTF_8;
    }
}

读书人网 >编程

热点推荐