读书人

高分在线求怎么获取网页代码

发布时间: 2012-10-05 15:34:34 作者: rapoo

高分在线求如何获取网页代码
获取网页源码我使用的方法如下:
// Download the page at URL and show its source in txtCode.
// The using blocks release the client, the response stream and the reader
// even when OpenRead or ReadToEnd throws; the original Close() calls were
// skipped on any exception.
// NOTE(review): Encoding.Default is the machine's ANSI code page; if the page
// is served as UTF-8 the text will be garbled - confirm the page encoding.
using (WebClient client = new WebClient())
using (Stream pageStream = client.OpenRead(URL))
using (StreamReader pageReader = new StreamReader(pageStream, Encoding.Default))
{
    txtCode.Text = pageReader.ReadToEnd();
}

但是最近获取卓越网页的时候,超过400页就获取不到了。
要获取的网址如下:
http://www.amazon.cn/gp/search/ref=sr_pg_403?rh=n%3A658390051&page=403&bbn=658391051&ie=UTF8&qid=1299726491&tab=books&pageTypeID=658390051#/ref=sr_pg_404?rh=n%3A658390051&page=404&bbn=658391051&ie=UTF8&qid=1299726576

请问如何实现获取,谢谢了。

[解决办法]
try
{
    string html;

    // Dispose the WebClient as soon as the download finishes (or fails).
    using (WebClient wc = new WebClient())
    {
        wc.Encoding = System.Text.Encoding.UTF8;

        // NOTE: everything after '#' is a URL fragment and is not sent to the
        // server, so this request asks for page=403, not page=404.
        html = wc.DownloadString("http://www.amazon.cn/gp/search/ref=sr_pg_403?rh=n%3A658390051&page=403&bbn=658391051&ie=UTF8&qid=1299726491&tab=books&pageTypeID=658390051#/ref=sr_pg_404?rh=n%3A658390051&page=404&bbn=658391051&ie=UTF8&qid=1299726576");
    }

    // WriteAllBytes creates the file or overwrites an existing one, so no
    // explicit exists/delete step is needed. Writing the encoded bytes
    // directly keeps the output free of a UTF-8 byte-order mark, as before.
    File.WriteAllBytes(Server.MapPath("123.htm"), System.Text.Encoding.UTF8.GetBytes(html));
}
catch (Exception ex)
{
    // Report download or file-system failures on the page instead of crashing.
    Response.Write(ex.Message);
}


我可以抓取到数据,完全没问题。
[解决办法]

C# code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.IO.Compression;
using System.Text.RegularExpressions;

namespace WikiPageCreater.Common
{
    /// <summary>
    /// Helpers for downloading a web page and for detecting the character
    /// encoding it declares.
    /// </summary>
    public class PageHelper
    {
        /// <summary>Request timeout in milliseconds.</summary>
        private const int RequestTimeoutMilliseconds = 20000;

        /// <summary>
        /// Responses whose declared Content-Length is this large or larger are skipped.
        /// </summary>
        private const long MaxContentLength = 1024 * 1024;

        /// <summary>
        /// Matches a <c>charset=...</c> declaration in markup. An optional opening
        /// quote is skipped so that both <c>charset=utf-8</c> and
        /// <c>charset="utf-8"</c> yield the bare charset name.
        /// </summary>
        private static readonly Regex CharsetRegex = new Regex(
            @"charset\s*=\s*[""']?\s*(?<charset>[A-Za-z0-9_.:\-]+)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Determines the character encoding name of the page at <paramref name="url"/>.
        /// </summary>
        /// <param name="url">Absolute HTTP(S) address of the page.</param>
        /// <returns>
        /// The charset declared in the page body if one is found; otherwise the
        /// charset reported by the response headers; otherwise (including when the
        /// request fails, is not answered with 200 OK, or is too large)
        /// <c>Encoding.Default.BodyName</c>.
        /// </returns>
        public static string GetEncoding(string url)
        {
            string headerCharset;

            // ASCII is sufficient to locate the charset declaration, which is
            // itself made of ASCII characters.
            string html = Download(url, Encoding.ASCII, out headerCharset);
            if (html == null)
            {
                return Encoding.Default.BodyName;
            }

            Match declared = CharsetRegex.Match(html);
            if (declared.Success)
            {
                return declared.Groups["charset"].Value;
            }

            // CharacterSet may be null as well as empty, so test for both.
            if (!string.IsNullOrEmpty(headerCharset))
            {
                return headerCharset;
            }

            return Encoding.Default.BodyName;
        }

        /// <summary>
        /// Downloads the HTML source of the page at <paramref name="url"/>, decoding
        /// it with <paramref name="encoding"/>.
        /// </summary>
        /// <param name="url">Absolute HTTP(S) address of the page.</param>
        /// <param name="encoding">Encoding used to decode the response body.</param>
        /// <returns>
        /// The page source, or <c>string.Empty</c> when the request fails, is not
        /// answered with 200 OK (redirects are not followed), or the declared
        /// length is 1 MiB or more.
        /// </returns>
        public static string GetHtml(string url, Encoding encoding)
        {
            string ignoredHeaderCharset;
            string html = Download(url, encoding, out ignoredHeaderCharset);
            return html ?? string.Empty;
        }

        /// <summary>
        /// Performs the GET request shared by <see cref="GetEncoding"/> and
        /// <see cref="GetHtml"/> and returns the decoded body, or <c>null</c> when
        /// no body could be obtained.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="encoding">Encoding used to decode the body.</param>
        /// <param name="headerCharset">
        /// Receives the response's <c>CharacterSet</c> when a 200 OK response
        /// within the size limit was received; otherwise <c>null</c>.
        /// </param>
        private static string Download(string url, Encoding encoding, out string headerCharset)
        {
            headerCharset = null;

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.Timeout = RequestTimeoutMilliseconds;
                request.AllowAutoRedirect = false;

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= MaxContentLength)
                    {
                        return null;
                    }

                    headerCharset = response.CharacterSet;

                    Stream body = response.GetResponseStream();
                    string contentEncoding = response.ContentEncoding;
                    if (contentEncoding != null && contentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
                    {
                        body = new GZipStream(body, CompressionMode.Decompress);
                    }

                    // Disposing the reader also disposes the (possibly wrapped) body stream.
                    using (StreamReader reader = new StreamReader(body, encoding))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
            catch (Exception ex)
            {
                // Deliberately best-effort: callers rely on a fallback value rather
                // than an exception. The failure is traced so it is not lost silently.
                System.Diagnostics.Trace.TraceWarning("PageHelper: request to '{0}' failed: {1}", url, ex.Message);
                return null;
            }
        }
    }
}

读书人网 >C#

热点推荐