读书人

关于WebClient.HttpRequest,XMLHTTP,WebBrowser与IE访问的区别

发布时间: 2012-10-13 11:38:17 作者: rapoo

关于WebClient.HttpRequest,XMLHTTP,WebBrowser与IE访问的区别..
现在有一个需求。比如搜索
准备怀孕要做哪些检查 在百度中我公司网站的排名...
需求挺简单的
~~~
大家可以测试一下.使用各种方式

如WebClient,HttpRequest,XMLhttp,WebBrowser去请求百度,取到的HTML源码不尽相同!!!
比如请求
http://www.baidu.com/s?wd=准备怀孕要做哪些检查
将出现以下问题

问题1 : HTML源码不同,导致排位不同,
问题2 : 如果某一种方式请求过多,会出现验证码,但是这时候用IE去访问,是不需要验证码的。

我的分析是
百度可以区分来自浏览器访问 与 各种Com组件或是代码方式的请求

但是:
我使用各种HTTP访问监视工具,如
Fiddler记录下IE请求时发送的头信息,然后用HttpRequest去一一对应,还是出现这样的问题


下面发代码!
使用HttpRequest自定义方式

C# code
        /// <summary>
        /// Requests <paramref name="url"/> with IE8-like request headers and a freshly generated
        /// BAIDUID cookie, and returns the response body decoded as gb2312.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <returns>
        /// The decoded HTML. If the request fails, or the page title contains the
        /// "您的访问出错了" marker, a string starting with "err:" is returned instead.
        /// </returns>
        public static string DownLoadHtml(string url)
        {
            try
            {
                // Attach a random BAIDUID cookie scoped to www.baidu.com.
                CookieContainer cookies = new CookieContainer();
                CookieCollection baiduCookies = new CookieCollection();
                baiduCookies.Add(new Cookie("BAIDUID", System.Guid.NewGuid().ToString("N").ToUpperInvariant() + ":FG=1"));
                cookies.Add(new Uri("http://www.baidu.com"), baiduCookies);

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.UserAgent = @"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET4.0C; .NET4.0E)";
                request.Accept = @"*/*";
                request.Host = "www.baidu.com";
                request.Headers["Accept-Encoding"] = "gzip, deflate";
                request.Headers["Accept-Language"] = "zh-cn";
                request.Method = "GET";
                request.CookieContainer = cookies;
                request.AllowAutoRedirect = true;

                string data;
                // Dispose the response and its stream so the underlying connection is released.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream receiveStream = response.GetResponseStream())
                {
                    string contentEncoding = response.Headers["Content-Encoding"] ?? string.Empty;

                    byte[] body;
                    if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        body = DecompressGzip(receiveStream);
                    }
                    else if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        body = DecompressDeflate(receiveStream);
                    }
                    else
                    {
                        // The server may ignore Accept-Encoding and answer uncompressed;
                        // that body must still be read instead of returning an empty string.
                        body = ReadAllBytes(receiveStream);
                    }

                    data = System.Text.Encoding.GetEncoding("gb2312").GetString(body);
                }

                // Singleline lets '.' span line breaks inside the <title> element.
                foreach (Match m in Regex.Matches(data, "<title>(?<title>.*?)</title>", RegexOptions.Singleline))
                {
                    if (m.Groups["title"].Value.IndexOf("您的访问出错了", StringComparison.Ordinal) >= 0)
                    {
                        return "err:访问出错,需填写验证码";
                    }
                }

                return data;
            }
            catch (Exception er)
            {
                // Failures are reported through the "err:" prefix of the return value,
                // so nothing is rethrown here.
                return "err:" + er.Message;
            }
        }

        /// <summary>
        /// Decompresses a gzip-encoded stream into a byte array.
        /// Decompression errors propagate to the caller instead of yielding truncated data.
        /// </summary>
        /// <param name="streamInput">Stream positioned at the start of the gzip data; it is closed on return.</param>
        /// <returns>The decompressed bytes.</returns>
        private static byte[] DecompressGzip(Stream streamInput)
        {
            using (GZipStream gzip = new GZipStream(streamInput, CompressionMode.Decompress))
            {
                return ReadAllBytes(gzip);
            }
        }

        /// <summary>
        /// Decompresses a deflate-encoded stream into a byte array.
        /// Decompression errors propagate to the caller instead of yielding truncated data.
        /// </summary>
        /// <param name="streamInput">Stream positioned at the start of the deflate data; it is closed on return.</param>
        /// <returns>The decompressed bytes.</returns>
        private static byte[] DecompressDeflate(Stream streamInput)
        {
            using (DeflateStream deflate = new DeflateStream(streamInput, CompressionMode.Decompress))
            {
                return ReadAllBytes(deflate);
            }
        }

        /// <summary>Reads <paramref name="input"/> to its end and returns everything that was read.</summary>
        private static byte[] ReadAllBytes(Stream input)
        {
            using (MemoryStream buffer = new MemoryStream())
            {
                input.CopyTo(buffer);
                return buffer.ToArray();
            }
        }




使用XMLHTTP方式
C# code
        /// <summary>
        /// Searches Baidu for <c>wsm.showword</c> through MSXML2.ServerXMLHTTP and stores the
        /// response, decoded as gb2312, in <c>wsm.HTML</c>.
        /// </summary>
        /// <param name="wsm">
        /// Model supplying the keyword (<c>showword</c>) and receiving the result (<c>HTML</c>)
        /// and status text (<c>errmsg</c>).
        /// </param>
        /// <remarks>
        /// <c>wsm.errmsg</c> is set to "正在搜索" before the request is sent and is replaced by the
        /// exception message if anything fails; on failure <c>wsm.HTML</c> is left untouched.
        /// </remarks>
        public static void GetHtml(WordShowModel wsm)
        {
            try
            {
                // The keyword is URL-encoded with gb2312 before being placed in the query string.
                Encoding gb2312 = Encoding.GetEncoding("gb2312");
                string keyword = System.Web.HttpUtility.UrlEncode(wsm.showword, gb2312);
                var url = string.Format(@"http://www.baidu.com/s?wd={0}&rsv_bp=0&rsv_spt=3&inputT={1}", keyword, (new Random()).Next(90000));
                wsm.errmsg = "正在搜索";

                MSXML2.ServerXMLHTTP wc = new MSXML2.ServerXMLHTTP();
                wc.open("GET", url, false, null, null);
                wc.send("");

                string text = "";
                if (wc.readyState == 4)
                {
                    // Round-trip the raw response bytes through an ADODB stream to decode them as gb2312 text.
                    ADODB.StreamClass stream = new ADODB.StreamClass();
                    stream.Type = ADODB.StreamTypeEnum.adTypeBinary;
                    stream.Mode = ADODB.ConnectModeEnum.adModeReadWrite;
                    stream.Open();
                    try
                    {
                        stream.Write(wc.responseBody);
                        stream.Position = 0;
                        stream.Type = ADODB.StreamTypeEnum.adTypeText;
                        stream.Charset = "gb2312";
                        text = stream.ReadText();
                    }
                    finally
                    {
                        // Close the stream even when writing or decoding throws.
                        stream.Close();
                    }
                }

                wsm.HTML = text;
            }
            catch (Exception exception1)
            {
                wsm.errmsg = exception1.Message;
            }
        }



WebClient与WebBrowser就不发了~~~大同小异........



神呀我要疯了~~~~~~

百度你早点灭亡吧



[解决办法]
百度你早点灭亡吧
[解决办法]
不懂!仅仅为了接分!
[解决办法]
百度他妈试试。
[解决办法]
WebClient, WebRequest都无法支持JavaScript运行.
WebBrowser可以.
http://www.codeproject.com/Articles/50544/Using-the-WebBrowser-Control-in-ASP-NET

[解决办法]
好吧,我承认我没看懂代码
[解决办法]

读书人网 >asp.net

热点推荐