HttpWebRequest获取不到源代码
Public Function httpsend_getGB2312(ByVal geturl As String) As String
Dim myHttpWebRequest As System.Net.HttpWebRequest
Dim myHttpWebResponse As System.Net.HttpWebResponse
Try
Dim URL As String = geturl
'Dim myUri As Uri = New Uri(geturl)
Dim myWebRequest As System.Net.WebRequest = System.Net.WebRequest.Create(URL)
myHttpWebRequest = CType(myWebRequest, System.Net.HttpWebRequest)
myHttpWebRequest.KeepAlive = True
myHttpWebRequest.Timeout = 300000
myHttpWebRequest.Method = "GET"
' myHttpWebRequest.ContentType = "text/html; charset=gb2312"
Dim myWebResponse As System.Net.WebResponse = myHttpWebRequest.GetResponse()
'获得响应信息
myHttpWebResponse = CType(myWebResponse, System.Net.HttpWebResponse)
Dim iStatCode As Integer = CInt(myHttpWebResponse.StatusCode)
myHttpWebRequest = CType(myWebRequest, System.Net.HttpWebRequest)
myHttpWebResponse = CType(myWebResponse, System.Net.HttpWebResponse)
Dim myStream As System.IO.Stream = myHttpWebResponse.GetResponseStream()
'Dim htmlCharset As String = "GBK"
Dim htmlEncoding As System.Text.Encoding = System.Text.Encoding.GetEncoding(936)
Dim srReader As System.IO.StreamReader = New System.IO.StreamReader(myStream, htmlEncoding)
Dim sTemp As String = srReader.ReadToEnd()
httpsend_getGB2312 = sTemp
' httpsend_get = UTF82Unicode(sTemp)
srReader.Close()
myStream.Close()
myWebResponse.Close()
myWebRequest.Abort()
Catch WebExcp As System.Net.WebException
'Response.Write(Replace(WebExcp.Message.ToString(), "The remote server returned an error: (500) Internal Server Error.", "服务器出现故障无法连接"))
httpsend_getGB2312 = Replace(WebExcp.Message.ToString(), "The remote server returned an error: (500) Internal Server Error.", "服务器出现故障无法连接")
Catch ex As Exception
'Response.Write(ex.ToString())
httpsend_getGB2312 = ex.ToString
End Try
End Function
代码是从网上找的,获取UTF-8格式的没问题,但是GB2312的就获取不到了, 下面这两句貌似不对
Dim htmlEncoding As System.Text.Encoding = System.Text.Encoding.GetEncoding(936)
Dim srReader As System.IO.StreamReader = New System.IO.StreamReader(myStream, htmlEncoding)
前面请求需要设定么
' myHttpWebRequest.ContentType = "text/html; charset=gb2312" 源代码 HttpWebRequest
[解决办法]
编码格式不对,要和服务器上的编码格式一样
[解决办法]
那个网页使用了gzip ,不是gbk的问题
所以请求头上要加 webrequest.headers.add("Accept-Encoding","gzip, deflate");
[解决办法]
System.IO.Stream response;
System.IO.StreamReader sr;
string result = string.Empty;
string domain = "http://xx.xxx.xx/thread0806.php?fid=7&search=&page=" + page;
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(domain);
request.Method = "GET";
request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
request.Headers.Set("Accept-Charset", "GBK,utf-8;q=0.7,*;q=0.3");
request.Headers.Set("Accept-Language", "zh-cn,zh;q=0.5");
request.Headers.Set("Accept-Encoding", "gzip,deflate,sdch");
request.Host = "xx.xxx.xx";
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11";
request.KeepAlive = true;
HttpWebResponse httprp = (HttpWebResponse)request.GetResponse();
httprp.Headers.Set("Content-Encoding", "gzip");
response = httprp.GetResponseStream(); //重新修改后的代码
sr = new System.IO.StreamReader(new GZipStream(response, CompressionMode.Decompress), Encoding.GetEncoding("gb2312"));
result = sr.ReadToEnd();
response.Close();
sr.Close();
return result;
接收代码,并且需要用gzip的方式解码