关于解析网页的问题
最近想学习一下网页解析,不知道各位大哥有没有vb.net的例子。
遇到的困难是这样:
1、寻找解析html的dll
2、dll需指定ID进行解析---寻找ID---使用xpath寻找ID---html不是标准的xml文件,需转换成标准的xml格式。
问题很多,感觉很迷惘。
我想解析百度的mp3.baidu.com,获取歌手列表。请高手指教。
[解决办法]
- C# code
using System;
using System.Windows.Forms;

namespace WindowsFormsApplication1
{
    /// <summary>
    /// Demo form that loads an HTML string into an off-screen
    /// <see cref="WebBrowser"/> and reads an element back by its id.
    /// </summary>
    public partial class Form1 : Form
    {
        // Browser control used purely as an HTML parser; it is never added to the form.
        WebBrowser html;

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Creates the browser, hooks the completion event and feeds it the sample markup.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            html = new WebBrowser();

            // Subscribe BEFORE assigning DocumentText so the handler is guaranteed
            // to be attached no matter when the load completes.
            html.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(html_DocumentCompleted);
            html.DocumentText = "<html><head></head><body><div id=\"abc\">def</div></body></html>";
        }

        /// <summary>
        /// Shows the inner HTML of the element with id "abc" once the document has loaded.
        /// Does nothing when the document or the element is unavailable.
        /// </summary>
        private void html_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            HtmlDocument document = html.Document;
            if (document == null)
            {
                return;
            }

            // GetElementById returns null when no element carries the requested id.
            HtmlElement element = document.GetElementById("abc");
            if (element == null)
            {
                return;
            }

            MessageBox.Show(element.InnerHtml);
        }
    }
}
[解决办法]
[解决办法]
<div class="singer"><a target="_blank" href="http://mp3.baidu.com/singerlist/刘德华.html" >刘德华</a>
<div class="singer"><a target="_blank" href="http://mp3.baidu.com/singerlist/凤凰传奇.html" >凤凰传奇</a>
<div class="singer"><a target="_blank" href="http://mp3.baidu.com/singerlist/李宇春.html" >李宇春</a>
......看出什么没?
'根据URL获取HTML
''' <summary>
''' Downloads the resource at <paramref name="url"/> and returns its body as text,
''' showing a small borderless "please wait" window while the request runs.
''' </summary>
''' <param name="url">Address to request.</param>
''' <returns>
''' The response body decoded with <c>Encoding.GetEncoding(0)</c>, or an empty
''' string when any exception occurs (a message box reports the error).
''' </returns>
Private Function luck(ByVal url As String) As String
    Dim html As String = ""

    ' Using guarantees the wait window is disposed on every exit path.
    Using wait As New Form
        Dim l As New Label
        ' Let the label grow to fit its text instead of clipping at the default width.
        l.AutoSize = True
        l.Text = "正在查询网络数据...请稍候..."
        l.Location = New System.Drawing.Point(55, 23)

        wait.FormBorderStyle = Windows.Forms.FormBorderStyle.None
        wait.ClientSize = New System.Drawing.Size(292, 64)
        wait.WindowState = FormWindowState.Normal
        wait.StartPosition = FormStartPosition.CenterScreen
        wait.Controls.Add(l)
        wait.Show()
        ' Force a paint now: the request below blocks the calling thread.
        wait.Refresh()

        Try
            Dim req As Net.HttpWebRequest = CType(Net.WebRequest.Create(url), Net.HttpWebRequest)
            ' Dispose the response and reader so the underlying connection is released.
            Using res As Net.HttpWebResponse = CType(req.GetResponse(), Net.HttpWebResponse)
                Using strm As New IO.StreamReader(res.GetResponseStream(), System.Text.Encoding.GetEncoding(0))
                    html = strm.ReadToEnd()
                End Using
            End Using
        Catch ex As Exception
            MsgBox("网络错误!" + ex.Message, MsgBoxStyle.Exclamation, "网络错误")
            html = ""
        End Try
    End Using

    Return html
End Function
'分析HTML。这部分我写得不好,分析部分建议你参考别的方法
''' <summary>
''' Queries luck.cha.la with the keyword typed in TextBox1, cuts the result
''' fragment out of the returned HTML by fixed text markers, and shows its
''' first two parts in Label1 and Label2.
''' </summary>
''' <remarks>
''' Extraction is plain string splitting on literal markers; if the page does
''' not contain the start marker the user is told and the labels are left as is.
''' </remarks>
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
    ' Escape the keyword so characters such as "&" or "#" cannot break the query string.
    Dim keyword As String = System.Uri.EscapeDataString(TextBox1.Text.Trim())
    Dim url As String = "http://luck.cha.la/search/?kw=" & keyword & "&kc=Cha.La&ow="

    Dim html As String = luck(url)
    If String.IsNullOrEmpty(html) Then
        ' luck() already reported the failure; nothing to parse.
        Return
    End If

    Const startMarker As String = "color:#f60;font-size:26px"
    Const endMarker As String = "</div><br /><font color"

    ' Element 1 is the text that follows the first occurrence of the start marker.
    Dim afterStart As String() = Split(html, startMarker)
    If afterStart.Length < 2 Then
        MsgBox("未能解析查询结果。", MsgBoxStyle.Exclamation, "解析错误")
        Return
    End If

    ' Keep only what precedes the end marker, then drop the leftover `;">` of the style attribute.
    Dim fragment As String = Split(afterStart(1), endMarker)(0)
    fragment = Replace(fragment, ";" & Chr(34) & ">", "")

    Dim parts As String() = Split(fragment, "</span><br />")
    Label1.Text = parts(0)
    If parts.Length > 1 Then
        Label2.Text = parts(1)
    Else
        Label2.Text = ""
    End If
End Sub
原贴:http://topic.csdn.net/u/20090226/00/289ea2e6-b2a5-4e6d-a1e2-18dbe5fb60fc.html