一个简单的网页数据采集,请教大家有什么好方法
客户的需求是,在和他们同类型网站中采集实时的商品及价格。然后与自己的数据进行比对
打个比方说,客户和它的同行的网站中,几乎都有下面这样的表格显示
类别 价格
商品1 2.63元
商品2 3.41元
商品3 1.28元
在这里,商品的种类差不多都是一样的,都是那么几种,但各网站可能有不同的表现方法,
有的是用
<table>
<tr> <th> 类别 </th> <th> 价格 </th>
<tr> <td> 商品1 </td> <td> 2.63元 </td>
</table>
有的是用div的无序列表来显示。
而且出现的文字形式也有可能不同,例如:
类别 价格
商品一 $2.63
商品二 $3.41
商品三 $1.28
----------------------
我想先实现一个DEMO,大家看看怎么实现
假如有一个TextBox为txtUrl,输入客户同行的网站URL
然后将采集回来的数据经过过滤,返回“类别”和“价格”两种数据,然后写入到数据库
大家给提供个思路,以及关键的类和方法。有示例代码更好。
[解决办法]
// A throwaway sample written yesterday purely as a learning exercise; use it as a reference.
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.Net;
using System.Text.RegularExpressions;
using System.Xml;
using System.IO;

namespace GetQ
{
    /// <summary>
    /// Sample scraper window. A background worker repeatedly downloads one exam page,
    /// extracts the question table and the answer-checking script with regular
    /// expressions, and accumulates previously unseen questions in data.xml.
    /// </summary>
    public class Form1 : System.Windows.Forms.Form
    {
        /// <summary>Page that is downloaded on every pass.</summary>
        private const string PageUrl = "http://www.gdgajj.com/wzks/index.jsp";

        /// <summary>Prefix for the relative image paths found in the page.</summary>
        private const string SiteRoot = "http://www.gdgajj.com";

        /// <summary>Pause between two downloads so the remote server is not hammered.</summary>
        private const int PauseBetweenRequestsMs = 1000;

        /// <summary>Used to marshal log calls from the worker thread onto the UI thread.</summary>
        private delegate void LogHandler(string str);

        private System.Windows.Forms.Button button1;
        private System.Windows.Forms.RichTextBox txtLog;

        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.Container components = null;

        /// <summary>Client used for the page download; created in Form1_Load.</summary>
        private WebClient c = null;

        public Form1()
        {
            // Required for Windows Forms designer support.
            InitializeComponent();
        }

        /// <summary>
        /// Cleans up any resources being used.
        /// </summary>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (components != null)
                {
                    components.Dispose();
                }
            }
            base.Dispose(disposing);
        }

        #region Windows Forms designer generated code
        /// <summary>
        /// Required method for designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            System.Resources.ResourceManager resources = new System.Resources.ResourceManager(typeof(Form1));
            this.button1 = new System.Windows.Forms.Button();
            this.txtLog = new System.Windows.Forms.RichTextBox();
            this.SuspendLayout();
            //
            // button1
            //
            this.button1.Location = new System.Drawing.Point(424, 464);
            this.button1.Name = "button1";
            this.button1.TabIndex = 0;
            this.button1.Text = "开始";
            this.button1.Click += new System.EventHandler(this.button1_Click);
            //
            // txtLog
            //
            this.txtLog.Location = new System.Drawing.Point(32, 48);
            this.txtLog.Name = "txtLog";
            this.txtLog.Size = new System.Drawing.Size(608, 360);
            this.txtLog.TabIndex = 1;
            this.txtLog.Text = "";
            //
            // Form1
            //
            this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
            this.ClientSize = new System.Drawing.Size(728, 525);
            this.Controls.Add(this.txtLog);
            this.Controls.Add(this.button1);
            this.Icon = ((System.Drawing.Icon)(resources.GetObject("$this.Icon")));
            this.Name = "Form1";
            this.Text = "数据抓取";
            this.Load += new System.EventHandler(this.Form1_Load);
            this.ResumeLayout(false);
        }
        #endregion

        /// <summary>
        /// Downloads the page once, isolates the second JavaScript block and the question
        /// table, normalises the table markup into well-formed XML and passes its rows to
        /// <see cref="SaveXml"/>. Shows a message box and returns when the expected markup
        /// is not present.
        /// </summary>
        void DoGet()
        {
            byte[] b = this.c.DownloadData(PageUrl);
            string strOrgHTML = System.Text.Encoding.Default.GetString(b);

            // The second <script> block holds the answer-checking code.
            Regex reg = new Regex(@"<script language=""JavaScript"">([\s\S]*?)</script>");
            MatchCollection ms = reg.Matches(strOrgHTML);
            if (ms.Count < 2)
            {
                MessageBox.Show("没有解析,退出!");
                return;
            }
            string sScript = ms[1].Result("$1");

            // The questions live in the first table with this exact attribute list.
            reg = new Regex(@"<table width=""100%"" border=""0"" cellpadding=""2"" cellspacing=""1"" bgcolor=""#999999"">([\s\S]*?)</table>");
            ms = reg.Matches(strOrgHTML);
            if (ms.Count == 0)
            {
                MessageBox.Show("没有解析,退出!");
                return;
            }
            string strHTML = ms[0].Result("$1");

            // Drop the attributes of every tag except <img>/<input>, then remove div/font tags.
            strHTML = Regex.Replace(strHTML, @"<(?!img|input)([^>\s]*)[^>]*?>", "<$1>", RegexOptions.IgnoreCase);
            strHTML = Regex.Replace(strHTML, "</?div[^>]*?>", "");
            strHTML = Regex.Replace(strHTML, "</?font[^>]*?>", "");
            // Encode each radio button as "^name@value@" so options can be split apart later.
            strHTML = Regex.Replace(strHTML, @"<input type=""radio"" name=""([^""]*?)""\s*value=""([A-E])"">", "^$1@$2@");
            // Turn the unclosed <img src="..."> into an element whose text is the image path.
            strHTML = Regex.Replace(strHTML, @"<img\s*src=""([^""]*?)"">", "<img>$1</img>");
            strHTML = "<table>" + strHTML + "</table>";

            XmlDocument dom = new XmlDocument();
            dom.LoadXml(strHTML);
            XmlNodeList nl = dom.SelectNodes("//tr");
            this.SaveXml(nl, sScript);
        }

        /// <summary>
        /// Merges the scraped rows into data.xml (next to the executable). Rows are read in
        /// pairs: the first row is the question text, the second holds the encoded options
        /// and, in its second cell, an optional image path. Questions whose text is already
        /// stored are skipped.
        /// </summary>
        /// <param name="nl">The &lt;tr&gt; rows of the normalised question table.</param>
        /// <param name="sScript">The page script that is searched for the correct answers.</param>
        void SaveXml(XmlNodeList nl, string sScript)
        {
            string sPath = Path.Combine(Application.StartupPath, "data.xml");
            XmlDocument dom = new XmlDocument();
            if (!File.Exists(sPath))
            {
                dom.AppendChild(dom.CreateElement("root"));
            }
            else
            {
                try
                {
                    dom.Load(sPath);
                }
                catch (Exception er)
                {
                    MessageBox.Show("读取xml出错!,请删除后重新生成!" + er.Message, "错误", MessageBoxButtons.OK, MessageBoxIcon.Error);
                    return;
                }
            }

            XmlNode root = dom.DocumentElement;
            int iNewCount = 0;

            // Stepping by two also ignores a trailing unpaired row instead of failing on it.
            for (int i = 0; i + 1 < nl.Count; i += 2)
            {
                string sName = StripQuestionNumber(nl[i].InnerText);
                XmlNode optionRow = nl[i + 1];
                if (sName.Length == 0 || optionRow.ChildNodes.Count == 0 || ContainsQuestion(root, sName))
                {
                    continue;
                }

                iNewCount++;
                XmlElement ask = dom.CreateElement("ask");
                ask.SetAttribute("name", sName);
                root.AppendChild(ask);

                // Options look like "^radio37@A@text^radio37@B@text...".
                string radioName = null;
                string[] ary = optionRow.ChildNodes[0].InnerText.Split('^');
                for (int j = 0; j < ary.Length; j++)
                {
                    if (ary[j].Trim() == "")
                    {
                        continue;
                    }
                    string[] aryOptions = ary[j].Split('@');
                    radioName = aryOptions[0].Trim();
                    if (aryOptions.Length == 3)
                    {
                        XmlElement nodeAnswer = dom.CreateElement("ans");
                        nodeAnswer.InnerText = aryOptions[2];
                        nodeAnswer.SetAttribute("id", aryOptions[0]);
                        nodeAnswer.SetAttribute("aid", aryOptions[1]);
                        ask.AppendChild(nodeAnswer);
                    }
                }

                // All radios of one question share a name, so any of them locates the answer
                // in the script, e.g.: frmexam.radio37[i].value == "B"
                Match m = null;
                if (radioName != null && radioName.Length > 0)
                {
                    m = Regex.Match(sScript, @"frmexam\." + Regex.Escape(radioName) + @"\[i\]\.value\s*==\s*""([A-E])""");
                }
                if (m != null && m.Success)
                {
                    ask.SetAttribute("answer", m.Groups[1].Value);
                }
                else
                {
                    this.WriteLog("没有找到正确答案!");
                }

                // Optional image: the indexer yields null when the row has no second cell.
                XmlNode imageCell = optionRow.ChildNodes[1];
                string sImageURL = imageCell == null ? "" : imageCell.InnerText.Trim();
                if (sImageURL != "")
                {
                    // Download the picture and store it under a fresh GUID file name.
                    string imgId = Guid.NewGuid().ToString();
                    using (WebClient imageClient = new WebClient())
                    {
                        imageClient.DownloadFile(SiteRoot + sImageURL, imgId);
                    }
                    ask.SetAttribute("src", imgId);
                }
            }

            dom.Save(sPath);
            this.WriteLog(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + "\t本次搜索到" + (nl.Count / 2).ToString() + "条,保存" + iNewCount.ToString() + "条,总计" + root.ChildNodes.Count.ToString() + "条");
        }

        /// <summary>
        /// Removes the leading question number: the first two characters and, if present,
        /// a following "、". Returns an empty string when nothing is left.
        /// </summary>
        private static string StripQuestionNumber(string text)
        {
            if (text == null || text.Length <= 2)
            {
                return "";
            }
            string name = text.Substring(2).Trim();
            if (name.Length > 0 && name[0] == '、')
            {
                name = name.Substring(1).Trim();
            }
            return name;
        }

        /// <summary>
        /// Tells whether an "ask" element with the given name already exists. Comparing in
        /// code instead of building an XPath string keeps quotes in the name harmless.
        /// </summary>
        private static bool ContainsQuestion(XmlNode root, string name)
        {
            foreach (XmlNode existing in root.SelectNodes("ask"))
            {
                XmlAttribute at = existing.Attributes["name"];
                if (at != null && at.Value == name)
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Appends a line to log.txt (next to the executable) and to the on-screen log.
        /// Safe to call from the worker thread: the call is re-issued on the UI thread.
        /// </summary>
        void WriteLog(string str)
        {
            if (this.txtLog.InvokeRequired)
            {
                try
                {
                    this.txtLog.Invoke(new LogHandler(this.WriteLog), new object[] { str });
                }
                catch (InvalidOperationException)
                {
                    // The form was closed while the worker was still running; nothing to log to.
                }
                return;
            }

            string sFileName = Path.Combine(Application.StartupPath, "log.txt");
            using (StreamWriter sw = new StreamWriter(sFileName, true, System.Text.Encoding.Default))
            {
                sw.Write(str + "\r\n");
            }
            this.txtLog.Text += str + "\r\n";
            this.txtLog.SelectAll();
            this.txtLog.ScrollToCaret();
            this.txtLog.Refresh();
        }

        /// <summary>
        /// Starts the scraping worker. The button is disabled afterwards because the shared
        /// WebClient cannot serve several concurrent downloads.
        /// </summary>
        private void button1_Click(object sender, System.EventArgs e)
        {
            this.txtLog.Focus();
            this.button1.Enabled = false;
            System.Threading.Thread t = new System.Threading.Thread(new System.Threading.ThreadStart(this._Search));
            t.IsBackground = true; // do not keep the process alive after the window closes
            t.Start();
        }

        /// <summary>
        /// Worker loop: scrape, pause, repeat until the form is disposed. A failed pass is
        /// logged and retried instead of terminating the process.
        /// </summary>
        void _Search()
        {
            while (!this.IsDisposed)
            {
                try
                {
                    this.DoGet();
                }
                catch (Exception er)
                {
                    this.WriteLog(er.Message);
                }
                System.Threading.Thread.Sleep(PauseBetweenRequestsMs);
            }
        }

        private void Form1_Load(object sender, System.EventArgs e)
        {
            this.c = new WebClient();
        }
    }
}
[解决办法]
一般来说都是用 HttpWebRequest 或 WebClient 先取得网页的源码,然后再通过正则过滤出想要的数据
这是抓取网页源代码的
// Download the HTML source of PageUrl and show it in richTextBox1.
string PageUrl = "............";
System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(PageUrl);
// Present a browser-like User-Agent with the request.
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.1) Web-Sniffer/1.0.24";
// The using blocks release the response, the stream and the reader in the right order,
// even when reading fails half-way.
using (System.Net.WebResponse response = request.GetResponse())
using (System.IO.Stream resStream = response.GetResponseStream())
using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, System.Text.Encoding.Default))
{
    // Encoding.Default is the machine's ANSI code page; pass the page's real encoding
    // (for example UTF-8) when the text comes out garbled.
    richTextBox1.Text = sr.ReadToEnd();
}
正则需要根据实际情况来写
[解决办法]
//--当初 用来抓 天气信息的:) 应该对你有帮助
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.Data.SqlClient;
using System.Web;
using System.Net;
using System.IO;
using System.Threading;
namespace startCom
{
    /// <summary>
    /// Signature of the scheduled jobs run by <see cref="Form1"/>.
    /// </summary>
    public delegate void myDelegate();

    /// <summary>
    /// Server-side job window. A background thread wakes up every ten minutes and, once
    /// per day, refreshes the weather table (during hour 9) and empties the temp
    /// directory (during hour 3). Progress is written to the multi-line text box.
    /// </summary>
    public class Form1 : System.Windows.Forms.Form
    {
        /// <summary>Connection string placeholder (the real value was removed by the author).</summary>
        private const string ConnectionString = "机密去掉:)";
        private const string WeatherUrl = "http://www.cma.gov.cn/netcenter_news/qxyb/city/index.php?city=";
        private const string ForecastBegin = "<!--未来天气预报开始-->";
        private const string ForecastEnd = "<!--指数预报结束-->";
        private const string TempDirectory = "d:\\wap\\tmp\\";
        private const int WeatherHour = 9;
        private const int CleanupHour = 3;
        private const int PollIntervalMs = 600000;

        /// <summary>Used to marshal log calls from the worker thread onto the UI thread.</summary>
        private delegate void LogHandler(string text, bool replace);

        private System.Windows.Forms.TextBox textBox1;
        private System.ComponentModel.IContainer components = null;

        /// <summary>Weather job; assigned and raised by the scheduler.</summary>
        public event myDelegate myEventA;

        /// <summary>Temp-file cleanup job; assigned and raised by the scheduler.</summary>
        public event myDelegate myEventB;

        // Dates of the last successful runs, so each job runs once per day.
        private DateTime lastWeatherRun = DateTime.MinValue;
        private DateTime lastCleanupRun = DateTime.MinValue;

        public Form1()
        {
            // Required for Windows Forms designer support.
            InitializeComponent();
        }

        /// <summary>
        /// Cleans up any resources being used.
        /// </summary>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (components != null)
                {
                    components.Dispose();
                }
            }
            base.Dispose(disposing);
        }

        #region Windows Forms designer generated code
        /// <summary>
        /// Required method for designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.textBox1 = new System.Windows.Forms.TextBox();
            this.SuspendLayout();
            //
            // textBox1
            //
            this.textBox1.Location = new System.Drawing.Point(0, 0);
            this.textBox1.Multiline = true;
            this.textBox1.Name = "textBox1";
            this.textBox1.ScrollBars = System.Windows.Forms.ScrollBars.Both;
            this.textBox1.Size = new System.Drawing.Size(504, 496);
            this.textBox1.TabIndex = 0;
            this.textBox1.Text = "";
            //
            // Form1
            //
            this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
            this.ClientSize = new System.Drawing.Size(504, 493);
            this.Controls.Add(this.textBox1);
            this.MinimumSize = new System.Drawing.Size(512, 520);
            this.Name = "Form1";
            this.StartPosition = System.Windows.Forms.FormStartPosition.CenterScreen;
            this.Text = "服务器数据处理";
            this.Closing += new System.ComponentModel.CancelEventHandler(this.Form1_Closing);
            this.Load += new System.EventHandler(this.Form1_Load);
            this.ResumeLayout(false);
        }
        #endregion

        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        [STAThread]
        static void Main()
        {
            Application.Run(new Form1());
        }

        /// <summary>
        /// Starts the scheduler on a background thread so it never blocks the UI and does
        /// not keep the process alive after the window is closed.
        /// </summary>
        private void Form1_Load(object sender, System.EventArgs e)
        {
            Thread thd = new Thread(new ThreadStart(doEvent));
            thd.IsBackground = true;
            thd.Start();
        }

        /// <summary>
        /// Writes to the text box, switching to the UI thread when called from the worker.
        /// </summary>
        /// <param name="text">Text to show.</param>
        /// <param name="replace">True to replace the current content, false to append.</param>
        private void Log(string text, bool replace)
        {
            if (textBox1.InvokeRequired)
            {
                try
                {
                    textBox1.Invoke(new LogHandler(Log), new object[] { text, replace });
                }
                catch (InvalidOperationException)
                {
                    // The form was closed while a job was still running; nothing to log to.
                }
                return;
            }

            if (replace)
            {
                textBox1.Text = text;
            }
            else
            {
                textBox1.AppendText(text);
            }
        }

        /// <summary>
        /// Weather job: reads every city name from the city table, scrapes its forecast
        /// and updates the matching row of the weather table. A failure for one city is
        /// logged and does not stop the remaining cities.
        /// </summary>
        private void doget()
        {
            Log("天气预报开始:" + System.DateTime.Now.ToString("yyyyMMdd") + "\r\n", true);

            using (SqlConnection conn = new SqlConnection(ConnectionString))
            {
                conn.Open();

                // Load the names first so the reader is closed before the updates start
                // and one connection can serve both the query and the updates.
                ArrayList cities = new ArrayList();
                using (SqlCommand comm = new SqlCommand("select cityname from city", conn))
                using (SqlDataReader dr = comm.ExecuteReader())
                {
                    while (dr.Read())
                    {
                        cities.Add(dr["cityname"].ToString());
                    }
                }

                foreach (string ct in cities)
                {
                    Log(ct + "\r\n", false);
                    try
                    {
                        UpdateCityWeather(conn, ct);
                    }
                    catch (System.Exception er)
                    {
                        Log(er + "\r\n", false);
                    }
                }
            }

            Log("天气预报结束\r\n", false);
        }

        /// <summary>
        /// Scrapes the forecast page of one city and stores the result.
        /// </summary>
        /// <exception cref="FormatException">The page does not contain the expected markers or cells.</exception>
        private void UpdateCityWeather(SqlConnection conn, string ct)
        {
            string tmp = GetContentFromUrll(WeatherUrl + System.Web.HttpUtility.UrlEncode(ct, System.Text.Encoding.GetEncoding("gb2312")));

            // Only the section between the two HTML comment markers is parsed.
            int begin = tmp.IndexOf(ForecastBegin);
            int end = tmp.IndexOf(ForecastEnd);
            if (begin < 0 || end < begin)
            {
                throw new FormatException("Forecast markers not found for city: " + ct);
            }
            tmp = tmp.Substring(begin, end - begin);

            // Each field is built from three consecutive <font> cells joined with '|'.
            string dt = JoinThree(tmp, 1);
            string wt = JoinThree(tmp, 5);
            string wd = JoinThree(tmp, 9);
            string wl = JoinThree(tmp, 13);
            string wen = JoinThree(tmp, 17);
            string wu = "";
            if (tmp.IndexOf("污染指数") != -1)
            {
                wu = "污染指数:" + getString(tmp, 21) + "<br/>\n" + "紫外线指数:" + getString(tmp, 23) + "<br/>\n" + "舒适度指数:" + getString(tmp, 25) + "<br/>\n" + "穿衣指数:" + getString(tmp, 27);
            }

            // Parameters keep quotes in the scraped text from breaking (or injecting into) the SQL.
            string sqlCmd = "update weather set [date]=@date,weather=@weather,windl=@windl,wind=@wind,wendu=@wendu,wuran=@wuran where city=@city";
            using (SqlCommand cmd = new SqlCommand(sqlCmd, conn))
            {
                cmd.Parameters.Add("@date", SqlDbType.NVarChar).Value = dt;
                cmd.Parameters.Add("@weather", SqlDbType.NVarChar).Value = wt;
                cmd.Parameters.Add("@windl", SqlDbType.NVarChar).Value = wd;
                cmd.Parameters.Add("@wind", SqlDbType.NVarChar).Value = wl;
                cmd.Parameters.Add("@wendu", SqlDbType.NVarChar).Value = wen;
                cmd.Parameters.Add("@wuran", SqlDbType.NVarChar).Value = wu;
                cmd.Parameters.Add("@city", SqlDbType.NVarChar).Value = ct;
                cmd.ExecuteNonQuery();
            }
        }

        /// <summary>Joins cells first, first+1 and first+2 with '|'.</summary>
        private string JoinThree(string tmp, int first)
        {
            return getString(tmp, first) + "|" + getString(tmp, first + 1) + "|" + getString(tmp, first + 2);
        }

        /// <summary>
        /// Cleanup job: deletes every file directly inside the temp directory and logs the outcome.
        /// </summary>
        private void deltemp()
        {
            try
            {
                DirectoryInfo DInfo = new DirectoryInfo(TempDirectory);
                foreach (FileInfo NextFile in DInfo.GetFiles())
                {
                    NextFile.Delete();
                }
                Log("Tmp文件删除成功!" + System.DateTime.Now.ToString("yyyyMMdd") + "\r\n", false);
            }
            catch (Exception er)
            {
                Log("文件删除有误!" + er.ToString() + "\r\n", false);
            }
        }

        /// <summary>
        /// Returns the text of the i-th (1-based) &lt;font&gt; cell of the fragment, lower-cased,
        /// with nested tags, line breaks, tabs, spaces, parentheses and '|' removed.
        /// </summary>
        /// <param name="tmp">HTML fragment to search.</param>
        /// <param name="i">1-based index of the cell.</param>
        /// <exception cref="FormatException">The fragment has fewer than i cells.</exception>
        private string getString(string tmp, int i)
        {
            const string closeTag = "</font>";
            string temp = tmp.ToLower();

            int _index = temp.IndexOf("<font");
            if (_index < 0)
            {
                throw new FormatException("No <font> cell found.");
            }

            // Skip the closing tags of the i-1 preceding cells.
            for (int n = 1; n < i; n++)
            {
                int close = temp.IndexOf(closeTag, _index);
                if (close < 0)
                {
                    throw new FormatException("Cell " + i + " not found.");
                }
                _index = close + closeTag.Length;
            }

            int end = temp.IndexOf(closeTag, _index);
            if (end < 0)
            {
                throw new FormatException("Cell " + i + " not found.");
            }

            temp = temp.Substring(_index, end - _index);
            // First strip remaining tags and line breaks, then spaces, parentheses and '|'.
            temp = System.Text.RegularExpressions.Regex.Replace(temp, "<(.|[\f\n\r\t\v])+?>|[\n\r\t]", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            return System.Text.RegularExpressions.Regex.Replace(temp, "[ ()|]+", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Downloads the given URL with an HTTP GET and returns the body decoded as gb2312.
        /// </summary>
        private string GetContentFromUrll(string _requestUrl)
        {
            HttpWebRequest _WebRequest = (HttpWebRequest)WebRequest.Create(_requestUrl);
            _WebRequest.Method = "GET";
            using (WebResponse _WebResponse = _WebRequest.GetResponse())
            using (StreamReader _ResponseStream = new StreamReader(_WebResponse.GetResponseStream(), System.Text.Encoding.GetEncoding("gb2312")))
            {
                return _ResponseStream.ReadToEnd();
            }
        }

        /// <summary>
        /// Scheduler loop (worker thread). Every ten minutes it checks the clock and runs
        /// each job at most once per day during its hour. A job that fails is retried on
        /// the next poll within the same hour.
        /// </summary>
        void doEvent()
        {
            while (true)
            {
                DateTime now = DateTime.Now;
                if (now.Hour == WeatherHour && lastWeatherRun != now.Date)
                {
                    myEventA = new myDelegate(doget);
                    if (RunJob(myEventA))
                    {
                        lastWeatherRun = now.Date;
                    }
                }
                if (now.Hour == CleanupHour && lastCleanupRun != now.Date)
                {
                    myEventB = new myDelegate(deltemp);
                    if (RunJob(myEventB))
                    {
                        lastCleanupRun = now.Date;
                    }
                }
                Thread.Sleep(PollIntervalMs);
            }
        }

        /// <summary>
        /// Runs a job and logs any exception so a failure cannot kill the scheduler thread.
        /// </summary>
        /// <returns>True when the job finished without throwing.</returns>
        private bool RunJob(myDelegate job)
        {
            try
            {
                job();
                return true;
            }
            catch (Exception er)
            {
                Log(er + "\r\n", false);
                return false;
            }
        }

        private void Form1_Closing(object sender, System.ComponentModel.CancelEventArgs e)
        {
            Application.Exit();
        }
    }
}
[解决办法]
你指定网页阿,不要整站啊!
[解决办法]
楼主到底是要采集什么,看你的代码,除了javascript和 <> 内的html标记外,楼主都存进arr里了,arr.Length当然会过长了,过滤出你想要的内容再存吧
[解决办法]
mark
[解决办法]
偶用 WebClient,晚上回家把代码贴出来
[解决办法]
再帮你顶一下..LZ别急...
高人马上出现..就在LX
[解决办法]
你们可以去看看这个页面 http://www.igxe.com.cn/inc/DisplayOffer.cfm?gid=10
我要取出这个页面显示的所有值,并写入本地数据库。
有谁做过这样大量采集吗
================
这也叫大量?
数据处理上用正则
<td nowrap> .*?229:(.*?) </td> .*? <td> (.*?) </td> .*? <td> (.*?) </td>
组1-3 分别是服务器 价格 数量
还有上面的System.IO.StreamReader sr = new StreamReader(strm, System.Text.Encoding.Default);
改成UTF-8
[解决办法]
收藏
[解决办法]
mark
[解决办法]
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.Data.SqlClient;
using System.Web;
using System.Net;
using System.IO;
using System.Threading;
namespace startCom
{
    /// <summary>
    /// Signature of the scheduled jobs run by <see cref="Form1"/>.
    /// </summary>
    public delegate void myDelegate();

    /// <summary>
    /// Server-side job window. A background thread wakes up every ten minutes and, once
    /// per day, refreshes the weather table (during hour 9) and empties the temp
    /// directory (during hour 3). Progress is written to the multi-line text box.
    /// </summary>
    public class Form1 : System.Windows.Forms.Form
    {
        /// <summary>Connection string placeholder (the real value was removed by the author).</summary>
        private const string ConnectionString = "机密去掉:)";
        private const string WeatherUrl = "http://www.cma.gov.cn/netcenter_news/qxyb/city/index.php?city=";
        private const string ForecastBegin = "<!--未来天气预报开始-->";
        private const string ForecastEnd = "<!--指数预报结束-->";
        private const string TempDirectory = "d:\\wap\\tmp\\";
        private const int WeatherHour = 9;
        private const int CleanupHour = 3;
        private const int PollIntervalMs = 600000;

        /// <summary>Used to marshal log calls from the worker thread onto the UI thread.</summary>
        private delegate void LogHandler(string text, bool replace);

        private System.Windows.Forms.TextBox textBox1;
        private System.ComponentModel.IContainer components = null;

        /// <summary>Weather job; assigned and raised by the scheduler.</summary>
        public event myDelegate myEventA;

        /// <summary>Temp-file cleanup job; assigned and raised by the scheduler.</summary>
        public event myDelegate myEventB;

        // Dates of the last successful runs, so each job runs once per day.
        private DateTime lastWeatherRun = DateTime.MinValue;
        private DateTime lastCleanupRun = DateTime.MinValue;

        public Form1()
        {
            // Required for Windows Forms designer support.
            InitializeComponent();
        }

        /// <summary>
        /// Cleans up any resources being used.
        /// </summary>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (components != null)
                {
                    components.Dispose();
                }
            }
            base.Dispose(disposing);
        }

        #region Windows Forms designer generated code
        /// <summary>
        /// Required method for designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.textBox1 = new System.Windows.Forms.TextBox();
            this.SuspendLayout();
            //
            // textBox1
            //
            this.textBox1.Location = new System.Drawing.Point(0, 0);
            this.textBox1.Multiline = true;
            this.textBox1.Name = "textBox1";
            this.textBox1.ScrollBars = System.Windows.Forms.ScrollBars.Both;
            this.textBox1.Size = new System.Drawing.Size(504, 496);
            this.textBox1.TabIndex = 0;
            this.textBox1.Text = "";
            //
            // Form1
            //
            this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
            this.ClientSize = new System.Drawing.Size(504, 493);
            this.Controls.Add(this.textBox1);
            this.MinimumSize = new System.Drawing.Size(512, 520);
            this.Name = "Form1";
            this.StartPosition = System.Windows.Forms.FormStartPosition.CenterScreen;
            this.Text = "服务器数据处理";
            this.Closing += new System.ComponentModel.CancelEventHandler(this.Form1_Closing);
            this.Load += new System.EventHandler(this.Form1_Load);
            this.ResumeLayout(false);
        }
        #endregion

        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        [STAThread]
        static void Main()
        {
            Application.Run(new Form1());
        }

        /// <summary>
        /// Starts the scheduler on a background thread so it never blocks the UI and does
        /// not keep the process alive after the window is closed.
        /// </summary>
        private void Form1_Load(object sender, System.EventArgs e)
        {
            Thread thd = new Thread(new ThreadStart(doEvent));
            thd.IsBackground = true;
            thd.Start();
        }

        /// <summary>
        /// Writes to the text box, switching to the UI thread when called from the worker.
        /// </summary>
        /// <param name="text">Text to show.</param>
        /// <param name="replace">True to replace the current content, false to append.</param>
        private void Log(string text, bool replace)
        {
            if (textBox1.InvokeRequired)
            {
                try
                {
                    textBox1.Invoke(new LogHandler(Log), new object[] { text, replace });
                }
                catch (InvalidOperationException)
                {
                    // The form was closed while a job was still running; nothing to log to.
                }
                return;
            }

            if (replace)
            {
                textBox1.Text = text;
            }
            else
            {
                textBox1.AppendText(text);
            }
        }

        /// <summary>
        /// Weather job: reads every city name from the city table, scrapes its forecast
        /// and updates the matching row of the weather table. A failure for one city is
        /// logged and does not stop the remaining cities.
        /// </summary>
        private void doget()
        {
            Log("天气预报开始:" + System.DateTime.Now.ToString("yyyyMMdd") + "\r\n", true);

            using (SqlConnection conn = new SqlConnection(ConnectionString))
            {
                conn.Open();

                // Load the names first so the reader is closed before the updates start
                // and one connection can serve both the query and the updates.
                ArrayList cities = new ArrayList();
                using (SqlCommand comm = new SqlCommand("select cityname from city", conn))
                using (SqlDataReader dr = comm.ExecuteReader())
                {
                    while (dr.Read())
                    {
                        cities.Add(dr["cityname"].ToString());
                    }
                }

                foreach (string ct in cities)
                {
                    Log(ct + "\r\n", false);
                    try
                    {
                        UpdateCityWeather(conn, ct);
                    }
                    catch (System.Exception er)
                    {
                        Log(er + "\r\n", false);
                    }
                }
            }

            Log("天气预报结束\r\n", false);
        }

        /// <summary>
        /// Scrapes the forecast page of one city and stores the result.
        /// </summary>
        /// <exception cref="FormatException">The page does not contain the expected markers or cells.</exception>
        private void UpdateCityWeather(SqlConnection conn, string ct)
        {
            string tmp = GetContentFromUrll(WeatherUrl + System.Web.HttpUtility.UrlEncode(ct, System.Text.Encoding.GetEncoding("gb2312")));

            // Only the section between the two HTML comment markers is parsed.
            int begin = tmp.IndexOf(ForecastBegin);
            int end = tmp.IndexOf(ForecastEnd);
            if (begin < 0 || end < begin)
            {
                throw new FormatException("Forecast markers not found for city: " + ct);
            }
            tmp = tmp.Substring(begin, end - begin);

            // Each field is built from three consecutive <font> cells joined with '|'.
            string dt = JoinThree(tmp, 1);
            string wt = JoinThree(tmp, 5);
            string wd = JoinThree(tmp, 9);
            string wl = JoinThree(tmp, 13);
            string wen = JoinThree(tmp, 17);
            string wu = "";
            if (tmp.IndexOf("污染指数") != -1)
            {
                wu = "污染指数:" + getString(tmp, 21) + "<br/>\n" + "紫外线指数:" + getString(tmp, 23) + "<br/>\n" + "舒适度指数:" + getString(tmp, 25) + "<br/>\n" + "穿衣指数:" + getString(tmp, 27);
            }

            // Parameters keep quotes in the scraped text from breaking (or injecting into) the SQL.
            string sqlCmd = "update weather set [date]=@date,weather=@weather,windl=@windl,wind=@wind,wendu=@wendu,wuran=@wuran where city=@city";
            using (SqlCommand cmd = new SqlCommand(sqlCmd, conn))
            {
                cmd.Parameters.Add("@date", SqlDbType.NVarChar).Value = dt;
                cmd.Parameters.Add("@weather", SqlDbType.NVarChar).Value = wt;
                cmd.Parameters.Add("@windl", SqlDbType.NVarChar).Value = wd;
                cmd.Parameters.Add("@wind", SqlDbType.NVarChar).Value = wl;
                cmd.Parameters.Add("@wendu", SqlDbType.NVarChar).Value = wen;
                cmd.Parameters.Add("@wuran", SqlDbType.NVarChar).Value = wu;
                cmd.Parameters.Add("@city", SqlDbType.NVarChar).Value = ct;
                cmd.ExecuteNonQuery();
            }
        }

        /// <summary>Joins cells first, first+1 and first+2 with '|'.</summary>
        private string JoinThree(string tmp, int first)
        {
            return getString(tmp, first) + "|" + getString(tmp, first + 1) + "|" + getString(tmp, first + 2);
        }

        /// <summary>
        /// Cleanup job: deletes every file directly inside the temp directory and logs the outcome.
        /// </summary>
        private void deltemp()
        {
            try
            {
                DirectoryInfo DInfo = new DirectoryInfo(TempDirectory);
                foreach (FileInfo NextFile in DInfo.GetFiles())
                {
                    NextFile.Delete();
                }
                Log("Tmp文件删除成功!" + System.DateTime.Now.ToString("yyyyMMdd") + "\r\n", false);
            }
            catch (Exception er)
            {
                Log("文件删除有误!" + er.ToString() + "\r\n", false);
            }
        }

        /// <summary>
        /// Returns the text of the i-th (1-based) &lt;font&gt; cell of the fragment, lower-cased,
        /// with nested tags, line breaks, tabs, spaces, parentheses and '|' removed.
        /// </summary>
        /// <param name="tmp">HTML fragment to search.</param>
        /// <param name="i">1-based index of the cell.</param>
        /// <exception cref="FormatException">The fragment has fewer than i cells.</exception>
        private string getString(string tmp, int i)
        {
            const string closeTag = "</font>";
            string temp = tmp.ToLower();

            int _index = temp.IndexOf("<font");
            if (_index < 0)
            {
                throw new FormatException("No <font> cell found.");
            }

            // Skip the closing tags of the i-1 preceding cells.
            for (int n = 1; n < i; n++)
            {
                int close = temp.IndexOf(closeTag, _index);
                if (close < 0)
                {
                    throw new FormatException("Cell " + i + " not found.");
                }
                _index = close + closeTag.Length;
            }

            int end = temp.IndexOf(closeTag, _index);
            if (end < 0)
            {
                throw new FormatException("Cell " + i + " not found.");
            }

            temp = temp.Substring(_index, end - _index);
            // First strip remaining tags and line breaks, then spaces, parentheses and '|'.
            temp = System.Text.RegularExpressions.Regex.Replace(temp, "<(.|[\f\n\r\t\v])+?>|[\n\r\t]", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            return System.Text.RegularExpressions.Regex.Replace(temp, "[ ()|]+", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Downloads the given URL with an HTTP GET and returns the body decoded as gb2312.
        /// </summary>
        private string GetContentFromUrll(string _requestUrl)
        {
            HttpWebRequest _WebRequest = (HttpWebRequest)WebRequest.Create(_requestUrl);
            _WebRequest.Method = "GET";
            using (WebResponse _WebResponse = _WebRequest.GetResponse())
            using (StreamReader _ResponseStream = new StreamReader(_WebResponse.GetResponseStream(), System.Text.Encoding.GetEncoding("gb2312")))
            {
                return _ResponseStream.ReadToEnd();
            }
        }

        /// <summary>
        /// Scheduler loop (worker thread). Every ten minutes it checks the clock and runs
        /// each job at most once per day during its hour. A job that fails is retried on
        /// the next poll within the same hour.
        /// </summary>
        void doEvent()
        {
            while (true)
            {
                DateTime now = DateTime.Now;
                if (now.Hour == WeatherHour && lastWeatherRun != now.Date)
                {
                    myEventA = new myDelegate(doget);
                    if (RunJob(myEventA))
                    {
                        lastWeatherRun = now.Date;
                    }
                }
                if (now.Hour == CleanupHour && lastCleanupRun != now.Date)
                {
                    myEventB = new myDelegate(deltemp);
                    if (RunJob(myEventB))
                    {
                        lastCleanupRun = now.Date;
                    }
                }
                Thread.Sleep(PollIntervalMs);
            }
        }

        /// <summary>
        /// Runs a job and logs any exception so a failure cannot kill the scheduler thread.
        /// </summary>
        /// <returns>True when the job finished without throwing.</returns>
        private bool RunJob(myDelegate job)
        {
            try
            {
                job();
                return true;
            }
            catch (Exception er)
            {
                Log(er + "\r\n", false);
                return false;
            }
        }

        private void Form1_Closing(object sender, System.ComponentModel.CancelEventArgs e)
        {
            Application.Exit();
        }
    }
}
学学