// Scrapes a flight-listing HTML page, parses its table rows, and stores every
// parsed record in the database while showing progress in a WinForms UI.
public partial class Form1 : Form
{
    // Records parsed by the most recent GetData call; bound to dataGridView1
    // once a search run completes.
    DataTable dt = new DataTable();

    // Worker that drives the progress bar; a new instance is created per click.
    private BackgroundWorker worker = null;

    public Form1()
    {
        InitializeComponent();
        label.Visible = false;
        progressBar1.Visible = false;
    }

    /// <summary>
    /// Downloads the listing page, cuts out the fragment between the start and
    /// end markers, and hands that fragment to <c>GetData</c>.
    /// </summary>
    /// <remarks>
    /// Returns silently when a marker is missing or too close to the edge of the
    /// text to be cut. A <c>WebException</c> is shown in a message box rather
    /// than propagated.
    /// </remarks>
    public void Bind()
    {
        // Local snapshot of the page. The author's commented-out live source was:
        // http://www.linkosky.com/UI/AirTicket/SingleFlightShowAllV.aspx? CT=00&JT=01&OC=SHA&DD=2010-05-12&DT=00&DC=PEK&AL=ALL&DR=true&ET=True&SPID=00015032&ORGID=15144
        string firstPage = "C:\\Documents and Settings\\Administrator\\桌面\\c.html";

        try
        {
            byte[] pageData;
            // WebClient is IDisposable; release it as soon as the bytes are read.
            using (WebClient astoWebClient = new WebClient())
            {
                // Credentials used to authenticate the request.
                astoWebClient.Credentials = CredentialCache.DefaultCredentials;
                pageData = astoWebClient.DownloadData(firstPage);
            }

            // The author's note says the page is GB2312; Encoding.Default only
            // matches that when the runtime's default encoding is GB2312.
            string pageHtml = Encoding.Default.GetString(pageData).Trim();

            // NOTE(review): both marker literals below look like they lost their
            // HTML tags when this code was published (the offsets 18 and 86 do
            // not fit a one-character marker). They are left untouched because
            // the original markers cannot be recovered from this file.
            int m = pageHtml.IndexOf(" ");
            if (m == -1)
            {
                return; // start marker not found: nothing to parse
            }
            if (m + 18 > pageHtml.Length)
            {
                return; // page ends before the fixed-length prefix does
            }
            string pageText = pageHtml.Remove(0, m + 18); // drop everything up to the data

            int n = pageText.IndexOf(" ");
            if (n < 86)
            {
                return; // end marker missing or too early for Remove(n - 86)
            }
            string keyText = pageText.Remove(n - 86); // drop everything after the data

            GetData(keyText);
        }
        catch (WebException webEx)
        {
            MessageBox.Show(webEx.ToString());
        }
    }

    /// <summary>
    /// Parses the table rows in <paramref name="ddd"/> into <c>dt</c> and saves
    /// each completed record through <c>add</c>.
    /// </summary>
    /// <param name="ddd">HTML fragment containing the table rows.</param>
    /// <remarks>
    /// Row 0 is skipped. After that rows are consumed in groups of three: the
    /// row with <c>i % 3 == 1</c> fills columns 0-4 of a new record, the row with
    /// <c>i % 3 == 2</c> fills columns 5-9 and completes the record, and the row
    /// with <c>i % 3 == 0</c> is ignored. At most five cells are read per row.
    /// </remarks>
    private void GetData(string ddd)
    {
        dt = new DataTable();
        string[] columnNames = new string[]
        {
            "航空公司", "航班号", "机型", "起飞时间-城市", "到达时间-城市",
            "舱位类型", "剩余座位", "票面价", "返点", "净价"
        };
        foreach (string columnName in columnNames)
        {
            dt.Columns.Add(new System.Data.DataColumn(columnName, typeof(System.String)));
        }

        // The published patterns had their opening tags stripped; these are the
        // reconstructed forms (assumed, based on the surviving closing tags).
        string rowPatterm = @"<tr[^>]*>[\s\S]*?<\/tr>";
        string columnPattern = @"<td[^>]*>[\s\S]*?<\/td>";
        const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture;

        System.Data.DataRow dr = dt.NewRow();
        MatchCollection rowCollection = Regex.Matches(ddd, rowPatterm, options);

        for (int i = 1; i < rowCollection.Count; i++)
        {
            int phase = i % 3;
            if (phase == 0)
            {
                continue; // third row of each group carries nothing that is stored
            }

            bool firstHalf = phase == 1;
            if (firstHalf)
            {
                dr = dt.NewRow();
            }
            int offset = firstHalf ? 0 : 5;

            MatchCollection columnCollection = Regex.Matches(rowCollection[i].Value, columnPattern, options);
            for (int j = 0; j < columnCollection.Count && j < 5; j++)
            {
                string columnConent = columnCollection[j].Value;
                int iBodyStart = columnConent.IndexOf(">", StringComparison.Ordinal);
                if (iBodyStart < 0)
                {
                    continue; // defensive: cell without an opening tag end
                }

                // The regex is case-insensitive, so the closing tag may be upper-case.
                int iTableEnd = columnConent.IndexOf("</td>", iBodyStart + 1, StringComparison.OrdinalIgnoreCase);
                if (iTableEnd < 0)
                {
                    continue; // defensive: cell without a closing tag
                }

                // Text between the opening tag and the closing tag.
                dr[j + offset] = columnConent.Substring(iBodyStart + 1, iTableEnd - iBodyStart - 1);
            }

            if (!firstHalf)
            {
                dt.Rows.Add(dr);
                add(dr[0].ToString(), dr[1].ToString(), dr[2].ToString(), dr[3].ToString(), dr[4].ToString(),
                    dr[5].ToString(), dr[6].ToString(), dr[7].ToString(), dr[8].ToString(), dr[9].ToString());
            }
        }
    }

    /// <summary>
    /// Saves one record by running the <c>tAirline_Add</c> stored procedure with
    /// one parameter per argument.
    /// </summary>
    /// <remarks>
    /// Exceptions from the helper propagate unchanged. The helper's return value
    /// is not inspected, exactly as before.
    /// </remarks>
    public void add(string fAirlineName, string fAirlineNo, string fAirlineType, string fsTime_City, string feTime_City,
        string fSeatType, string fSeatNum, string fPrice, string fBackNum, string fNetPrice)
    {
        SqlParameter[] ps = new SqlParameter[]
        {
            new SqlParameter("@fAirlineName", fAirlineName),
            new SqlParameter("@fAirlineNo", fAirlineNo),
            new SqlParameter("@fAirlineType", fAirlineType),
            new SqlParameter("@fsTime_City", fsTime_City),
            new SqlParameter("@feTime_City", feTime_City),
            new SqlParameter("@fSeatType", fSeatType),
            new SqlParameter("@fSeatNum", fSeatNum),
            new SqlParameter("@fPrice", fPrice),
            new SqlParameter("@fBackNum", fBackNum),
            new SqlParameter("@fNetPrice", fNetPrice)
        };

        // No try/catch here: the previous "catch { throw e; }" added nothing and
        // reset the stack trace of whatever the helper threw.
        WindowsFormsApplication1.SqlHelper.RunProcedureReturnBool("tAirline_Add", ps);
    }

    // ---- The members below only wire the scraper into the WinForms UI; ----
    // ---- the code above is the essential part.                          ----

    /// <summary>
    /// Shows the progress UI, disables the search button, and starts a new
    /// background worker.
    /// </summary>
    private void btnSearch_Click(object sender, EventArgs e)
    {
        label.Text = "请稍后,系统正在解析数据...";
        label.Visible = true;
        progressBar1.Visible = true;
        btnSearch.Enabled = false;

        worker = new BackgroundWorker();
        worker.WorkerReportsProgress = true;
        worker.WorkerSupportsCancellation = true;
        worker.DoWork += new DoWorkEventHandler(worker_DoWork);
        worker.ProgressChanged += new ProgressChangedEventHandler(worker_ProgressChanged);
        worker.RunWorkerCompleted += new RunWorkerCompletedEventHandler(worker_RunWorkerCompleted);
        worker.RunWorkerAsync();
    }

    /// <summary>
    /// Runs when the worker finishes: on success it calls <c>Bind</c> and shows
    /// the parsed rows; on cancel or error it only updates the status label.
    /// </summary>
    /// <remarks>
    /// The search button is re-enabled and the progress bar reset on every path,
    /// so a cancelled or failed run no longer leaves the form unusable.
    /// NOTE(review): <c>Bind</c> still runs inside this handler, as it did
    /// before, so the download and database writes happen here rather than in
    /// <c>worker_DoWork</c>.
    /// </remarks>
    private void worker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
    {
        try
        {
            if (e.Cancelled)
            {
                label.Text = "Cancelled";
                return;
            }
            if (e.Error != null)
            {
                label.Text = "Error";
                return;
            }

            Bind();
            if (dt != null && dt.Rows.Count > 0)
            {
                dataGridView1.DataSource = dt;
            }
            label.Visible = false;
        }
        catch (Exception exts)
        {
            label.Text = "Error";
            MessageBox.Show(exts.ToString());
        }
        finally
        {
            btnSearch.Enabled = true;
            progressBar1.Value = 0;
            progressBar1.Visible = false;
        }
    }

    // Background entry point: delegates to MoveList.
    private void worker_DoWork(object sender, DoWorkEventArgs e)
    {
        MoveList((BackgroundWorker)sender, e);
    }

    /// <summary>
    /// Reports progress in ten steps of 10 percent, sleeping 500 ms after each
    /// step, and stops early with <c>e.Cancel = true</c> when cancellation is
    /// pending.
    /// </summary>
    /// <param name="backgroundWorker">Worker to poll for cancellation and to report progress on.</param>
    /// <param name="e">DoWork arguments; <c>Cancel</c> is set when the loop is cancelled.</param>
    private void MoveList(BackgroundWorker backgroundWorker, DoWorkEventArgs e)
    {
        const int steps = 10;
        for (int i = 0; i < steps; i++)
        {
            if (backgroundWorker.CancellationPending)
            {
                e.Cancel = true;
                break;
            }

            backgroundWorker.ReportProgress((i + 1) * (100 / steps), i);
            Thread.Sleep(500);
        }
    }

    // Mirrors the worker's reported percentage onto the progress bar.
    private void worker_ProgressChanged(object sender, ProgressChangedEventArgs e)
    {
        progressBar1.Value = e.ProgressPercentage;
    }
}
最后执行结果如下图