博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
C# 抓取页面table数据并分析到数据库
阅读量:5064 次
发布时间:2019-06-12

本文共 9894 字,大约阅读时间需要 32 分钟。

// Scrapes a flight-fare HTML table, parses its rows and saves every flight to the database.
public partial class Form1 : Form
{
    // NOTE(review): the two marker literals below were lost when this listing was
    // extracted from HTML (tag-like text was stripped). The values are placeholders;
    // restore the original markers, which the 18/86 character offsets were tuned for.
    private const string TableStartMarker = "<table";
    private const string TableEndMarker = "</table>";

    // Characters dropped after the start marker position (kept from the original code).
    private const int TableStartSkip = 18;

    // Characters kept back before the end marker position (kept from the original code).
    private const int TableEndBackoff = 86;

    // Each flight is spread over two <tr> rows carrying up to five cells each.
    private const int ColumnsPerHalf = 5;

    // Number of steps reported by the (purely cosmetic) progress animation.
    private const int ProgressSteps = 10;

    // Reconstructed: the leading "<tr[^>" / "<td[^>" of both patterns was stripped
    // from the listing; the remaining tails make the intent unambiguous.
    private static readonly Regex RowRegex = new Regex(
        @"<tr[^>]*>[\s\S]*?<\/tr>",
        RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture);

    private static readonly Regex ColumnRegex = new Regex(
        @"<td[^>]*>[\s\S]*?<\/td>",
        RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture);

    private static readonly string[] ColumnNames =
    {
        "航空公司", "航班号", "机型", "起飞时间-城市", "到达时间-城市",
        "舱位类型", "剩余座位", "票面价", "返点", "净价"
    };

    DataTable dt = new DataTable();

    private BackgroundWorker worker = null;

    public Form1()
    {
        InitializeComponent();
        label.Visible = false;
        progressBar1.Visible = false;
    }

    /// <summary>
    /// Downloads the result page, cuts out the flight table and hands it to
    /// <see cref="GetData"/>. Returns silently when the table cannot be located;
    /// download failures are shown in a message box.
    /// </summary>
    public void Bind()
    {
        // Local snapshot of the result page. The live source used by the author was:
        // http://www.linkosky.com/UI/AirTicket/SingleFlightShowAllV.aspx? CT=00&JT=01&OC=SHA&DD=2010-05-12&DT=00&DC=PEK&AL=ALL&DR=true&ET=True&SPID=00015032&ORGID=15144
        string firstPage = "C:\\Documents and Settings\\Administrator\\桌面\\c.html";

        try
        {
            byte[] pageData;
            using (WebClient astoWebClient = new WebClient())
            {
                // Authenticate with the credentials of the current user.
                astoWebClient.Credentials = CredentialCache.DefaultCredentials;
                pageData = astoWebClient.DownloadData(firstPage);
            }

            // The page is GB2312-encoded; decode explicitly instead of relying on the
            // machine's ANSI code page (Encoding.Default).
            string pageHtml = Encoding.GetEncoding("gb2312").GetString(pageData).Trim();

            int m = pageHtml.IndexOf(TableStartMarker, StringComparison.Ordinal);
            if (m == -1 || m + TableStartSkip > pageHtml.Length)
            {
                return; // No table found: nothing to parse.
            }

            // Drop everything up to and including the start of the table.
            string pageText = pageHtml.Remove(0, m + TableStartSkip);

            int n = pageText.IndexOf(TableEndMarker, StringComparison.Ordinal);
            if (n < TableEndBackoff)
            {
                // Also covers n == -1; Remove(n - 86) would otherwise throw.
                return;
            }

            // Drop everything that follows the table.
            string keyText = pageText.Remove(n - TableEndBackoff);
            GetData(keyText);
        }
        catch (WebException webEx)
        {
            MessageBox.Show(webEx.ToString());
        }
    }

    /// <summary>
    /// Parses the table HTML into <c>dt</c> and stores every completed flight through
    /// <see cref="add"/>.
    /// </summary>
    /// <param name="ddd">HTML fragment containing the table rows.</param>
    /// <remarks>
    /// Rows are consumed in groups of three, starting at index 1 (row 0 is never read):
    /// a row with <c>i % 3 == 1</c> fills columns 0-4 of a new record, the row with
    /// <c>i % 3 == 2</c> fills columns 5-9 and completes it, and rows with
    /// <c>i % 3 == 0</c> are skipped.
    /// </remarks>
    private void GetData(string ddd)
    {
        dt = new DataTable();
        foreach (string columnName in ColumnNames)
        {
            dt.Columns.Add(new DataColumn(columnName, typeof(string)));
        }

        if (string.IsNullOrEmpty(ddd))
        {
            return;
        }

        DataRow dr = null;
        MatchCollection rowCollection = RowRegex.Matches(ddd);

        for (int i = 1; i < rowCollection.Count; i++)
        {
            int phase = i % 3;
            if (phase == 0)
            {
                continue;
            }

            bool isFirstHalf = phase == 1;
            if (isFirstHalf)
            {
                dr = dt.NewRow();
            }

            int columnOffset = isFirstHalf ? 0 : ColumnsPerHalf;
            MatchCollection columnCollection = ColumnRegex.Matches(rowCollection[i].Value);
            int cellCount = Math.Min(columnCollection.Count, ColumnsPerHalf);

            for (int j = 0; j < cellCount; j++)
            {
                dr[j + columnOffset] = ExtractCellText(columnCollection[j].Value);
            }

            if (!isFirstHalf)
            {
                dt.Rows.Add(dr);
                add(dr[0].ToString(), dr[1].ToString(), dr[2].ToString(), dr[3].ToString(),
                    dr[4].ToString(), dr[5].ToString(), dr[6].ToString(), dr[7].ToString(),
                    dr[8].ToString(), dr[9].ToString());
            }
        }
    }

    // Returns the raw text between the opening tag's '>' and the closing </td> of one cell.
    private static string ExtractCellText(string cellHtml)
    {
        int openEnd = cellHtml.IndexOf('>');
        if (openEnd < 0)
        {
            return string.Empty;
        }

        // The closing-tag literal was stripped from the listing; "</td>" is restored to
        // agree with ColumnRegex. The search ignores case because the regex does too.
        int closeStart = cellHtml.IndexOf("</td>", openEnd, StringComparison.OrdinalIgnoreCase);
        if (closeStart < 0)
        {
            return string.Empty;
        }

        return cellHtml.Substring(openEnd + 1, closeStart - openEnd - 1);
    }

    /// <summary>
    /// Stores one flight record by running the <c>tAirline_Add</c> stored procedure.
    /// </summary>
    /// <remarks>
    /// Exceptions from the helper propagate unchanged. The previous
    /// <c>catch (Exception e) { throw e; }</c> only reset the stack trace.
    /// NOTE(review): the helper's return value is ignored, as before; confirm whether
    /// a false result should be reported.
    /// </remarks>
    public void add(string fAirlineName, string fAirlineNo, string fAirlineType, string fsTime_City, string feTime_City, string fSeatType, string fSeatNum, string fPrice, string fBackNum, string fNetPrice)
    {
        SqlParameter[] ps = new SqlParameter[]
        {
            new SqlParameter("@fAirlineName", fAirlineName),
            new SqlParameter("@fAirlineNo", fAirlineNo),
            new SqlParameter("@fAirlineType", fAirlineType),
            new SqlParameter("@fsTime_City", fsTime_City),
            new SqlParameter("@feTime_City", feTime_City),
            new SqlParameter("@fSeatType", fSeatType),
            new SqlParameter("@fSeatNum", fSeatNum),
            new SqlParameter("@fPrice", fPrice),
            new SqlParameter("@fBackNum", fBackNum),
            new SqlParameter("@fNetPrice", fNetPrice)
        };

        WindowsFormsApplication1.SqlHelper.RunProcedureReturnBool("tAirline_Add", ps);
    }

    // ---- The members below only drive the WinForms UI; the members above are the core logic. ----

    // Starts the progress animation on a background worker and locks the search button.
    private void btnSearch_Click(object sender, EventArgs e)
    {
        label.Text = "请稍后,系统正在解析数据...";
        label.Visible = true;
        progressBar1.Visible = true;
        btnSearch.Enabled = false;

        worker = new BackgroundWorker();
        worker.WorkerReportsProgress = true;
        worker.WorkerSupportsCancellation = true;
        worker.DoWork += new DoWorkEventHandler(worker_DoWork);
        worker.ProgressChanged += new ProgressChangedEventHandler(worker_ProgressChanged);
        worker.RunWorkerCompleted += new RunWorkerCompletedEventHandler(worker_RunWorkerCompleted);
        worker.RunWorkerAsync();
    }

    // Runs the scrape once the animation has finished and restores the UI in every outcome.
    // NOTE(review): Bind() still executes on the UI thread here, as in the original.
    private void worker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
    {
        try
        {
            if (e.Cancelled)
            {
                label.Text = "Cancelled";
            }
            else if (e.Error != null)
            {
                label.Text = "Error";
            }
            else
            {
                // Only scrape when the worker finished normally.
                Bind();
                if (dt != null && dt.Rows.Count > 0)
                {
                    dataGridView1.DataSource = dt;
                }

                label.Visible = false;
            }
        }
        catch (Exception exts)
        {
            label.Text = "Error";
            MessageBox.Show(exts.ToString());
        }
        finally
        {
            // Previously skipped on cancel/error/exception, leaving the form locked.
            btnSearch.Enabled = true;
            progressBar1.Value = 0;
            progressBar1.Visible = false;
        }
    }

    private void worker_DoWork(object sender, DoWorkEventArgs e)
    {
        MoveList((BackgroundWorker)sender, e);
    }

    // Reports progress in ten 10% steps, half a second apart, until done or cancelled.
    private void MoveList(BackgroundWorker backgroundWorker, DoWorkEventArgs e)
    {
        for (int i = 0; i < ProgressSteps; i++)
        {
            // Use the worker that raised DoWork rather than the mutable field.
            if (backgroundWorker.CancellationPending)
            {
                e.Cancel = true;
                break;
            }

            backgroundWorker.ReportProgress((i + 1) * (100 / ProgressSteps), i);
            Thread.Sleep(500);
        }
    }

    private void worker_ProgressChanged(object sender, ProgressChangedEventArgs e)
    {
        progressBar1.Value = e.ProgressPercentage;
    }
}

 

   最后执行结果如下图

 

转载于:https://www.cnblogs.com/wangchunming/archive/2012/04/01/2427899.html

你可能感兴趣的文章
编写一个函数isMerge,判断一个字符串str是否可以由其他两个字符串part1和part2“组合”而成...
查看>>
NYOJ-613//HDU-1176-免费馅饼,数字三角形的兄弟~~
查看>>
graphite custom functions
查看>>
一个自己写的判断2个相同对象的属性值差异的工具类
查看>>
oracle连接的三个配置文件(转)
查看>>
Python内置函数(29)——help
查看>>
oracle导出/导入 expdp/impdp
查看>>
Objective - C基础: 第四天 - 10.SEL类型的基本认识
查看>>
Android TextView加上阴影效果
查看>>
《梦断代码》读书笔记(三)
查看>>
Java8 Lambda表达应用 -- 单线程游戏server+异步数据库操作
查看>>
[Unity3D]Unity3D游戏开发MatchTarget的作用攀登效果实现
查看>>
AngularJS学习篇(一)
查看>>
关于Xshell无法连接centos6.4的问题
查看>>
css3动画——基本准则
查看>>
输入月份和日期,得出是今年第几天
查看>>
pig自定义UDF
查看>>
Kubernetes 运维学习笔记
查看>>
spring security 11种过滤器介绍
查看>>
代码实现导航栏分割线
查看>>