C# 多线程挖掘泛目录标题
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
using Amib.Threading;
using CsharpHttpHelper;
using System.Threading;
using System.IO;
namespace Demo
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form: runs the designer-generated initialization and then switches
/// off the WinForms check that rejects control access from non-owning threads.
/// </summary>
public Form1()
{
    this.InitializeComponent();

    // Static switch on Control. Other methods of this form update labels and
    // text boxes from worker threads, which only works with the check disabled.
    Control.CheckForIllegalCrossThreadCalls = false;
}
Thread obj;
/// <summary>
/// Start handler: reads the number of requests from textBox3 and launches a background
/// thread that calls <see cref="GetResultstring"/> once per index, at most 50 at a time.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Parse on the UI thread. A FormatException thrown inside the worker thread
    // would be unhandled there and would terminate the whole process.
    int count;
    if (!int.TryParse(textBox3.Text.Trim(), out count) || count <= 0)
    {
        MessageBox.Show("请输入有效的采集数量");
        return;
    }

    // Ignore clicks while a run is still active; starting a second thread would
    // orphan the first one, and the stop button can only reach the thread in 'obj'.
    if (obj != null && obj.IsAlive)
    {
        return;
    }

    Thread worker = new Thread(new ThreadStart(() =>
    {
        Parallel.For(
            0,
            count,
            new ParallelOptions { MaxDegreeOfParallelism = 50 },
            p => { GetResultstring(p.ToString()); });
    }));

    // Configure the thread through the local before it starts, instead of through
    // the shared field from inside the thread body.
    worker.IsBackground = true;
    obj = worker;
    worker.Start();
}
int i = 1;
string path;
/// <summary>
/// Serializes appends to the output file. GetResultstring runs on many parallel
/// workers, and two writers opening the same file at once would fail.
/// </summary>
private static readonly object outputFileLock = new object();

/// <summary>
/// Runs <paramref name="action"/> on the thread that owns this form's window handle,
/// or inline when the caller is already on that thread.
/// </summary>
/// <param name="action">Code that reads or updates controls.</param>
private void RunOnUiThread(Action action)
{
    if (InvokeRequired)
    {
        Invoke(action);
    }
    else
    {
        action();
    }
}

/// <summary>
/// Requests one randomly named sub-directory of the base URL in textBox1, extracts the
/// page title, keeps only the characters in U+4E00..U+9FBB, appends a non-empty result to
/// "&lt;host&gt;.txt" in the application directory and reports progress in the form.
/// </summary>
/// <param name="str">
/// Ignored on input (the caller passes the loop index); reused as the working variable.
/// </param>
/// <returns>
/// The decoded title on success. If a step fails, whatever <paramref name="str"/> held
/// at that moment (the incoming argument or the raw page title).
/// </returns>
private string GetResultstring(string str)
{
    try
    {
        // Read the base URL once, on the UI thread.
        string baseUrl = null;
        RunOnUiThread(() => { baseUrl = textBox1.Text; });

        HttpHelper http = new HttpHelper();
        HttpItem item = new HttpItem
        {
            URL = baseUrl.Trim() + GetRandomString(7, true, true, false, false, "") + "/",
            Method = "get",
            UserAgent = "Mozilla/5.0 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)"
        };
        HttpResult result = http.GetHtml(item);
        str = HttpHelper.GetHtmlTitle(result.Html);

        string requestUrl = item.URL;
        RunOnUiThread(() => { label1.Text = "当前网址:" + requestUrl; });

        // Take the part of the title before "_", falling back to the part before "-".
        // NOTE(review): exact GetBetweenHtml semantics are not visible here - confirm.
        string title = HttpHelper.GetBetweenHtml(str, "^", "_");
        if (string.IsNullOrEmpty(title))
        {
            title = HttpHelper.GetBetweenHtml(str, "^", "-");
        }
        str = unHTMLEncode(title);

        // Removing every character outside the range equals concatenating all
        // single-character matches of the range, without needing a scratch TextBox.
        string chineseOnly = Regex.Replace(str, @"[^\u4e00-\u9fbb]", "");

        string host = HttpHelper.GetBetweenHtml(baseUrl, "http://", "/");
        string outputPath = AppDomain.CurrentDomain.BaseDirectory + "" + host + ".txt";
        path = outputPath; // keep the field updated for any other reader of 'path'

        if (chineseOnly.Length != 0)
        {
            // StreamWriter creates the file when missing. The lock is released before
            // any UI call so the UI thread is never waited on while holding it.
            lock (outputFileLock)
            {
                using (StreamWriter writer = new StreamWriter(outputPath, true))
                {
                    writer.WriteLine(chineseOnly);
                }
            }

            RunOnUiThread(() =>
            {
                textBox2.AppendText(chineseOnly + "\r\n");
                textBox4.AppendText(chineseOnly + " 已经写入文件" + "\r\n");
            });
        }

        // Atomic counterpart of i++: Increment returns the new value, so subtract one
        // to display the value the counter had before this call.
        int ordinal = Interlocked.Increment(ref i) - 1;
        RunOnUiThread(() => { label3.Text = "当前采集第:" + ordinal.ToString() + " 词"; });
    }
    catch (Exception ex)
    {
        // Best effort: one failed request must not stop the batch, but leave a trace
        // instead of discarding the failure silently.
        System.Diagnostics.Trace.WriteLine(ex);
    }
    return str;
}
/// <summary>
/// Decodes numeric character codes embedded in text. If the text contains numeric HTML
/// character references (decimal or hexadecimal), those are replaced by their characters.
/// Otherwise the text is split on spaces, every token that parses as a valid code point
/// is replaced by its character, and the tokens are joined without separators.
/// </summary>
/// <param name="theText">Text to decode.</param>
/// <returns>
/// The decoded text. Text without references and without spaces is returned unchanged.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="theText"/> is null.</exception>
public static string unHTMLEncode(string theText)
{
    if (theText == null)
    {
        throw new ArgumentNullException("theText");
    }

    if (theText.Contains("&#"))
    {
        // Accept 1-7 decimal digits or an x-prefixed hex value. The previous pattern
        // required exactly five alphanumerics and crashed in int.Parse on letters.
        return Regex.Replace(
            theText,
            @"&#(?:(?<Dec>[0-9]{1,7})|[xX](?<Hex>[0-9a-fA-F]{1,6}));",
            m =>
            {
                int codePoint = m.Groups["Dec"].Success
                    ? int.Parse(m.Groups["Dec"].Value)
                    : Convert.ToInt32(m.Groups["Hex"].Value, 16);
                string decoded;
                // References outside the Unicode range stay as written.
                return TryDecodeCodePoint(codePoint, out decoded) ? decoded : m.Value;
            });
    }

    string[] tokens = theText.Split(' ');
    if (tokens.Length == 1)
    {
        return theText;
    }

    // Visit every token, including the last one, which the previous loop bound skipped.
    for (int index = 0; index < tokens.Length; index++)
    {
        int code;
        string decoded;
        if (int.TryParse(tokens[index], out code) && TryDecodeCodePoint(code, out decoded))
        {
            tokens[index] = decoded;
        }
    }
    return string.Concat(tokens);
}

/// <summary>
/// Converts a Unicode code point to its string form when it is a valid scalar value.
/// </summary>
/// <param name="codePoint">Candidate code point.</param>
/// <param name="decoded">The character(s) for the code point, or null when invalid.</param>
/// <returns>True when <paramref name="codePoint"/> is in 0..0x10FFFF and not a surrogate.</returns>
private static bool TryDecodeCodePoint(int codePoint, out string decoded)
{
    bool isValid = codePoint >= 0
        && codePoint <= 0x10FFFF
        && (codePoint < 0xD800 || codePoint > 0xDFFF);
    decoded = isValid ? char.ConvertFromUtf32(codePoint) : null;
    return isValid;
}
/// <summary>
/// Builds a random string by drawing characters uniformly from the selected character
/// classes plus any custom characters. The generator is a <see cref="Random"/> seeded
/// from the cryptographic RNG.
/// </summary>
/// <param name="length">Number of characters to produce; zero or less yields an empty string.</param>
/// <param name="useNum">True to include the digits 0-9.</param>
/// <param name="useLow">True to include the lowercase letters a-z.</param>
/// <param name="useUpp">True to include the uppercase letters A-Z.</param>
/// <param name="useSpe">True to include ASCII punctuation characters.</param>
/// <param name="custom">Extra characters to include; null is treated as empty.</param>
/// <returns>A random string of the requested length.</returns>
/// <exception cref="ArgumentException">
/// No character class is selected and <paramref name="custom"/> is null or empty.
/// </exception>
public static string GetRandomString(int length, bool useNum, bool useLow, bool useUpp, bool useSpe, string custom)
{
    if (length <= 0)
    {
        return string.Empty;
    }

    string alphabet = custom ?? string.Empty;
    if (useNum) { alphabet += "0123456789"; }
    if (useLow) { alphabet += "abcdefghijklmnopqrstuvwxyz"; }
    if (useUpp) { alphabet += "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; }
    if (useSpe) { alphabet += "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }

    if (alphabet.Length == 0)
    {
        throw new ArgumentException(
            "Select at least one character class or pass a non-empty custom character list.",
            "custom");
    }

    byte[] seed = new byte[4];
    using (var seedSource = new System.Security.Cryptography.RNGCryptoServiceProvider())
    {
        seedSource.GetBytes(seed);
    }
    Random random = new Random(BitConverter.ToInt32(seed, 0));

    char[] chars = new char[length];
    for (int index = 0; index < chars.Length; index++)
    {
        // Next(max) excludes max, so pass the full length; "Length - 1" made the
        // last character of the alphabet unreachable.
        chars[index] = alphabet[random.Next(alphabet.Length)];
    }
    return new string(chars);
}
/// <summary>
/// Stop handler: aborts the thread stored in <c>obj</c> if one has been started and
/// is still running.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    // 'obj' is null until the start button has been clicked; calling Abort on it
    // unconditionally threw NullReferenceException.
    Thread worker = obj;
    if (worker != null && worker.IsAlive)
    {
        worker.Abort();
    }
}
// Handler named for button4's Click event (the wiring is not visible in this file).
// The body is empty, so invoking it does nothing.
private void button4_Click(object sender, EventArgs e)
{
}
// Handler named for button3's Click event (the wiring is not visible in this file).
// The body is empty, so invoking it does nothing.
private void button3_Click(object sender, EventArgs e)
{
}
}
}
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
using Amib.Threading;
using CsharpHttpHelper;
using System.Threading;
using System.IO;
namespace Demo
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form: runs the designer-generated initialization and then switches
/// off the WinForms check that rejects control access from non-owning threads.
/// </summary>
public Form1()
{
    this.InitializeComponent();

    // Static switch on Control. Other methods of this form update labels and
    // text boxes from worker threads, which only works with the check disabled.
    Control.CheckForIllegalCrossThreadCalls = false;
}
Thread obj;
/// <summary>
/// Start handler: reads the number of requests from textBox3 and launches a background
/// thread that calls <see cref="GetResultstring"/> once per index, at most 50 at a time.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Parse on the UI thread. A FormatException thrown inside the worker thread
    // would be unhandled there and would terminate the whole process.
    int count;
    if (!int.TryParse(textBox3.Text.Trim(), out count) || count <= 0)
    {
        MessageBox.Show("请输入有效的采集数量");
        return;
    }

    // Ignore clicks while a run is still active; starting a second thread would
    // orphan the first one, and the stop button can only reach the thread in 'obj'.
    if (obj != null && obj.IsAlive)
    {
        return;
    }

    Thread worker = new Thread(new ThreadStart(() =>
    {
        Parallel.For(
            0,
            count,
            new ParallelOptions { MaxDegreeOfParallelism = 50 },
            p => { GetResultstring(p.ToString()); });
    }));

    // Configure the thread through the local before it starts, instead of through
    // the shared field from inside the thread body.
    worker.IsBackground = true;
    obj = worker;
    worker.Start();
}
int i = 1;
string path;
/// <summary>
/// Serializes appends to the output file. GetResultstring runs on many parallel
/// workers, and two writers opening the same file at once would fail.
/// </summary>
private static readonly object outputFileLock = new object();

/// <summary>
/// Runs <paramref name="action"/> on the thread that owns this form's window handle,
/// or inline when the caller is already on that thread.
/// </summary>
/// <param name="action">Code that reads or updates controls.</param>
private void RunOnUiThread(Action action)
{
    if (InvokeRequired)
    {
        Invoke(action);
    }
    else
    {
        action();
    }
}

/// <summary>
/// Requests one randomly named sub-directory of the base URL in textBox1, extracts the
/// page title, keeps only the characters in U+4E00..U+9FBB, appends a non-empty result to
/// "&lt;host&gt;.txt" in the application directory and reports progress in the form.
/// </summary>
/// <param name="str">
/// Ignored on input (the caller passes the loop index); reused as the working variable.
/// </param>
/// <returns>
/// The decoded title on success. If a step fails, whatever <paramref name="str"/> held
/// at that moment (the incoming argument or the raw page title).
/// </returns>
private string GetResultstring(string str)
{
    try
    {
        // Read the base URL once, on the UI thread.
        string baseUrl = null;
        RunOnUiThread(() => { baseUrl = textBox1.Text; });

        HttpHelper http = new HttpHelper();
        HttpItem item = new HttpItem
        {
            URL = baseUrl.Trim() + GetRandomString(7, true, true, false, false, "") + "/",
            Method = "get",
            UserAgent = "Mozilla/5.0 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)"
        };
        HttpResult result = http.GetHtml(item);
        str = HttpHelper.GetHtmlTitle(result.Html);

        string requestUrl = item.URL;
        RunOnUiThread(() => { label1.Text = "当前网址:" + requestUrl; });

        // Take the part of the title before "_", falling back to the part before "-".
        // NOTE(review): exact GetBetweenHtml semantics are not visible here - confirm.
        string title = HttpHelper.GetBetweenHtml(str, "^", "_");
        if (string.IsNullOrEmpty(title))
        {
            title = HttpHelper.GetBetweenHtml(str, "^", "-");
        }
        str = unHTMLEncode(title);

        // Removing every character outside the range equals concatenating all
        // single-character matches of the range, without needing a scratch TextBox.
        string chineseOnly = Regex.Replace(str, @"[^\u4e00-\u9fbb]", "");

        string host = HttpHelper.GetBetweenHtml(baseUrl, "http://", "/");
        string outputPath = AppDomain.CurrentDomain.BaseDirectory + "" + host + ".txt";
        path = outputPath; // keep the field updated for any other reader of 'path'

        if (chineseOnly.Length != 0)
        {
            // StreamWriter creates the file when missing. The lock is released before
            // any UI call so the UI thread is never waited on while holding it.
            lock (outputFileLock)
            {
                using (StreamWriter writer = new StreamWriter(outputPath, true))
                {
                    writer.WriteLine(chineseOnly);
                }
            }

            RunOnUiThread(() =>
            {
                textBox2.AppendText(chineseOnly + "\r\n");
                textBox4.AppendText(chineseOnly + " 已经写入文件" + "\r\n");
            });
        }

        // Atomic counterpart of i++: Increment returns the new value, so subtract one
        // to display the value the counter had before this call.
        int ordinal = Interlocked.Increment(ref i) - 1;
        RunOnUiThread(() => { label3.Text = "当前采集第:" + ordinal.ToString() + " 词"; });
    }
    catch (Exception ex)
    {
        // Best effort: one failed request must not stop the batch, but leave a trace
        // instead of discarding the failure silently.
        System.Diagnostics.Trace.WriteLine(ex);
    }
    return str;
}
/// <summary>
/// Decodes numeric character codes embedded in text. If the text contains numeric HTML
/// character references (decimal or hexadecimal), those are replaced by their characters.
/// Otherwise the text is split on spaces, every token that parses as a valid code point
/// is replaced by its character, and the tokens are joined without separators.
/// </summary>
/// <param name="theText">Text to decode.</param>
/// <returns>
/// The decoded text. Text without references and without spaces is returned unchanged.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="theText"/> is null.</exception>
public static string unHTMLEncode(string theText)
{
    if (theText == null)
    {
        throw new ArgumentNullException("theText");
    }

    if (theText.Contains("&#"))
    {
        // Accept 1-7 decimal digits or an x-prefixed hex value. The previous pattern
        // required exactly five alphanumerics and crashed in int.Parse on letters.
        return Regex.Replace(
            theText,
            @"&#(?:(?<Dec>[0-9]{1,7})|[xX](?<Hex>[0-9a-fA-F]{1,6}));",
            m =>
            {
                int codePoint = m.Groups["Dec"].Success
                    ? int.Parse(m.Groups["Dec"].Value)
                    : Convert.ToInt32(m.Groups["Hex"].Value, 16);
                string decoded;
                // References outside the Unicode range stay as written.
                return TryDecodeCodePoint(codePoint, out decoded) ? decoded : m.Value;
            });
    }

    string[] tokens = theText.Split(' ');
    if (tokens.Length == 1)
    {
        return theText;
    }

    // Visit every token, including the last one, which the previous loop bound skipped.
    for (int index = 0; index < tokens.Length; index++)
    {
        int code;
        string decoded;
        if (int.TryParse(tokens[index], out code) && TryDecodeCodePoint(code, out decoded))
        {
            tokens[index] = decoded;
        }
    }
    return string.Concat(tokens);
}

/// <summary>
/// Converts a Unicode code point to its string form when it is a valid scalar value.
/// </summary>
/// <param name="codePoint">Candidate code point.</param>
/// <param name="decoded">The character(s) for the code point, or null when invalid.</param>
/// <returns>True when <paramref name="codePoint"/> is in 0..0x10FFFF and not a surrogate.</returns>
private static bool TryDecodeCodePoint(int codePoint, out string decoded)
{
    bool isValid = codePoint >= 0
        && codePoint <= 0x10FFFF
        && (codePoint < 0xD800 || codePoint > 0xDFFF);
    decoded = isValid ? char.ConvertFromUtf32(codePoint) : null;
    return isValid;
}
/// <summary>
/// Builds a random string by drawing characters uniformly from the selected character
/// classes plus any custom characters. The generator is a <see cref="Random"/> seeded
/// from the cryptographic RNG.
/// </summary>
/// <param name="length">Number of characters to produce; zero or less yields an empty string.</param>
/// <param name="useNum">True to include the digits 0-9.</param>
/// <param name="useLow">True to include the lowercase letters a-z.</param>
/// <param name="useUpp">True to include the uppercase letters A-Z.</param>
/// <param name="useSpe">True to include ASCII punctuation characters.</param>
/// <param name="custom">Extra characters to include; null is treated as empty.</param>
/// <returns>A random string of the requested length.</returns>
/// <exception cref="ArgumentException">
/// No character class is selected and <paramref name="custom"/> is null or empty.
/// </exception>
public static string GetRandomString(int length, bool useNum, bool useLow, bool useUpp, bool useSpe, string custom)
{
    if (length <= 0)
    {
        return string.Empty;
    }

    string alphabet = custom ?? string.Empty;
    if (useNum) { alphabet += "0123456789"; }
    if (useLow) { alphabet += "abcdefghijklmnopqrstuvwxyz"; }
    if (useUpp) { alphabet += "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; }
    if (useSpe) { alphabet += "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; }

    if (alphabet.Length == 0)
    {
        throw new ArgumentException(
            "Select at least one character class or pass a non-empty custom character list.",
            "custom");
    }

    byte[] seed = new byte[4];
    using (var seedSource = new System.Security.Cryptography.RNGCryptoServiceProvider())
    {
        seedSource.GetBytes(seed);
    }
    Random random = new Random(BitConverter.ToInt32(seed, 0));

    char[] chars = new char[length];
    for (int index = 0; index < chars.Length; index++)
    {
        // Next(max) excludes max, so pass the full length; "Length - 1" made the
        // last character of the alphabet unreachable.
        chars[index] = alphabet[random.Next(alphabet.Length)];
    }
    return new string(chars);
}
/// <summary>
/// Stop handler: aborts the thread stored in <c>obj</c> if one has been started and
/// is still running.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    // 'obj' is null until the start button has been clicked; calling Abort on it
    // unconditionally threw NullReferenceException.
    Thread worker = obj;
    if (worker != null && worker.IsAlive)
    {
        worker.Abort();
    }
}
// Handler named for button4's Click event (the wiring is not visible in this file).
// The body is empty, so invoking it does nothing.
private void button4_Click(object sender, EventArgs e)
{
}
// Handler named for button3's Click event (the wiring is not visible in this file).
// The body is empty, so invoking it does nothing.
private void button3_Click(object sender, EventArgs e)
{
}
}
}
免责声明:本站所有资源均由网络搜集或用户投稿而来。若有侵权或违法违规内容,请联系本站并提供必要证明,本站将及时删除相关内容,谢谢合作!