在 ASP.NET 网站和 WinForm 开发过程中,常常需要抓取某个网站或地址的源代码,因此有必要编写一个读取网页源代码的工具类:
/// <summary>
/// Helpers for downloading the source text of a web page, using several
/// different System.Net APIs.
/// </summary>
public class HTML
{
    /// <summary>
    /// Downloads the resource at <paramref name="url"/> as raw bytes with
    /// <c>WebClient.DownloadData</c> and decodes them to a string.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <param name="encoding">Name of the text encoding (passed to <c>Encoding.GetEncoding</c>) used to decode the bytes.</param>
    /// <returns>The decoded content of the resource.</returns>
    public string GetHTML(string url, string encoding)
    {
        // WebClient is disposable; release it even when the download throws.
        using (WebClient web = new WebClient())
        {
            byte[] buffer = web.DownloadData(url);
            return Encoding.GetEncoding(encoding).GetString(buffer);
        }
    }

    /// <summary>
    /// Reads the resource at <paramref name="url"/> through the stream returned by
    /// <c>WebClient.OpenRead</c>.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <param name="encoding">Name of the text encoding (passed to <c>Encoding.GetEncoding</c>) used by the reader.</param>
    /// <returns>The full text read from the stream.</returns>
    public string GetWebClient(string url, string encoding)
    {
        // The nested using statements guarantee the client, the stream and the
        // reader are all released, including when reading fails part-way.
        using (WebClient client = new WebClient())
        using (Stream stream = client.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(encoding)))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Reads the resource at <paramref name="url"/> using a request created by
    /// <c>WebRequest.Create</c>.
    /// </summary>
    /// <param name="url">Absolute URI of the page to download.</param>
    /// <param name="encoding">Name of the text encoding (passed to <c>Encoding.GetEncoding</c>) used by the reader.</param>
    /// <returns>The full text of the response body.</returns>
    public string GetWebRequest(string url, string encoding)
    {
        Uri uri = new Uri(url);
        WebRequest request = WebRequest.Create(uri);
        return ReadResponseText(request, encoding);
    }

    /// <summary>
    /// Reads the resource at <paramref name="url"/> using an <c>HttpWebRequest</c>
    /// that sends browser-like User-Agent, Accept and Accept-Language headers.
    /// </summary>
    /// <param name="url">Absolute HTTP(S) URI of the page to download; the created request is cast to <c>HttpWebRequest</c>.</param>
    /// <param name="encoding">Name of the text encoding (passed to <c>Encoding.GetEncoding</c>) used by the reader.</param>
    /// <returns>The full text of the response body.</returns>
    public string GetHttpWebRequest(string url, string encoding)
    {
        Uri uri = new Uri(url);
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);

        // The property value must be the header VALUE only. The previous literal
        // embedded the "User-Agent:" header name and lacked the closing parenthesis,
        // so a malformed User-Agent header was sent.
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
        request.Accept = "*/*";
        request.KeepAlive = true;
        request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

        return ReadResponseText(request, encoding);
    }

    /// <summary>
    /// Sends <paramref name="request"/>, reads the whole response body as text and
    /// releases the response, its stream and the reader, also on failure.
    /// </summary>
    /// <param name="request">The prepared request to execute.</param>
    /// <param name="encoding">Name of the text encoding (passed to <c>Encoding.GetEncoding</c>) used by the reader.</param>
    /// <returns>The full text of the response body.</returns>
    private static string ReadResponseText(WebRequest request, string encoding)
    {
        using (WebResponse response = request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(encoding)))
        {
            return reader.ReadToEnd();
        }
    }
}
评论列表: