华域联盟 .Net asp.net采集网页图片的具体方法

asp.net采集网页图片的具体方法

在网上找了一下,大多数做法都是通过字符串操作找出img标签,这种方式操作起来比较麻烦,代码读起来也比较累。这里我用的方法是通过WebBrowser控件加载页面,再用HtmlDocument类来操作,省去了字符串解析的步骤:直接调用GetElementsByTagName("img"),把页面中所有img元素返回到一个HtmlElementCollection对象里,然后逐个读取src属性得到图片地址并下载。注意:WebBrowser是Windows Forms控件,因此下面的示例是一个WinForms程序。

代码如下:

复制代码 代码如下:

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.Text.RegularExpressions;

using System.Net;

using System.IO;

using System.Windows.Forms;

namespace WindowsFormsApplication1

{

    public class GatherPic

    {

        private string savePath;

        private string getUrl;

        private WebBrowser wb;

        private int iImgCount;

        //初始化参数

        public GatherPic(string sWebUrl, string sSavePath)

        {

            this.getUrl = sWebUrl;

            this.savePath = sSavePath;

        }

        //开始采集

        public bool start()

        {

            if (getUrl.Trim().Equals(""))

            {

                MessageBox.Show("哪来的虾米连网址都没输!");

                return false;

            }

            this.wb = new WebBrowser();

            this.wb.Navigate(getUrl);

            //委托事件

            this.wb.DocumentCompleted += new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(DocumentCompleted);

            return true;

        }

        //WebBrowser.DocumentCompleted委托事件

        private void DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)

        {

            //页面里框架iframe加载完成不掉用SearchImgList()

            if (e.Url != wb.Document.Url) return;

            SearchImgList();

        }

        //检查出所有图片并采集到本地

        public void SearchImgList()

        {

            string sImgUrl;

            //取得所有图片地址

            HtmlElementCollection elemColl = this.wb.Document.GetElementsByTagName("img");

            this.iImgCount = elemColl.Count;

            foreach (HtmlElement elem in elemColl)

            {

                sImgUrl = elem.GetAttribute("src");

                //调用保存远程图片函数

                SaveImageFromWeb(sImgUrl, this.savePath);

            }

        }

        //保存远程图片函数

        public int SaveImageFromWeb(string imgUrl, string path)

        {

            string imgName = imgUrl.ToString().Substring(imgUrl.ToString().LastIndexOf("/") + 1);

            path = path + "\\" + imgName;

            string defaultType = ".jpg";

            string[] imgTypes = new string[] { ".jpg", ".jpeg", ".png", ".gif", ".bmp" };

            string imgType = imgUrl.ToString().Substring(imgUrl.ToString().LastIndexOf("."));

            foreach (string it in imgTypes)

            {

                if (imgType.ToLower().Equals(it))

                    break;

                if (it.Equals(".bmp"))

                    imgType = defaultType;

            }

            try

            {

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(imgUrl);

                request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";

                request.Timeout = 10000;

                WebResponse response = request.GetResponse();

                Stream stream = response.GetResponseStream();

                if (response.ContentType.ToLower().StartsWith("image/"))

                {

                    byte[] arrayByte = new byte[1024];

                    int imgLong = (int)response.ContentLength;

                    int l = 0;

                    // CreateDirectory(path);

                    FileStream fso = new FileStream(path, FileMode.Create);

                    while (l < imgLong)

                    {

                        int i = stream.Read(arrayByte, 0, 1024);

                        fso.Write(arrayByte, 0, i);

                        l += i;

                    }

                    fso.Close();

                    stream.Close();

                    response.Close();

                    return 1;

                }

                else

                {

                    return 0;

                }

            }

            catch (WebException)

            {

                return 0;

            }

            catch (UriFormatException)

            {

                return 0;

            }

        }

    }

}

//----------------- Usage example --------------------

// The path is a verbatim string (@"..."): in a regular literal the "\t" of
// "C:\test" would be read as a TAB character instead of a backslash and "t".
GatherPic gatherpic = new GatherPic("http://www.baidu.com", @"C:\test");

// Make sure the directory C:\test exists before running.

gatherpic.start();
您可能感兴趣的文章:

  • 利用MSXML2.XmlHttp和Adodb.Stream采集图片
  • asp.net(c#)做一个网页数据采集工具
  • asp.net c#采集需要登录页面的实现原理及代码
  • PHP远程采集图片详细教程
  • asp.net采集页面上所有图像图片资源的具体方法

本文由 华域联盟 原创撰写:华域联盟 » asp.net采集网页图片的具体方法

转载请保留出处和原文链接:https://www.cnhackhy.com/50663.htm

本文来自网络,不代表华域联盟立场,转载请注明出处。

作者: sterben

发表回复

联系我们

联系我们

2551209778

在线咨询: QQ交谈

邮箱: [email protected]

工作时间:周一至周五,9:00-17:30,节假日休息

关注微信
微信扫一扫关注我们

微信扫一扫关注我们

关注微博
返回顶部