C# 中的 HTML 转图像
将 HTML 文档捕获为图像。
引言
在本文中,我将向您展示如何使用 WebBrowser
对象和 IViewObject.Draw
方法将 HTML 文档捕获为图像。根据 MSDN 的说明,该方法会将对象的表示形式绘制到指定的设备上下文中。在开始之前,我想说明一点:用这种方式得到的结果与使用商业库得到的结果相同,希望本文能对您有所帮助。
IViewObject 接口
我们首先要做的事情是定义 IViewObject
接口。
// Managed declaration of the COM IViewObject interface
// (IID 0000010d-0000-0000-C000-000000000046, IUnknown-based).
// Only Draw is used by the capture code; the remaining members are declared
// so that the interface is complete.
// NOTE: this is a ComImport interface - keep the members in this exact order
// and do not change the marshalling attributes.
[ComVisible(true), ComImport()]
[GuidAttribute("0000010d-0000-0000-C000-000000000046")]
[InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
public interface IViewObject
{
// Draws a representation of the object onto the given device context.
// PreserveSig: the int returned is the raw result code of the native call.
[return: MarshalAs(UnmanagedType.I4)]
[PreserveSig]
int Draw(
// aspect to draw (DVASPECT / DVASPECT2 value); the capture code passes 1 (DVASPECT.CONTENT)
[MarshalAs(UnmanagedType.U4)] UInt32 dwDrawAspect,
// part of the object of interest for the draw operation; the capture code passes -1
int lindex,
// pointer to additional information; the capture code passes IntPtr.Zero
IntPtr pvAspect,
// describes the target device; IntPtr.Zero selects the default target device
[In] IntPtr ptd,
// context information for the target device described by ptd
IntPtr hdcTargetDev,
// device context on which to draw
IntPtr hdcDraw,
// size of the captured image
// NOTE(review): System.Drawing.Rectangle is laid out as X, Y, Width, Height,
// while the native parameter is presumably a left/top/right/bottom rectangle;
// the two only coincide when the origin is (0,0) - confirm before passing a
// rectangle with a non-zero Location.
[MarshalAs(UnmanagedType.Struct)] ref Rectangle lprcBounds,
// region of the WebBrowser object to capture (same layout caveat as lprcBounds)
[MarshalAs(UnmanagedType.Struct)] ref Rectangle lprcWBounds,
// pointer to a continue callback; not used by the capture code (IntPtr.Zero)
IntPtr pfnContinue,
// value passed to the callback; not used by the capture code (0)
[MarshalAs(UnmanagedType.U4)] UInt32 dwContinue);
// Not used by the capture code.
[PreserveSig]
int GetColorSet([In, MarshalAs(UnmanagedType.U4)] int dwDrawAspect,
int lindex, IntPtr pvAspect,[In] IntPtr ptd,
IntPtr hicTargetDev, [Out] IntPtr ppColorSet);
// Not used by the capture code.
[PreserveSig]
int Freeze([In, MarshalAs(UnmanagedType.U4)] int dwDrawAspect,
int lindex, IntPtr pvAspect, [Out] IntPtr pdwFreeze);
// Not used by the capture code.
[PreserveSig]
int Unfreeze([In, MarshalAs(UnmanagedType.U4)] int dwFreeze);
// Not used by the capture code. Declared without PreserveSig, unlike the
// members above (presumably a failure result surfaces as an exception - verify).
void SetAdvise([In, MarshalAs(UnmanagedType.U4)] int aspects,
[In, MarshalAs(UnmanagedType.U4)] int advf,
[In, MarshalAs(UnmanagedType.Interface)] IAdviseSink pAdvSink);
// Not used by the capture code. Declared without PreserveSig, like SetAdvise.
void GetAdvise([In, Out, MarshalAs(UnmanagedType.LPArray)] int[] paspects,
[In, Out, MarshalAs(UnmanagedType.LPArray)] int[] advf,
[In, Out, MarshalAs(UnmanagedType.LPArray)] IAdviseSink[] pAdvSink);
}
以下是 Draw
方法接受的参数的摘要描述(这是我们将使用的唯一方法)
UInt32 dwDrawAspect - 指定要绘制的方面(aspect)。有效值取自 DVASPECT 和 DVASPECT2 枚举。在此示例中,我使用 DVASPECT.CONTENT,因此传递的值为 1。
int lindex - 对象中与本次绘制操作相关的部分。当前仅支持 -1。
IntPtr pvAspect - 指向附加信息的指针。
IntPtr ptd - 描述对象将要渲染到的目标设备。我们针对默认目标设备进行渲染,因此传递的值为 IntPtr.Zero。
IntPtr hdcTargetDev - ptd 参数所指目标设备的上下文信息。
IntPtr hdcDraw - 用于绘制的设备上下文。
ref Rectangle lprcBounds - 捕获图像的大小。
ref Rectangle lprcWBounds - 我们想要捕获的 WebBrowser 对象区域。
IntPtr pfnContinue - 指向回调函数的指针(此处未使用)。
UInt32 dwContinue - 作为参数传递给回调函数的值(此处未使用)。
HtmlCapture 类
现在我们已经定义了 IViewObject
接口,是时候创建一个将用于将网页捕获为图像的类了。
/// <summary>
/// Loads a web page in an off-screen <see cref="WebBrowser"/> and reports the
/// captured image through the <see cref="HtmlImageCapture"/> event.
/// </summary>
/// <remarks>
/// The class owns a <see cref="WebBrowser"/> and a <see cref="Timer"/>, both of
/// which are disposable, so call <see cref="Dispose()"/> when the capture object
/// is no longer needed.
/// </remarks>
public class HtmlCapture : IDisposable
{
    //delay between DocumentCompleted and the capture attempt, in milliseconds
    private const int ReadyDelayMilliseconds = 2000;

    private readonly WebBrowser web;
    private readonly Timer tready;
    private readonly Rectangle screen;
    //requested image size; null means "same size as the browser control"
    private Size? imgsize = null;
    private bool disposed;

    /// <summary>
    /// Signature of the handler invoked when the html document is captured.
    /// </summary>
    public delegate void HtmlCaptureEvent(object sender,
        Uri url, Bitmap image);

    /// <summary>
    /// Raised when the html document has been captured.
    /// </summary>
    public event HtmlCaptureEvent HtmlImageCapture;

    /// <summary>
    /// Creates the browser control and the timer and wires up their events.
    /// </summary>
    public HtmlCapture()
    {
        //initialise the webbrowser and the timer
        web = new WebBrowser();
        tready = new Timer();
        tready.Interval = ReadyDelayMilliseconds;
        screen = Screen.PrimaryScreen.Bounds;

        //set the webbrowser width and height to the primary screen size
        web.Width = screen.Width;
        web.Height = screen.Height;

        //suppress script errors and hide scroll bars
        web.ScriptErrorsSuppressed = true;
        web.ScrollBarsEnabled = false;

        //attach events
        web.Navigating +=
            new WebBrowserNavigatingEventHandler(web_Navigating);
        web.DocumentCompleted += new
            WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted);
        tready.Tick += new EventHandler(tready_Tick);
    }

    #region Public methods
    /// <summary>
    /// Starts navigating to <paramref name="url"/>; no explicit image size is requested.
    /// </summary>
    /// <param name="url">Address of the page to capture.</param>
    public void Create(string url)
    {
        imgsize = null;
        web.Navigate(url);
    }

    /// <summary>
    /// Starts navigating to <paramref name="url"/> and requests an image of the given size.
    /// </summary>
    /// <param name="url">Address of the page to capture.</param>
    /// <param name="imgsz">Requested size of the image.</param>
    public void Create(string url, Size imgsz)
    {
        this.imgsize = imgsz;
        web.Navigate(url);
    }

    /// <summary>
    /// Stops the timer, detaches the event handlers and disposes the browser
    /// control and the timer.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }
    #endregion

    /// <summary>
    /// Releases the resources owned by this instance.
    /// </summary>
    /// <param name="disposing">
    /// true when called from <see cref="Dispose()"/>; the managed members are
    /// only touched in that case.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (disposed)
        {
            return;
        }
        disposed = true;

        if (disposing)
        {
            //stop pending work first so no tick arrives during tear-down
            tready.Stop();
            tready.Tick -= new EventHandler(tready_Tick);
            web.Navigating -=
                new WebBrowserNavigatingEventHandler(web_Navigating);
            web.DocumentCompleted -= new
                WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted);
            tready.Dispose();
            web.Dispose();
        }
    }

    #region Events
    void web_DocumentCompleted(object sender,
        WebBrowserDocumentCompletedEventArgs e)
    {
        //start the timer
        tready.Start();
    }

    void web_Navigating(object sender, WebBrowserNavigatingEventArgs e)
    {
        //a new navigation began: stop the timer
        tready.Stop();
    }

    void tready_Tick(object sender, EventArgs e)
    {
        //stop the timer
        tready.Stop();
        //capture html as an image
        //...
    }
    #endregion
}
如您所见,我使用 Timer
对象来判断 HTML 文档是否已完全加载、可以进行捕获。这样做的原因是,同一个 HTML 文档可能会多次触发 DocumentCompleted
事件:DocumentCompleted 触发时启动计时器,Navigating 触发时停止计时器。计时器的间隔为 2 秒,如果在到期之前没有新的导航将其停止,就认为文档已完全加载,并调用 tready_Tick
方法。
/// <summary>
/// Timer callback: renders the loaded document into a bitmap via
/// IViewObject.Draw and raises HtmlImageCapture with the result.
/// </summary>
/// <param name="sender">The timer that raised the tick.</param>
/// <param name="e">Not used.</param>
/// <exception cref="InvalidOperationException">
/// The browser has no document body, or the document does not expose IViewObject.
/// </exception>
void tready_Tick(object sender, EventArgs e)
{
    //stop the timer so the capture is attempted once per start
    tready.Stop();

    //the document (or its body) can be missing; fail with a clear message
    //instead of a NullReferenceException
    var document = web.Document;
    if (document == null || document.Body == null)
    {
        throw new InvalidOperationException(
            "The WebBrowser has no HTML document body to capture.");
    }

    //get the size of the document's body
    Rectangle body = document.Body.ScrollRectangle;

    //use whichever is larger per dimension: the document or the screen
    Rectangle docRectangle = new Rectangle(
        0,
        0,
        Math.Max(body.Width, screen.Width),
        Math.Max(body.Height, screen.Height));

    //set the width and height of the WebBrowser object
    web.Width = docRectangle.Width;
    web.Height = docRectangle.Height;

    //if the imgsize is null, the size of the image will
    //be the same as the size of webbrowser object
    //otherwise set the image size to imgsize
    Rectangle imgRectangle = imgsize.HasValue
        ? new Rectangle(new Point(0, 0), imgsize.Value)
        : docRectangle;

    //get the viewobject of the WebBrowser
    IViewObject ivo = document.DomDocument as IViewObject;
    if (ivo == null)
    {
        throw new InvalidOperationException(
            "The loaded document does not support IViewObject.");
    }

    //create a bitmap object; it is handed to the event subscribers on success
    Bitmap bitmap = new Bitmap(imgRectangle.Width, imgRectangle.Height);
    try
    {
        using (Graphics g = Graphics.FromImage(bitmap))
        {
            //get the handle to the device context and draw
            IntPtr hdc = g.GetHdc();
            int hr;
            try
            {
                hr = ivo.Draw(1, -1, IntPtr.Zero, IntPtr.Zero,
                    IntPtr.Zero, hdc, ref imgRectangle,
                    ref docRectangle, IntPtr.Zero, 0);
            }
            finally
            {
                //the device context must be released even if Draw throws
                g.ReleaseHdc(hdc);
            }

            //Draw is declared PreserveSig, so a failure has to be checked by hand
            if (hr < 0)
            {
                System.Runtime.InteropServices.Marshal.ThrowExceptionForHR(hr);
            }
        }
    }
    catch
    {
        //nobody will receive the bitmap, so release it here
        bitmap.Dispose();
        throw;
    }

    //invoke the HtmlImageCapture event (copy first: the event is null
    //when nothing is subscribed)
    HtmlCaptureEvent handler = HtmlImageCapture;
    if (handler == null)
    {
        bitmap.Dispose();
        return;
    }
    handler(this, web.Url, bitmap);
}
使用代码
HtmlCapture
具有一个重载的 Create
方法。 如果您使用 Create(string url)
方法,则图像的大小将与 HTML 文档的大小相同。 如果要创建 HTML 文档的缩略图,请使用 Create(string url,Size imgsz)
。
/// <summary>
/// Sample click handler: creates an HtmlCapture, subscribes to its
/// HtmlImageCapture event and requests a capture.
/// </summary>
private void button2_Click(object sender, EventArgs e)
{
    const string pageUrl = "https://codeproject.org.cn";

    HtmlCapture capture = new HtmlCapture();
    capture.HtmlImageCapture += hc_HtmlImageCapture;

    //capture without an explicit image size
    capture.Create(pageUrl);

    //or capture with a requested image size of 200x300
    //NOTE(review): as written both calls run one after the other, so the
    //size passed here is the one in effect afterwards - in real code keep
    //only one of the two calls
    Size thumbnailSize = new Size(200, 300);
    capture.Create(pageUrl, thumbnailSize);
}
/// <summary>
/// Saves the captured image as a BMP file named after the authority
/// (host and, if present, port) of the captured url.
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="url">Address of the captured page.</param>
/// <param name="image">The captured image; disposed here after saving.</param>
void hc_HtmlImageCapture(object sender, Uri url, Bitmap image)
{
    //the authority may contain characters that are not allowed in a
    //file name (for example the ':' before a port number)
    string fileName = url.Authority;
    foreach (char invalid in System.IO.Path.GetInvalidFileNameChars())
    {
        fileName = fileName.Replace(invalid, '_');
    }

    //this handler is the final consumer of the bitmap, so release it when done
    using (image)
    {
        //pass the format explicitly: Save(string) does not pick the encoder
        //from the ".bmp" extension
        image.Save("C:/" + fileName + ".bmp",
            System.Drawing.Imaging.ImageFormat.Bmp);
    }
}