生成PDF文檔時,我們通常通過文檔轉換的方式來得到想要的PDF,但也可以用最直接的方式從頭創建PDF文檔。因此,本篇文章將介紹如何使用C#創建帶有圖片、文本元素的PDF文檔。同理,對於需要讀取PDF文檔的情況,我們也可以分情況讀取想要的文檔元素(文本、圖片)。下面的示例將做詳細介紹。
示例要點梳理:
一、創建PDF文檔(可支持中文)
1.寫入文本(包括頁邊距、字體、字號等設置)
2.繪入圖片
二、讀取PDF文檔
1.讀取文本
1.1讀取全部文本
1.2讀取指定區域文本
2.讀取圖片
使用工具:Free Spire.PDF for .NET 4.3 (最新免費版)
注:在編寫代碼前,需先在項目中添加對Spire.PDF.dll的引用;該dll文件可在安裝路徑下的Bin文件夾中獲取。
一、創建PDF文檔
C#
using Spire.Pdf;
using Spire.Pdf.Graphics;
using System.Drawing;
namespace CreatePDF_PDF
{
    /// <summary>
    /// Console sample that builds a single-page A4 PDF containing a block of
    /// text and one image using Spire.PDF, then opens the result.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Creates "PDF創建.pdf" in the working directory and opens it with the
        /// associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Single source of truth for the output path (used for saving and opening).
            const string outputPath = "PDF創建.pdf";

            PdfDocument document = new PdfDocument();
            try
            {
                // Page margins: 2.54 cm top/bottom and 3.17 cm left/right, converted to points.
                PdfUnitConvertor unitCvtr = new PdfUnitConvertor();
                PdfMargins margins = new PdfMargins();
                margins.Top = unitCvtr.ConvertUnits(2.54f, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point);
                margins.Bottom = margins.Top;
                margins.Left = unitCvtr.ConvertUnits(3.17f, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point);
                margins.Right = margins.Left;

                // Add one A4 page that uses those margins.
                PdfPageBase page = document.Pages.Add(PdfPageSize.A4, margins);

                // 11 pt font for the Chinese text.
                // NOTE(review): the second argument (true) is presumably the Unicode flag
                // needed for CJK glyphs - confirm against the Spire.PDF API reference.
                PdfTrueTypeFont font = new PdfTrueTypeFont(new Font("楷體", 11f), true);
                // NOTE(review): the text is drawn with a pen rather than a brush; confirm
                // that this gives the intended text appearance.
                PdfPen pen = new PdfPen(Color.Black);

                // Write the text at canvas position (15, 13).
                string text = "《蝶戀花 送春》 \n 樓外垂楊千萬縷,欲系青春,少住春還去。猶自風前飄柳絮,隨春且看歸何處?\n 綠滿山川聞杜宇,便作無情,莫也愁人苦。把酒送春春不語,黃昏卻下瀟瀟雨。";
                page.Canvas.DrawString(text, font, pen, 15, 13);

                // Load the image, scale it to 55 % and draw it 60 units from the top.
                PdfImage image = PdfImage.FromFile("image1.jpg");
                float width = image.Width * 0.55f;
                float height = image.Height * 0.55f;
                // Horizontal offset: one third of the space left over beside the image.
                float x = (page.Canvas.ClientSize.Width - width) / 3;
                page.Canvas.DrawImage(image, x, 60, width, height);

                document.SaveToFile(outputPath);
            }
            finally
            {
                // Release the document's resources even when drawing or saving fails.
                document.Close();
            }

            // UseShellExecute is set explicitly: it defaults to false on .NET Core/.NET 5+,
            // where starting a document path (rather than an executable) would fail.
            System.Diagnostics.Process.Start(
                new System.Diagnostics.ProcessStartInfo(outputPath) { UseShellExecute = true });
        }
    }
}
創建結果:
VB.NET
Imports Spire.Pdf
Imports Spire.Pdf.Graphics
Imports System.Drawing
Namespace CreatePDF_PDF
    ''' <summary>
    ''' Console sample that builds a single-page A4 PDF containing a block of
    ''' text and one image using Spire.PDF, then opens the result.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Creates "PDF創建.pdf" in the working directory and opens it with the
        ''' associated application.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            ' Single source of truth for the output path (used for saving and opening).
            Const outputPath As String = "PDF創建.pdf"

            Dim document As PdfDocument = New PdfDocument()
            Try
                ' Page margins: 2.54 cm top/bottom and 3.17 cm left/right, converted to points.
                Dim unitCvtr As PdfUnitConvertor = New PdfUnitConvertor()
                Dim margins As PdfMargins = New PdfMargins()
                margins.Top = unitCvtr.ConvertUnits(2.54F, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point)
                margins.Bottom = margins.Top
                margins.Left = unitCvtr.ConvertUnits(3.17F, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point)
                margins.Right = margins.Left

                ' Add one A4 page that uses those margins.
                Dim page As PdfPageBase = document.Pages.Add(PdfPageSize.A4, margins)

                ' 11 pt font for the Chinese text.
                ' NOTE(review): the second argument (True) is presumably the Unicode flag
                ' needed for CJK glyphs - confirm against the Spire.PDF API reference.
                Dim font As PdfTrueTypeFont = New PdfTrueTypeFont(New Font("楷體", 11F), True)
                Dim pen As PdfPen = New PdfPen(Color.Black)

                ' Write the text at canvas position (15, 13).
                Dim text As String = "《蝶戀花 送春》 " & vbLf & " 樓外垂楊千萬縷,欲系青春,少住春還去。猶自風前飄柳絮,隨春且看歸何處?" & vbLf & " 綠滿山川聞杜宇,便作無情,莫也愁人苦。把酒送春春不語,黃昏卻下瀟瀟雨。"
                page.Canvas.DrawString(text, font, pen, 15, 13)

                ' Load the image, scale it to 55 % and draw it 60 units from the top.
                Dim image As PdfImage = PdfImage.FromFile("image1.jpg")
                Dim width As Single = image.Width * 0.55F
                Dim height As Single = image.Height * 0.55F
                ' Horizontal offset: one third of the space left over beside the image.
                Dim x As Single = (page.Canvas.ClientSize.Width - width) / 3
                page.Canvas.DrawImage(image, x, 60, width, height)

                document.SaveToFile(outputPath)
            Finally
                ' Release the document's resources even when drawing or saving fails.
                document.Close()
            End Try

            ' UseShellExecute is set explicitly: it defaults to False on .NET Core/.NET 5+,
            ' where starting a document path (rather than an executable) would fail.
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outputPath) With {.UseShellExecute = True})
        End Sub
    End Class
End Namespace
在創建PDF文檔時,Spire.PDF支持直接生成PDF文檔,並可同時添加文本、圖片、圖形、水印、表格、頁眉頁腳、頁碼等元素。這裏的示例代碼僅以添加文本和圖片爲例,如需其他操作,可以參見以下博客:
- C# 添加PDF水印
- C# 創建PDF表格
二、 讀取PDF文檔
測試文檔:
1. 讀取PDF文本
1.1讀取全部文本
C#
using Spire.Pdf;
using System;
using System.IO;
using System.Text;
namespace ExtractText_PDF
{
    /// <summary>
    /// Console sample that extracts the text of every page of a PDF with
    /// Spire.PDF and writes it to a text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads "sample.pdf", concatenates the text extracted from each page,
        /// saves it as "獲取文本.txt" and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Single source of truth for the output path (used for writing and opening).
            const string fileName = "獲取文本.txt";

            StringBuilder content = new StringBuilder();

            PdfDocument doc = new PdfDocument();
            try
            {
                doc.LoadFromFile("sample.pdf");

                // Append each page's text in page order; no separator is inserted between pages.
                foreach (PdfPageBase page in doc.Pages)
                {
                    content.Append(page.ExtractText());
                }
            }
            finally
            {
                // Release the document's resources even when loading or extraction fails.
                doc.Close();
            }

            File.WriteAllText(fileName, content.ToString());

            // UseShellExecute is set explicitly: it defaults to false on .NET Core/.NET 5+,
            // where starting a document path (rather than an executable) would fail.
            System.Diagnostics.Process.Start(
                new System.Diagnostics.ProcessStartInfo(fileName) { UseShellExecute = true });
        }
    }
}
讀取結果:
VB.NET
Imports Spire.Pdf
Imports System
Imports System.IO
Imports System.Text
Namespace ExtractText_PDF
    ''' <summary>
    ''' Console sample that extracts the text of every page of a PDF with
    ''' Spire.PDF and writes it to a text file.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Reads "sample.pdf", concatenates the text extracted from each page,
        ''' saves it as "獲取文本.txt" and opens that file.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            ' Single source of truth for the output path (used for writing and opening).
            Const fileName As String = "獲取文本.txt"

            Dim content As StringBuilder = New StringBuilder()

            Dim doc As PdfDocument = New PdfDocument()
            Try
                doc.LoadFromFile("sample.pdf")

                ' Append each page's text in page order; no separator is inserted between pages.
                For Each page As PdfPageBase In doc.Pages
                    content.Append(page.ExtractText())
                Next
            Finally
                ' Release the document's resources even when loading or extraction fails.
                doc.Close()
            End Try

            File.WriteAllText(fileName, content.ToString())

            ' UseShellExecute is set explicitly: it defaults to False on .NET Core/.NET 5+,
            ' where starting a document path (rather than an executable) would fail.
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(fileName) With {.UseShellExecute = True})
        End Sub
    End Class
End Namespace
1.2 讀取指定區域文本
C#
using Spire.Pdf;
using System.IO;
using System.Text;
using System.Drawing;
namespace ExtractText1_PDF
{
    /// <summary>
    /// Console sample that extracts the text inside a rectangular region of
    /// the first page of a PDF with Spire.PDF.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads "sample.pdf", extracts the text found in the rectangle
        /// (x = 50, y = 50, width = 500, height = 170) of the first page, writes it
        /// followed by a line break to "Extract.txt" and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Single source of truth for the output path (used for writing and opening).
            const string outputPath = "Extract.txt";

            string text;

            PdfDocument pdf = new PdfDocument();
            try
            {
                pdf.LoadFromFile("sample.pdf");

                // Only the first page is inspected.
                PdfPageBase page = pdf.Pages[0];
                text = page.ExtractText(new RectangleF(50, 50, 500, 170));
            }
            finally
            {
                // Release the document's resources even when loading or extraction fails.
                pdf.Close();
            }

            // The trailing line break matches what StringBuilder.AppendLine produced before.
            File.WriteAllText(outputPath, text + System.Environment.NewLine);

            // UseShellExecute is set explicitly: it defaults to false on .NET Core/.NET 5+,
            // where starting a document path (rather than an executable) would fail.
            System.Diagnostics.Process.Start(
                new System.Diagnostics.ProcessStartInfo(outputPath) { UseShellExecute = true });
        }
    }
}
讀取結果:
(此時讀取的就只是指定區域內的文本)
VB.NET
Imports Spire.Pdf
Imports System.IO
Imports System.Text
Imports System.Drawing
Namespace ExtractText1_PDF
    ''' <summary>
    ''' Console sample that extracts the text inside a rectangular region of
    ''' the first page of a PDF with Spire.PDF.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Reads "sample.pdf", extracts the text found in the rectangle
        ''' (x = 50, y = 50, width = 500, height = 170) of the first page, writes it
        ''' followed by a line break to "Extract.txt" and opens that file.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            ' Single source of truth for the output path (used for writing and opening).
            Const outputPath As String = "Extract.txt"

            Dim text As String = Nothing

            Dim pdf As PdfDocument = New PdfDocument()
            Try
                pdf.LoadFromFile("sample.pdf")

                ' Only the first page is inspected.
                Dim page As PdfPageBase = pdf.Pages(0)
                text = page.ExtractText(New RectangleF(50, 50, 500, 170))
            Finally
                ' Release the document's resources even when loading or extraction fails.
                pdf.Close()
            End Try

            ' The trailing line break matches what StringBuilder.AppendLine produced before.
            File.WriteAllText(outputPath, text & System.Environment.NewLine)

            ' UseShellExecute is set explicitly: it defaults to False on .NET Core/.NET 5+,
            ' where starting a document path (rather than an executable) would fail.
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outputPath) With {.UseShellExecute = True})
        End Sub
    End Class
End Namespace
2. 讀取PDF圖片
C#
using Spire.Pdf;
using System.Collections.Generic;
using System.Drawing;
namespace ExtractImages_PDF
{
    /// <summary>
    /// Console sample that extracts every image from a PDF with Spire.PDF and
    /// saves each one as a PNG file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads "sample.pdf", collects the images of all pages, saves them as
        /// "image1.png", "image2.png", ... in the working directory and, when at
        /// least one image was found, opens "image1.png".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            List<Image> extractedImages = new List<Image>();

            PdfDocument doc = new PdfDocument();
            try
            {
                doc.LoadFromFile("sample.pdf");

                // Visit each page in order and collect whatever images it contains.
                foreach (PdfPageBase page in doc.Pages)
                {
                    Image[] images = page.ExtractImages();
                    if (images != null && images.Length > 0)
                    {
                        extractedImages.AddRange(images);
                    }
                }

                // File names are 1-based: image1.png, image2.png, ...
                for (int i = 0; i < extractedImages.Count; i++)
                {
                    extractedImages[i].Save("image" + (i + 1).ToString() + ".png", System.Drawing.Imaging.ImageFormat.Png);
                }
            }
            finally
            {
                // Image is IDisposable; dispose the extracted images once they are saved
                // (or on failure), then release the document itself.
                foreach (Image extracted in extractedImages)
                {
                    extracted.Dispose();
                }
                doc.Close();
            }

            if (extractedImages.Count > 0)
            {
                // UseShellExecute is set explicitly: it defaults to false on .NET Core/.NET 5+,
                // where starting a document path (rather than an executable) would fail.
                System.Diagnostics.Process.Start(
                    new System.Diagnostics.ProcessStartInfo("image1.png") { UseShellExecute = true });
            }
        }
    }
}
讀取結果:
VB.NET
Imports Spire.Pdf
Imports System.Collections.Generic
Imports System.Drawing
Namespace ExtractImages_PDF
    ''' <summary>
    ''' Console sample that extracts every image from a PDF with Spire.PDF and
    ''' saves each one as a PNG file.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Reads "sample.pdf", collects the images of all pages, saves them as
        ''' "image1.png", "image2.png", ... in the working directory and, when at
        ''' least one image was found, opens "image1.png".
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            Dim extractedImages As List(Of Image) = New List(Of Image)()

            Dim doc As PdfDocument = New PdfDocument()
            Try
                doc.LoadFromFile("sample.pdf")

                ' Visit each page in order and collect whatever images it contains.
                For Each page As PdfPageBase In doc.Pages
                    Dim images As Image() = page.ExtractImages()
                    If images IsNot Nothing AndAlso images.Length > 0 Then
                        extractedImages.AddRange(images)
                    End If
                Next

                ' File names are 1-based: image1.png, image2.png, ...
                For i As Integer = 0 To extractedImages.Count - 1
                    extractedImages(i).Save("image" & (i + 1).ToString() & ".png", System.Drawing.Imaging.ImageFormat.Png)
                Next
            Finally
                ' Image is IDisposable; dispose the extracted images once they are saved
                ' (or on failure), then release the document itself.
                For Each extracted As Image In extractedImages
                    extracted.Dispose()
                Next
                doc.Close()
            End Try

            If extractedImages.Count > 0 Then
                ' UseShellExecute is set explicitly: it defaults to False on .NET Core/.NET 5+,
                ' where starting a document path (rather than an executable) would fail.
                System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo("image1.png") With {.UseShellExecute = True})
            End If
        End Sub
    End Class
End Namespace