C# /VB.NET 創建、讀取PDF文檔

生成PDF文檔我們通常可以通過文檔轉換的形式來得到想要的PDF,但我們也可以通過最直接的方式來創建PDF文檔,由此本篇文章將介紹C#如何來創建帶圖、文元素的PDF文檔。同理,對於需要讀取PDF文檔的情況,我們也可以分情況來讀取想要的文檔元素(文本、圖片)。在下面的示例中將做詳細介紹。

示例要點梳理:

一、創建PDF文檔(可支持中文)
1.寫入文本(包括頁邊距、字體、字號等設置)
2.繪入圖片
二、讀取PDF文檔
1.讀取文本
1.1讀取全部文本
1.2讀取指定區域文本
2.讀取圖片
使用工具:Free Spire.PDF for .NET 4.3 (最新免費版)
注:在編輯代碼前引用Spire.PDF.dll,dll文件可在安裝路徑下的Bin文件夾中獲取。

一、創建PDF文檔

C#

using Spire.Pdf;
using Spire.Pdf.Graphics;
using System.Drawing;

namespace CreatePDF_PDF
{
    /// <summary>
    /// Console sample that builds a single-page A4 PDF containing a block of
    /// Chinese text and a scaled image, saves it to disk and opens the result.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: creates the document, draws text and an image on one
        /// page, saves it as "PDF創建.pdf" and launches the saved file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const string outputFile = "PDF創建.pdf";

            // Create an empty PDF document.
            PdfDocument document = new PdfDocument();
            try
            {
                // Margins are given in centimeters and converted to points.
                PdfUnitConvertor unitCvtr = new PdfUnitConvertor();
                PdfMargins margins = new PdfMargins();
                margins.Top = unitCvtr.ConvertUnits(2.54f, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point);
                margins.Bottom = margins.Top;
                margins.Left = unitCvtr.ConvertUnits(3.17f, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point);
                margins.Right = margins.Left;

                // Add one A4 page that uses those margins.
                PdfPageBase page = document.Pages.Add(PdfPageSize.A4, margins);

                // Font (typeface + size) and pen (color) used for the text.
                // NOTE(review): the 'true' argument presumably enables Unicode
                // support so the Chinese glyphs render - confirm in the Spire.PDF docs.
                PdfTrueTypeFont font = new PdfTrueTypeFont(new Font("楷體", 11f), true);
                PdfPen pen = new PdfPen(Color.Black);

                // Draw the text at position (15, 13) on the page canvas.
                string text = "《蝶戀花 送春》 \n 樓外垂楊千萬縷,欲系青春,少住春還去。猶自風前飄柳絮,隨春且看歸何處?\n 綠滿山川聞杜宇,便作無情,莫也愁人苦。把酒送春春不語,黃昏卻下瀟瀟雨。";
                page.Canvas.DrawString(text, font, pen, 15, 13);

                // Load the image, scale it to 55% and draw it below the text.
                PdfImage image = PdfImage.FromFile("image1.jpg");
                float width = image.Width * 0.55f;
                float height = image.Height * 0.55f;

                // Horizontal offset: one third of the canvas width left over
                // after the scaled image is placed.
                float x = (page.Canvas.ClientSize.Width - width) / 3;
                page.Canvas.DrawImage(image, x, 60, width, height);

                // Write the document to disk.
                document.SaveToFile(outputFile);
            }
            finally
            {
                // Release the document's resources even if drawing or saving failed.
                document.Close();
            }

            // Open the saved file with its associated application.
            System.Diagnostics.Process.Start(outputFile);
        }
    }
}

創建結果:
C# /VB.NET 創建、讀取PDF文檔

VB.NET

Imports Spire.Pdf
Imports Spire.Pdf.Graphics
Imports System.Drawing

Namespace CreatePDF_PDF

    ''' <summary>
    ''' Console sample that builds a single-page A4 PDF containing a block of
    ''' Chinese text and a scaled image, saves it to disk and opens the result.
    ''' </summary>
    Class Program

        ''' <summary>
        ''' Entry point: creates the document, draws text and an image on one
        ''' page, saves it as "PDF創建.pdf" and launches the saved file.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            Const outputFile As String = "PDF創建.pdf"

            ' Create an empty PDF document.
            Dim document As PdfDocument = New PdfDocument()
            Try
                ' Margins are given in centimeters and converted to points.
                Dim unitCvtr As PdfUnitConvertor = New PdfUnitConvertor()
                Dim margins As PdfMargins = New PdfMargins()
                margins.Top = unitCvtr.ConvertUnits(2.54F, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point)
                margins.Bottom = margins.Top
                margins.Left = unitCvtr.ConvertUnits(3.17F, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point)
                margins.Right = margins.Left

                ' Add one A4 page that uses those margins.
                Dim page As PdfPageBase = document.Pages.Add(PdfPageSize.A4, margins)

                ' Font (typeface + size) and pen (color) used for the text.
                ' NOTE(review): the True argument presumably enables Unicode
                ' support so the Chinese glyphs render - confirm in the Spire.PDF docs.
                Dim textFont As PdfTrueTypeFont = New PdfTrueTypeFont(New Font("楷體", 11F), True)
                Dim pen As PdfPen = New PdfPen(Color.Black)

                ' Draw the text at position (15, 13) on the page canvas.
                Dim text As String = "《蝶戀花 送春》 " & vbLf & " 樓外垂楊千萬縷,欲系青春,少住春還去。猶自風前飄柳絮,隨春且看歸何處?" & vbLf & " 綠滿山川聞杜宇,便作無情,莫也愁人苦。把酒送春春不語,黃昏卻下瀟瀟雨。"
                page.Canvas.DrawString(text, textFont, pen, 15, 13)

                ' Load the image, scale it to 55% and draw it below the text.
                Dim picture As PdfImage = PdfImage.FromFile("image1.jpg")
                Dim width As Single = picture.Width * 0.55F
                Dim height As Single = picture.Height * 0.55F

                ' Horizontal offset: one third of the canvas width left over
                ' after the scaled image is placed.
                Dim x As Single = (page.Canvas.ClientSize.Width - width) / 3
                page.Canvas.DrawImage(picture, x, 60, width, height)

                ' Write the document to disk.
                document.SaveToFile(outputFile)
            Finally
                ' Release the document's resources even if drawing or saving failed.
                document.Close()
            End Try

            ' Open the saved file with its associated application.
            System.Diagnostics.Process.Start(outputFile)
        End Sub
    End Class
End Namespace

Spire.PDF支持直接生成PDF文檔,並可同時添加文本、圖片、圖形、水印、表格、頁眉頁腳、頁碼等元素;以上示例代碼僅以添加文本和圖片爲例,其他操作如有需要可參見相關博客文章。

二、讀取PDF文檔

1. 讀取文本

1.1 讀取全部文本

C#

using Spire.Pdf;
using System;
using System.IO;
using System.Text;
namespace ExtractText_PDF
{
    /// <summary>
    /// Console sample that extracts the text of every page of "sample.pdf"
    /// and writes it to a .txt file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: loads the PDF, concatenates the text extracted from each
        /// page, writes it to "獲取文本.txt" and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const string outputFile = "獲取文本.txt";

            // Accumulates the text of all pages in document order.
            StringBuilder content = new StringBuilder();

            PdfDocument doc = new PdfDocument();
            try
            {
                doc.LoadFromFile("sample.pdf");

                // Walk every page and append its extracted text.
                foreach (PdfPageBase page in doc.Pages)
                {
                    content.Append(page.ExtractText());
                }
            }
            finally
            {
                // Release the document's resources even if loading or extraction failed.
                doc.Close();
            }

            // Save the collected text and open the resulting file.
            File.WriteAllText(outputFile, content.ToString());
            System.Diagnostics.Process.Start(outputFile);
        }
    }
}

讀取結果:
C# /VB.NET 創建、讀取PDF文檔

VB.NET

Imports Spire.Pdf
Imports System
Imports System.IO
Imports System.Text

Namespace ExtractText_PDF

    ''' <summary>
    ''' Console sample that extracts the text of every page of "sample.pdf"
    ''' and writes it to a .txt file.
    ''' </summary>
    Class Program

        ''' <summary>
        ''' Entry point: loads the PDF, concatenates the text extracted from each
        ''' page, writes it to "獲取文本.txt" and opens that file.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            Const outputFile As String = "獲取文本.txt"

            ' Accumulates the text of all pages in document order.
            Dim content As StringBuilder = New StringBuilder()

            Dim doc As PdfDocument = New PdfDocument()
            Try
                doc.LoadFromFile("sample.pdf")

                ' Walk every page and append its extracted text.
                For Each page As PdfPageBase In doc.Pages
                    content.Append(page.ExtractText())
                Next
            Finally
                ' Release the document's resources even if loading or extraction failed.
                doc.Close()
            End Try

            ' Save the collected text and open the resulting file.
            File.WriteAllText(outputFile, content.ToString())
            System.Diagnostics.Process.Start(outputFile)
        End Sub
    End Class
End Namespace

1.2 讀取指定區域文本

C#

using Spire.Pdf;
using System.IO;
using System.Text;
using System.Drawing;

namespace ExtractText1_PDF
{
    /// <summary>
    /// Console sample that extracts only the text lying inside a rectangular
    /// region of the first page of "sample.pdf".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: loads the PDF, extracts the text within the rectangle
        /// (50, 50, 500, 170) on page one, writes it followed by a line break
        /// to "Extract.txt" and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const string outputFile = "Extract.txt";
            string text;

            PdfDocument pdf = new PdfDocument();
            try
            {
                pdf.LoadFromFile("sample.pdf");

                // First page of the document.
                PdfPageBase page = pdf.Pages[0];

                // Restrict extraction to the given rectangle (x, y, width, height).
                text = page.ExtractText(new RectangleF(50, 50, 500, 170));
            }
            finally
            {
                // Release the document's resources even if loading or extraction failed.
                pdf.Close();
            }

            // Write the text plus a trailing line break, then open the file.
            File.WriteAllText(outputFile, text + System.Environment.NewLine);
            System.Diagnostics.Process.Start(outputFile);
        }
    }
}

讀取結果:
(此時讀取的就只是指定區域內的文本)
C# /VB.NET 創建、讀取PDF文檔

VB.NET

Imports Spire.Pdf
Imports System.IO
Imports System.Text
Imports System.Drawing

Namespace ExtractText1_PDF

    ''' <summary>
    ''' Console sample that extracts only the text lying inside a rectangular
    ''' region of the first page of "sample.pdf".
    ''' </summary>
    Class Program

        ''' <summary>
        ''' Entry point: loads the PDF, extracts the text within the rectangle
        ''' (50, 50, 500, 170) on page one, writes it followed by a line break
        ''' to "Extract.txt" and opens that file.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            Const outputFile As String = "Extract.txt"
            Dim text As String

            Dim pdf As PdfDocument = New PdfDocument()
            Try
                pdf.LoadFromFile("sample.pdf")

                ' First page of the document.
                Dim page As PdfPageBase = pdf.Pages(0)

                ' Restrict extraction to the given rectangle (x, y, width, height).
                text = page.ExtractText(New RectangleF(50, 50, 500, 170))
            Finally
                ' Release the document's resources even if loading or extraction failed.
                pdf.Close()
            End Try

            ' Write the text plus a trailing line break, then open the file.
            File.WriteAllText(outputFile, text & System.Environment.NewLine)
            System.Diagnostics.Process.Start(outputFile)
        End Sub
    End Class
End Namespace

2. 讀取PDF圖片

C#

using Spire.Pdf;
using System.Collections.Generic;
using System.Drawing;

namespace ExtractImages_PDF
{
    /// <summary>
    /// Console sample that extracts every image from every page of
    /// "sample.pdf" and saves each one as a numbered PNG file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: loads the PDF, gathers the images of all pages, saves
        /// them as "image1.png", "image2.png", ... and opens the first one
        /// when at least one image was found.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Images collected from all pages, in page order.
            List<Image> extractedImages = new List<Image>();

            PdfDocument doc = new PdfDocument();
            try
            {
                doc.LoadFromFile("sample.pdf");

                // Collect the images of each page.
                for (int pageIndex = 0; pageIndex < doc.Pages.Count; pageIndex++)
                {
                    PdfPageBase page = doc.Pages[pageIndex];
                    Image[] images = page.ExtractImages();
                    if (images != null && images.Length > 0)
                    {
                        extractedImages.AddRange(images);
                    }
                }

                // Save while the document is still open, in case the images
                // depend on data owned by it.
                for (int i = 0; i < extractedImages.Count; i++)
                {
                    extractedImages[i].Save("image" + (i + 1).ToString() + ".png", System.Drawing.Imaging.ImageFormat.Png);
                }
            }
            finally
            {
                // Image wraps native GDI+ resources; release them explicitly,
                // then release the document.
                foreach (Image extracted in extractedImages)
                {
                    extracted.Dispose();
                }
                doc.Close();
            }

            // Open the first saved image, if any image was extracted.
            if (extractedImages.Count > 0)
            {
                System.Diagnostics.Process.Start("image1.png");
            }
        }
    }
}

讀取結果:
C# /VB.NET 創建、讀取PDF文檔

VB.NET

Imports Spire.Pdf
Imports System.Collections.Generic
Imports System.Drawing

Namespace ExtractImages_PDF

    ''' <summary>
    ''' Console sample that extracts every image from every page of
    ''' "sample.pdf" and saves each one as a numbered PNG file.
    ''' </summary>
    Class Program

        ''' <summary>
        ''' Entry point: loads the PDF, gathers the images of all pages, saves
        ''' them as "image1.png", "image2.png", ... and opens the first one
        ''' when at least one image was found.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            ' Images collected from all pages, in page order.
            Dim extractedImages As List(Of Image) = New List(Of Image)()

            Dim doc As PdfDocument = New PdfDocument()
            Try
                doc.LoadFromFile("sample.pdf")

                ' Collect the images of each page.
                For pageIndex As Integer = 0 To doc.Pages.Count - 1
                    Dim page As PdfPageBase = doc.Pages(pageIndex)
                    Dim images As Image() = page.ExtractImages()
                    If images IsNot Nothing AndAlso images.Length > 0 Then
                        extractedImages.AddRange(images)
                    End If
                Next

                ' Save while the document is still open, in case the images
                ' depend on data owned by it.
                For i As Integer = 0 To extractedImages.Count - 1
                    extractedImages(i).Save("image" & (i + 1).ToString() & ".png", System.Drawing.Imaging.ImageFormat.Png)
                Next
            Finally
                ' Image wraps native GDI+ resources; release them explicitly,
                ' then release the document.
                For Each extracted As Image In extractedImages
                    extracted.Dispose()
                Next
                doc.Close()
            End Try

            ' Open the first saved image, if any image was extracted.
            If extractedImages.Count > 0 Then
                System.Diagnostics.Process.Start("image1.png")
            End If
        End Sub
    End Class
End Namespace
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章