How to load a PDF document in C# and VB.Net

How to load a PDF document in C# and VB.Net


  1. Load from a file:
    
    DocumentCore dc = DocumentCore.Load(@"d:\Book.pdf");
    
    The dc object represents a document loaded into memory. The file format is detected automatically from the file extension (".pdf").

    After loading, the document is represented as a tree of objects whose root node is the DocumentCore class.

    To guarantee that the loaded content is treated as PDF, and to set loading options, pass a PdfLoadOptions instance as the second parameter.

    
    DocumentCore dc = DocumentCore.Load(@"d:\Book.pdf", new PdfLoadOptions());
    
  2. Load from a Stream:
    
                // Assume that we already have a PDF document as a byte array.
                byte[] pdfBytes = null;
                // pdfBytes = ...

                DocumentCore dc = null;
                // Wrap the bytes declared above in a stream (the variable name must match 'pdfBytes').
                using (MemoryStream pdfStream = new MemoryStream(pdfBytes))
                {
                    // Specifying PdfLoadOptions we explicitly set that a loadable document is PDF.
                    PdfLoadOptions pdfLO = new PdfLoadOptions()
                    {
                        // 'false' - means to load vector graphics as is. Don't transform it to raster images.
                        RasterizeVectorGraphics = false,

                        // The PDF format doesn't have real tables, in fact it's a set of orthogonal graphic lines.
                        // In case of 'true' the component will detect and recreate tables from graphic lines.
                        DetectTables = false,

                        // 'true' - Load embedded fonts from PDF document, even if the font with the same name is installed in your System.
                        PreserveEmbeddedFonts = false,

                        // Load only the 1st page from the document.
                        PageIndex = 0,
                        PageCount = 1
                    };

                    // Load a PDF document from the MemoryStream.
                    // Pass the configured 'pdfLO' so the options above actually take effect.
                    dc = DocumentCore.Load(pdfStream, pdfLO);
                }
                // Here we can do with our document 'dc' anything we need.
 

Complete code

using System;
using System.IO;
using SautinSoft.Document;

namespace Example
{
    /// <summary>
    /// Console sample showing two ways to load a PDF document into a
    /// <c>DocumentCore</c> instance: from a file path and from a stream.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Runs the file-based sample; switch the calls below
        /// to try the stream-based sample instead.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            LoadPDFFromFile();
            //LoadPDFFromStream();
        }

        /// <summary>
        /// Loads a PDF document into DocumentCore (dc) from a file.
        /// </summary>
        /// <remarks>
        /// Details: https://www.sautinsoft.com/products/document/examples/load-pdf-document-net-csharp-vb.php
        /// </remarks>
        static void LoadPDFFromFile()
        {
            string filePath = @"..\..\example.pdf";

            // The file format is detected automatically from the file extension: ".pdf".
            // But as shown in LoadPDFFromStream, we can specify PdfLoadOptions as 2nd parameter
            // to explicitly set that a loadable document has PDF format.
            DocumentCore dc = DocumentCore.Load(filePath);

            if (dc != null)
            {
                Console.WriteLine("Loaded successfully!");
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }

        /// <summary>
        /// Loads a PDF document into DocumentCore (dc) from a MemoryStream,
        /// applying explicitly configured <c>PdfLoadOptions</c>.
        /// </summary>
        /// <remarks>
        /// Details: https://www.sautinsoft.com/products/document/examples/load-pdf-document-net-csharp-vb.php
        /// </remarks>
        static void LoadPDFFromStream()
        {
            // Assume that we already have a PDF document as bytes array.
            byte[] fileBytes = File.ReadAllBytes(@"..\..\example.pdf");

            DocumentCore dc = null;

            // Create a MemoryStream over the bytes; it is disposed when the block ends.
            using (MemoryStream pdfStream = new MemoryStream(fileBytes))
            {
                // Specifying PdfLoadOptions we explicitly set that a loadable document is PDF.
                PdfLoadOptions pdfLO = new PdfLoadOptions()
                {
                    // 'false' - means to load vector graphics as is. Don't transform it to raster images.
                    RasterizeVectorGraphics = false,

                    // The PDF format doesn't have real tables, in fact it's a set of orthogonal graphic lines.
                    // In case of 'true' the component will detect and recreate tables from graphic lines.
                    DetectTables = false,

                    // 'true' - Load embedded fonts from PDF document, even if the font with the same name is installed in your System.
                    PreserveEmbeddedFonts = false,

                    // Load only the 1st page from the document.
                    PageIndex = 0,
                    PageCount = 1
                };

                // Load a PDF document from the MemoryStream.
                // Pass the configured 'pdfLO' (not a fresh default instance) so the
                // options set above are actually applied.
                dc = DocumentCore.Load(pdfStream, pdfLO);
            }

            if (dc != null)
            {
                Console.WriteLine("Loaded successfully!");
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}

Download.

        
            Imports System
Imports System.IO
Imports SautinSoft.Document

''' <summary>
''' Console sample showing two ways to load a PDF document into a
''' DocumentCore instance: from a file path and from a stream.
''' </summary>
Module Sample
    ''' <summary>
    ''' Entry point. Runs the file-based sample; switch the calls below
    ''' to try the stream-based sample instead.
    ''' </summary>
    Sub Main()
        LoadPDFFromFile()
        'LoadPDFFromStream()
    End Sub

    ''' <summary>
    ''' Loads a PDF document into DocumentCore (dc) from a file.
    ''' </summary>
    ''' <remarks>
    ''' Details: https://www.sautinsoft.com/products/document/examples/load-pdf-document-net-csharp-vb.php
    ''' </remarks>
    Sub LoadPDFFromFile()
        Dim filePath As String = "..\example.pdf"

        ' The file format is detected automatically from the file extension: ".pdf".
        ' But as shown in LoadPDFFromStream, we can specify PdfLoadOptions as 2nd parameter
        ' to explicitly set that a loadable document has PDF format.
        Dim dc As DocumentCore = DocumentCore.Load(filePath)

        If dc IsNot Nothing Then
            Console.WriteLine("Loaded successfully!")
            Console.ReadKey()
        End If
    End Sub

    ''' <summary>
    ''' Loads a PDF document into DocumentCore (dc) from a MemoryStream,
    ''' applying explicitly configured PdfLoadOptions.
    ''' </summary>
    ''' <remarks>
    ''' Details: https://www.sautinsoft.com/products/document/examples/load-pdf-document-net-csharp-vb.php
    ''' </remarks>
    Sub LoadPDFFromStream()
        ' Assume that we already have a PDF document as bytes array.
        Dim fileBytes() As Byte = File.ReadAllBytes("..\example.pdf")

        Dim dc As DocumentCore = Nothing

        ' Create a MemoryStream over the bytes; it is disposed at End Using.
        Using pdfStream As New MemoryStream(fileBytes)

            ' Specifying PdfLoadOptions we explicitly set that a loadable document is PDF.
            '
            ' RasterizeVectorGraphics = False
            ' This means to load vector graphics as is. Don't transform it to raster images.
            '
            ' DetectTables = False
            ' This means don't detect tables.
            ' The PDF format doesn't have real tables, in fact it's a set of orthogonal graphic lines.
            ' Set it to 'True' and the component will detect and recreate tables from graphic lines.
            '
            ' PreserveEmbeddedFonts = False
            ' True - Means to load embedded fonts from PDF document,
            ' even if the font with the same name is installed in your System.
            '
            ' PageIndex = 0, PageCount = 1
            ' Load only the 1st page from the document.
            Dim pdfLO As New PdfLoadOptions() With {
                .RasterizeVectorGraphics = False,
                .DetectTables = False,
                .PreserveEmbeddedFonts = False,
                .PageIndex = 0,
                .PageCount = 1
            }

            ' Load a PDF document from the MemoryStream.
            ' Pass the configured 'pdfLO' (not a fresh default instance) so the
            ' options set above are actually applied.
            dc = DocumentCore.Load(pdfStream, pdfLO)
        End Using

        If dc IsNot Nothing Then
            Console.WriteLine("Loaded successfully!")
            Console.ReadKey()
        End If
    End Sub
End Module

Download.


If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below:



Questions and suggestions from you are always welcome!

We have been developing .NET components since 2002. We know the PDF, DOCX, RTF, HTML, XLSX and image formats. If you need any assistance with creating, modifying or converting documents in various formats, we can help you. We will write any code example for you absolutely free.

© SautinSoft 2002 - 2019