Convert PDF to Word in memory using C# and .NET


This simple Console Application shows how to convert PDF to DOCX (RTF) in memory via two methods.
The first method converts PDF into DOCX format using arrays of bytes.
The second method shows how to convert PDF to RTF opearing with MemoryStream.

Complete code

using System;
using System.IO;

namespace Sample
{
    class Sample
    {
        static void Main(string[] args)
        {
            ConvertPdfToDocxBytes();
            //ConvertPdfToRtfStream();
        }

        private static void ConvertPdfToDocxBytes()
        {
            string pdfFile = Path.GetFullPath(@"..\..\..\simple text.pdf");

            // Assume that we already have a PDF document as array of bytes.
            byte[] pdf = File.ReadAllBytes(pdfFile);
            byte[] docx = null;
                                             // Get your free 30-day key here:   
            // https://sautinsoft.com/start-for-free/
			
            // Convert PDF to word in memory
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            f.OpenPdf(pdf);

            if (f.PageCount > 0)
            {
                // Convert pdf to word in memory.
                docx = f.ToWord();

                // Save word document to a file only for demonstration purposes.
                if (docx != null)
                {
                    //3. Save to DOCX document to a file for demonstration purposes.
                    string wordFile = "Result.docx";
                    File.WriteAllBytes(wordFile, docx);
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(wordFile) { UseShellExecute = true });
                }
            }
        }
        private static void ConvertPdfToRtfStream()
        {
            string pdfFile = Path.GetFullPath(@"..\..\..\simple text.pdf");
            MemoryStream rtfStream = new MemoryStream();
            // Convert PDF to word in memory
                                             // Get your free 30-day key here:   
            // https://sautinsoft.com/start-for-free/
			
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // Assume that we already have a PDF document as stream.
            using (FileStream pdfStream = new FileStream(pdfFile, FileMode.Open, FileAccess.Read))
            {
                f.OpenPdf(pdfStream);

                if (f.PageCount > 0)
                {
                    f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf;
                    int res = f.ToWord(rtfStream);

                    // Save rtfStream to a file for demonstration purposes.
                    if (res == 0)
                    {
                        string rtfFile = "Result.rtf";
                        File.WriteAllBytes(rtfFile, rtfStream.ToArray());
                        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(rtfFile) { UseShellExecute = true });
                    }
                }
            }
        }
    }
}

Download

Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft

Module Sample

    Sub Main()
        ConvertPdfToDocxBytes()
        'ConvertPdfToRtfStream()
    End Sub
    Private Sub ConvertPdfToDocxBytes()
        Dim pdfFile As String = Path.GetFullPath("..\..\..\simple text.pdf")

        ' Assume that we already have a PDF document as array of bytes.
        Dim pdf() As Byte = File.ReadAllBytes(pdfFile)
        Dim docx() As Byte = Nothing
                                ' Get your free 30-day key here: 
                                ' https://sautinsoft.com/start-for-free/
		
        ' Convert PDF to word in memory
        Dim f As New SautinSoft.PdfFocus()

        f.OpenPdf(pdf)

        If f.PageCount > 0 Then
            ' Convert pdf to word in memory.
            docx = f.ToWord()

            ' Save word document to a file only for demonstration purposes.
            If docx IsNot Nothing Then
                '3. Save to DOCX document to a file for demonstration purposes.
                Dim wordFile As String = "Result.docx"
                File.WriteAllBytes(wordFile, docx)
                System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(wordFile) With {.UseShellExecute = True})
            End If
        End If
    End Sub
    Private Sub ConvertPdfToRtfStream()
        Dim pdfFile As String = Path.GetFullPath("..\..\..\simple text.pdf")
        Dim rtfStream As New MemoryStream()
        ' Convert PDF to word in memory
        Dim f As New SautinSoft.PdfFocus()
        'this property is necessary only for registered version
        'f.Serial = "XXXXXXXXXXX"

        ' Assume that we already have a PDF document as stream.
        Using pdfStream As New FileStream(pdfFile, FileMode.Open, FileAccess.Read)
            f.OpenPdf(pdfStream)

            If f.PageCount > 0 Then
                f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf
                Dim res As Integer = f.ToWord(rtfStream)

                ' Save rtfStream to a file for demonstration purposes.
                If res = 0 Then
                    Dim rtfFile As String = "Result.rtf"
                    File.WriteAllBytes(rtfFile, rtfStream.ToArray())
                    System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(rtfFile) With {.UseShellExecute = True})
                End If
            End If
        End Using
    End Sub
End Module

Download


If you need a new code example or have a question: email us at support@sautinsoft.com or ask at Online Chat (right-bottom corner of this page) or use the Form below:



Questions and suggestions from you are always welcome!

We are developing .Net components since 2002. We know PDF, DOCX, RTF, HTML, XLSX and Images formats. If you need any assistance with creating, modifying or converting documents in various formats, we can help you. We will write any code example for you absolutely free.