How to convert all files to a single XLS file in C# and .NET


This example shows how to convert all supported files (.pdf, .docx, .rtf) located in the same directory into a single XLS workbook file.

Download the resulting file: Result-Single.xls

Complete code

using System;
using System.IO;
using System.Collections.Generic;
using SautinSoft.Document;
using SautinSoft;

namespace Sample
{
    class Sample
    {

        static void Main(string[] args)
        {
            // Get your free 30-day key here:
            // https://sautinsoft.com/start-for-free/

            ConvertToSingleXls();
        }

        /// <summary>
        /// Merges every PDF, DOCX and RTF file found in the working directory into one PDF
        /// document and then converts that PDF into a single XLS workbook.
        /// </summary>
        /// <remarks>
        /// The intermediate "Single.pdf" and the resulting "Single.xls" are written to the
        /// current directory and opened with the default shell handler.
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-convert-pdf-docx-rtf-to-single-xls-workbook-net-csharp-vb.php
        /// </remarks>
        public static void ConvertToSingleXls()
        {
            // In this example we use not only the Document .Net component, but also
            // another SautinSoft component - PDF Focus .Net (to convert the PDF into a single XLS workbook).
            // First of all, please perform "Rebuild Solution" to restore the PDF Focus .Net package from NuGet.

            // Our steps:
            // 1. Convert all RTF, DOCX, PDF files into a single PDF document (by Document .Net).
            // 2. Convert the single PDF into a single XLS workbook (by PDF Focus .Net).

            // This file is needed only to show the intermediate result.
            string singlePdfFile = "Single.pdf";
            string workingDir = @"..\..\..\";
            string singleXlsFile = "Single.xls";

            List<string> supportedFiles = new List<string>();

            foreach (string file in Directory.GetFiles(workingDir, "*.*"))
            {
                string ext = Path.GetExtension(file);

                // Ordinal, case-insensitive comparison: extensions are identifiers, not
                // linguistic text, so the result must not depend on the current culture.
                if (string.Equals(ext, ".pdf", StringComparison.OrdinalIgnoreCase)
                    || string.Equals(ext, ".docx", StringComparison.OrdinalIgnoreCase)
                    || string.Equals(ext, ".rtf", StringComparison.OrdinalIgnoreCase))
                {
                    supportedFiles.Add(file);
                }
            }

            if (supportedFiles.Count == 0)
            {
                Console.WriteLine("No .pdf, .docx or .rtf files found in '{0}'.", Path.GetFullPath(workingDir));
                return;
            }

            // Directory.GetFiles does not guarantee any order; sort so that the documents
            // are always merged in the same sequence.
            supportedFiles.Sort(StringComparer.OrdinalIgnoreCase);

            // Create the single (merged) document.
            DocumentCore singlePDF = new DocumentCore();

            foreach (string file in supportedFiles)
            {
                DocumentCore dc = DocumentCore.Load(file);

                Console.WriteLine("Adding: {0}...", Path.GetFileName(file));

                // Create import session.
                ImportSession session = new ImportSession(dc, singlePDF, StyleImportingMode.KeepSourceFormatting);

                // Tracks the first section of the current source document without
                // searching the collection on every iteration.
                bool isFirstSection = true;

                // Loop through all sections in the source document.
                foreach (Section sourceSection in dc.Sections)
                {
                    // Because we are copying a section from one document to another,
                    // it is required to import the Section into the destination document.
                    // This adjusts any document-specific references to styles, bookmarks, etc.
                    //
                    // Importing an element creates a copy of the original element, but the copy
                    // is ready to be inserted into the destination document.
                    Section importedSection = singlePDF.Import<Section>(sourceSection, true, session);

                    // The first section of every source document starts on a new page.
                    if (isFirstSection)
                    {
                        importedSection.PageSetup.SectionStart = SectionStart.NewPage;
                        isFirstSection = false;
                    }

                    // Now the new section can be appended to the destination document.
                    singlePDF.Sections.Add(importedSection);
                }
            }

            // Save the merged document into PDF format in memory.
            byte[] singlePdfBytes;
            using (MemoryStream pdfStream = new MemoryStream())
            {
                singlePDF.Save(pdfStream, new PdfSaveOptions()
                {
                    Compliance = PdfCompliance.PDF_A1a
                });
                singlePdfBytes = pdfStream.ToArray();
            }

            // Open the intermediate result for demonstration purposes.
            File.WriteAllBytes(singlePdfFile, singlePdfBytes);
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(singlePdfFile) { UseShellExecute = true });

            SautinSoft.PdfFocus f = new PdfFocus();

            try
            {
                f.OpenPdf(singlePdfBytes);

                if (f.PageCount > 0)
                {
                    // ToExcel reports success with 0 (see the PDF Focus .Net documentation).
                    int result = f.ToExcel(singleXlsFile);

                    if (result == 0)
                    {
                        // Open the result for demonstration purposes. Only done when the
                        // workbook was really produced; starting a missing file would throw.
                        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(singleXlsFile) { UseShellExecute = true });
                    }
                    else
                    {
                        Console.WriteLine("Conversion to XLS failed, result code: {0}.", result);
                    }
                }
                else
                {
                    Console.WriteLine("The merged PDF contains no pages; nothing to convert.");
                }
            }
            finally
            {
                // Release the PDF data held by PDF Focus .Net.
                f.ClosePdf();
            }
        }
    }
}

Download

Imports System
Imports System.IO
Imports System.Collections.Generic
Imports SautinSoft.Document
Imports SautinSoft

Namespace Sample
	Friend Class Sample

		Shared Sub Main(ByVal args() As String)
			' Get your free 30-day key here:
			' https://sautinsoft.com/start-for-free/

			ConvertToSingleXls()
		End Sub

		''' <summary>
		''' Merges every PDF, DOCX and RTF file found in the working directory into one PDF
		''' document and then converts that PDF into a single XLS workbook.
		''' </summary>
		''' <remarks>
		''' The intermediate "Single.pdf" and the resulting "Single.xls" are written to the
		''' current directory and opened with the default shell handler.
		''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-convert-pdf-docx-rtf-to-single-xls-workbook-net-csharp-vb.php
		''' </remarks>
		Public Shared Sub ConvertToSingleXls()
			' In this example we use not only the Document .Net component, but also
			' another SautinSoft component - PDF Focus .Net (to convert the PDF into a single XLS workbook).
			' First of all, please perform "Rebuild Solution" to restore the PDF Focus .Net package from NuGet.

			' Our steps:
			' 1. Convert all RTF, DOCX, PDF files into a single PDF document (by Document .Net).
			' 2. Convert the single PDF into a single XLS workbook (by PDF Focus .Net).

			' This file is needed only to show the intermediate result.
			Dim singlePdfFile As String = "Single.pdf"
			Dim workingDir As String = "..\..\..\"
			Dim singleXlsFile As String = "Single.xls"

			Dim supportedFiles As New List(Of String)()

			' The loop variable is named filePath so it cannot be confused with System.IO.File.
			For Each filePath As String In Directory.GetFiles(workingDir, "*.*")
				Dim ext As String = Path.GetExtension(filePath)

				' Ordinal, case-insensitive comparison: extensions are identifiers, not
				' linguistic text, so the result must not depend on the current culture.
				If String.Equals(ext, ".pdf", StringComparison.OrdinalIgnoreCase) OrElse
				   String.Equals(ext, ".docx", StringComparison.OrdinalIgnoreCase) OrElse
				   String.Equals(ext, ".rtf", StringComparison.OrdinalIgnoreCase) Then
					supportedFiles.Add(filePath)
				End If
			Next filePath

			If supportedFiles.Count = 0 Then
				Console.WriteLine("No .pdf, .docx or .rtf files found in '{0}'.", Path.GetFullPath(workingDir))
				Return
			End If

			' Directory.GetFiles does not guarantee any order; sort so that the documents
			' are always merged in the same sequence.
			supportedFiles.Sort(StringComparer.OrdinalIgnoreCase)

			' Create the single (merged) document.
			Dim singlePDF As New DocumentCore()

			For Each filePath As String In supportedFiles
				Dim dc As DocumentCore = DocumentCore.Load(filePath)

				Console.WriteLine("Adding: {0}...", Path.GetFileName(filePath))

				' Create import session.
				Dim session As New ImportSession(dc, singlePDF, StyleImportingMode.KeepSourceFormatting)

				' Tracks the first section of the current source document without
				' searching the collection on every iteration.
				Dim isFirstSection As Boolean = True

				' Loop through all sections in the source document.
				For Each sourceSection As Section In dc.Sections
					' Because we are copying a section from one document to another,
					' it is required to import the Section into the destination document.
					' This adjusts any document-specific references to styles, bookmarks, etc.
					'
					' Importing an element creates a copy of the original element, but the copy
					' is ready to be inserted into the destination document.
					Dim importedSection As Section = singlePDF.Import(Of Section)(sourceSection, True, session)

					' The first section of every source document starts on a new page.
					If isFirstSection Then
						importedSection.PageSetup.SectionStart = SectionStart.NewPage
						isFirstSection = False
					End If

					' Now the new section can be appended to the destination document.
					singlePDF.Sections.Add(importedSection)
				Next sourceSection
			Next filePath

			' Save the merged document into PDF format in memory.
			Dim singlePdfBytes() As Byte = Nothing
			Using pdfStream As New MemoryStream()
				singlePDF.Save(pdfStream, New PdfSaveOptions() With {.Compliance = PdfCompliance.PDF_A1a})
				singlePdfBytes = pdfStream.ToArray()
			End Using

			' Open the intermediate result for demonstration purposes.
			File.WriteAllBytes(singlePdfFile, singlePdfBytes)
			System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(singlePdfFile) With {.UseShellExecute = True})

			Dim f As SautinSoft.PdfFocus = New PdfFocus()

			Try
				f.OpenPdf(singlePdfBytes)

				If f.PageCount > 0 Then
					' ToExcel reports success with 0 (see the PDF Focus .Net documentation).
					Dim result As Integer = f.ToExcel(singleXlsFile)

					If result = 0 Then
						' Open the result for demonstration purposes. Only done when the
						' workbook was really produced; starting a missing file would throw.
						System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(singleXlsFile) With {.UseShellExecute = True})
					Else
						Console.WriteLine("Conversion to XLS failed, result code: {0}.", result)
					End If
				Else
					Console.WriteLine("The merged PDF contains no pages; nothing to convert.")
				End If
			Finally
				' Release the PDF data held by PDF Focus .Net.
				f.ClosePdf()
			End Try
		End Sub
	End Class
End Namespace

Download


If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below:



Questions and suggestions from you are always welcome!

We have been developing .NET components since 2002. We know the PDF, DOCX, RTF, HTML, XLSX and image formats. If you need any assistance with creating, modifying or converting documents in various formats, we can help you. We will write any code example for you absolutely free.