Convert All Files to Searchable PDFs

26 Mar 2019CPOL
This program will convert office, text and image files to PDFs. To use this program, drag your file(s) or folders onto the script file. Files in sub-folders will be converted too.

Using the Code

The VBS script uses MS Office to convert Excel, Word, text and PowerPoint documents.

The VBS script uses the free Tesseract library (by Google) to convert images to PDF.

Const sTesseractPath = "C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"  'Download Here: https://github.com/tesseract-ocr/tesseract
Const sFileSuffix = "_out" 'Appends at the end of output file name

' --- Shared objects and state used by the procedures below ---
' NOTE: the two constants above intentionally stay on lines 1 and 2 of the
' script, because the user-facing instructions refer to them by line number.

Set fso = CreateObject("Scripting.FileSystemObject")
' Shell object used to launch external programs (tesseract.exe).
' The ProgID must be "WScript.Shell"; an empty ProgID fails at run time.
Set oShell = WScript.CreateObject("WScript.Shell")
Dim iCount: iCount = 0                      ' number of PDFs successfully created
Dim oLog                                    ' log TextStream, opened by the main section
Dim bLogUsed: bLogUsed = False              ' True once Msg has written at least one line
Dim sFolderPath: sFolderPath = GetFolderPath()  ' folder that contains this script
' Office automation objects are created lazily by the *ToPdf procedures.
Dim excel, word, powerPoint
Set excel = Nothing
Set word = Nothing
Set powerPoint = Nothing

' Main entry point: convert every file/folder dropped onto the script.
If WScript.Arguments.Count = 0 Then
    MsgBox "Please drop office and image files or folders to convert them to searchable PDFs"
Else
    ' The log is created next to the script and overwritten on every run.
    Set oLog = fso.CreateTextFile(WScript.ScriptFullName & ".log", True)

    For i = 0 To WScript.Arguments.Count - 1
      sFile = WScript.Arguments(i)
      If fso.FileExists(sFile) Then
        ProcessFile sFile
      ElseIf fso.FolderExists(sFile) Then
        ProcessFolder sFile
      End If
    Next

    ' Release any Office applications that were started for the conversion.
    CloseOfficeApps

    ' The log must be closed before it can be deleted (or opened by the user).
    oLog.Close

    If bLogUsed = False Then
      'Delete unused Log
      fso.DeleteFile WScript.ScriptFullName & ".log"
    End If

    MsgBox "Created " & iCount & " PDFs"
End If

' Converts every file in sFolder, then recurses into each of its sub-folders.
'   sFolder - path of an existing folder.
Sub ProcessFolder(sFolder)
  Dim oFolder, oFile, oSubfolder
  Set oFolder = fso.GetFolder(sFolder)

  For Each oFile In oFolder.Files
    ProcessFile oFile.Path
  Next

  For Each oSubfolder In oFolder.SubFolders
    ProcessFolder oSubfolder.Path
  Next
End Sub

' Converts a single file to "<name><sFileSuffix>.pdf" next to the original,
' dispatching on the file extension. Increments iCount on success and writes
' a log entry (via Msg) when the file is skipped or the conversion fails.
'   sFile - full path of the file to convert.
Sub ProcessFile(sFile)
  Dim sFileExt, sFileBase, sOutPdf, sOutPdfNoExt

  sFileExt = LCase(fso.GetExtensionName(sFile))

  ' Strip ".<ext>" from the path. A file without an extension is kept whole,
  ' which avoids an invalid Mid() length when the path contains no dot.
  If sFileExt = "" Then
    sFileBase = sFile
  Else
    sFileBase = Left(sFile, Len(sFile) - Len(sFileExt) - 1)
  End If

  sOutPdfNoExt = sFileBase & sFileSuffix
  sOutPdf = sOutPdfNoExt & ".pdf"

  If fso.FileExists(sOutPdf) Then
    Msg sOutPdf & " already exists"
    Exit Sub
  End If

  Select Case sFileExt
    Case "xlsx", "xls", "csv"
      ExcelToPdf sFile, sOutPdf
    Case "docx", "doc", "txt", "rtf", "sql"
      WordToPdf sFile, sOutPdf
    Case "pptx", "ppt"
      PowerPointToPdf sFile, sOutPdf
    Case "bmp","pnm","png","jfif","jpeg","jpg","tiff","gif"
      ' Tesseract appends ".pdf" itself, so it gets the name without extension.
      ImgToPdf sFile, sOutPdfNoExt
    Case Else
      Msg "File type: " & sFileExt & " is not supported"
      ' Nothing was attempted, so do not also report a failed conversion.
      Exit Sub
  End Select

  If fso.FileExists(sOutPdf) Then
    iCount = iCount + 1
  Else
    Msg sOutPdf & " could not be created"
  End If

End Sub

' Runs Tesseract OCR on an image and writes a searchable PDF.
'   sInFile - full path of the source image.
'   sOutPdf - output path WITHOUT the ".pdf" extension (Tesseract appends it).
' If tesseract.exe is not found at sTesseractPath, the user is told where to
' download it, the download page is opened, and no conversion is attempted.
Sub ImgToPdf(sInFile, sOutPdf)
  Const sDownloadUrl = "https://github.com/tesseract-ocr/tesseract"

  If fso.FileExists(sTesseractPath) = False Then
    MsgBox "Tesseract is not installed. Download Here: " & sDownloadUrl & vbCrLf & _
           "If it is installed, modify the first line of this script file to point it to tesseract.exe"
    ' Passing a URL to Run opens it in the default browser.
    oShell.Run sDownloadUrl
  Else
    ' Command line: "<tesseract.exe>" "<input image>" "<output base>" pdf
    ' Window style 1 = normal window; True = wait until Tesseract exits.
    oShell.Run """" & sTesseractPath & """ """ & sInFile & """ """ & sOutPdf & """ pdf", 1, True
  End If
End Sub

' Exports a workbook (xlsx/xls/csv) to PDF using Excel automation.
'   sFrom - full path of the source workbook.
'   sTo   - full path of the PDF to create.
' The shared Excel instance is created on first use and reused afterwards.
Sub ExcelToPdf(sFrom, sTo)
  Const xlTypePDF = 0
  Dim workbook

  If excel Is Nothing Then
    Set excel = CreateObject("Excel.Application")
  End If
  excel.ScreenUpdating = False
  excel.DisplayAlerts = False

  Set workbook = excel.Workbooks.Open(sFrom)
  workbook.ExportAsFixedFormat xlTypePDF, sTo

  ' Close without saving so the workbook does not stay open (and locked)
  ' inside the hidden Excel instance.
  workbook.Close False
  Set workbook = Nothing
End Sub

' Saves a Word-readable document (docx/doc/txt/rtf/sql) as PDF using Word automation.
'   sFrom - full path of the source document.
'   sTo   - full path of the PDF to create.
' The shared Word instance is created on first use and reused afterwards.
Sub WordToPdf(sFrom, sTo)
  Const wdFormatPDF = 17
  Dim doc

  If word Is Nothing Then
    Set word = CreateObject("Word.Application")
  End If

  Set doc = word.Documents.Open(sFrom)
  doc.SaveAs2 sTo, wdFormatPDF

  ' Close without saving changes so the document does not stay open (and
  ' locked) inside the hidden Word instance.
  doc.Close False
  Set doc = Nothing
End Sub

' Saves a presentation (pptx/ppt) as PDF using PowerPoint automation.
'   sFrom - full path of the source presentation.
'   sTo   - full path of the PDF to create.
' The shared PowerPoint instance is created on first use and reused afterwards.
Sub PowerPointToPdf(sFrom, sTo)
  Const msoFalse = 0
  Const ppSaveAsPDF = 32
  Dim pres

  If powerPoint Is Nothing Then
    Set powerPoint = CreateObject("PowerPoint.Application")
  End If

  ' Fourth argument (WithWindow) = msoFalse: open without a visible window.
  Set pres = powerPoint.Presentations.Open(sFrom, , , msoFalse)
  pres.SaveAs sTo, ppSaveAsPDF

  ' Close the presentation so it does not stay open (and locked) in PowerPoint.
  pres.Close
  Set pres = Nothing
End Sub

' Shuts down any Office applications started by the *ToPdf procedures and
' releases their references. Dropping the reference alone does not end the
' application process, so Quit is called explicitly.
Sub CloseOfficeApps()
    If Not excel Is Nothing Then
      excel.Quit
      Set excel = Nothing
    End If

    If Not word Is Nothing Then
      word.Quit
      Set word = Nothing
    End If

    If Not powerPoint Is Nothing Then
      ' Only quit when no presentations remain open, so that a PowerPoint
      ' session the user already had running is not closed along with ours.
      If powerPoint.Presentations.Count = 0 Then
        powerPoint.Quit
      End If
      Set powerPoint = Nothing
    End If
End Sub

' Returns the path of the folder that contains the running script.
Function GetFolderPath()
    ' ParentFolder is a Folder object; .Path is its default property, read here explicitly.
    GetFolderPath = fso.GetFile(WScript.ScriptFullName).ParentFolder.Path
End Function

' Appends a timestamped, tab-separated entry to the log and records that the
' log now has content.
'   s - message text to log.
Sub Msg(s)
  Dim sEntry
  sEntry = Now & vbTab & s
  oLog.WriteLine sEntry
  bLogUsed = True
End Sub

The script will add an (_out) suffix to the name of each PDF file. The suffix can be changed on line 2 of the script. Here is a script that will move all PDF files with the (_out) suffix to a folder with the (_out) suffix.

' Moves every "<name><sFileSuffix>.pdf" found under the dropped folder into a
' sibling folder named "<folder><sFileSuffix>", preserving the sub-folder layout.
Set fso = CreateObject("Scripting.FileSystemObject")
Dim sFileSuffix: sFileSuffix = "_out" 'Appends at the end of output file name
Dim sInFolder: sInFolder = ""     ' root folder that was dropped onto the script
Dim sOutFolder: sOutFolder = ""   ' root of the destination tree

If WScript.Arguments.Count <> 1 Then
    MsgBox "Please drop folder to move OCR PDF files to " & sFileSuffix & " folder"
Else
    ' Exactly one argument was supplied, so it is always at index 0.
    sFolder = WScript.Arguments(0)
    If fso.FolderExists(sFolder) Then
      sInFolder = sFolder
      sOutFolder = sFolder & sFileSuffix
      ProcessFolder sFolder
      MsgBox "Done"
    End If
End If

' Moves every "*<sFileSuffix>.pdf" file in sFolder to the matching folder under
' sOutFolder (dropping the suffix from the file name), then recurses into each
' sub-folder. The destination folder is created when it does not exist yet.
'   sFolder - sInFolder itself or one of its descendants.
Sub ProcessFolder(sFolder)
  Dim iSuffixLen, sRelative, sTargetFolder, sOutFile
  Dim oFolder, oFile, oSubfolder

  ' Length of "<suffix>.pdf", e.g. "_out.pdf".
  iSuffixLen = Len(sFileSuffix) + 4

  ' Path of sFolder relative to sInFolder ("" for the root, "\sub" below it).
  ' Only the leading sInFolder part is removed.
  sRelative = Mid(sFolder, Len(sInFolder) + 1)
  sTargetFolder = sOutFolder & sRelative

  If fso.FolderExists(sTargetFolder) = False Then
    fso.CreateFolder sTargetFolder
  End If

  Set oFolder = fso.GetFolder(sFolder)

  For Each oFile In oFolder.Files
    ' Compare case-insensitively so "_OUT.PDF" is matched as well.
    If LCase(Right(oFile.Name, iSuffixLen)) = LCase(sFileSuffix & ".pdf") Then
      sOutFile = Mid(oFile.Name, 1, Len(oFile.Name) - iSuffixLen) & ".pdf"
      fso.MoveFile oFile.Path, sTargetFolder & "\" & sOutFile
    End If
  Next

  For Each oSubfolder In oFolder.SubFolders
    ProcessFolder oSubfolder.Path
  Next
End Sub

I have been using this script for some time and decided to share it. I hope someone else will find this useful. If you want to merge all of these PDFs, you can use the PDF Merge application I created earlier.


This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


About the Author

Igor Krupitsky
Web Developer
United States United States
Igor is a business intelligence consultant working in Tampa, Florida. He has a BS in Finance from University of South Carolina and Masters in Information Management System from University of South Florida. He also has following professional certifications: MCSD, MCDBA, MCAD.

Comments and Discussions

QuestionWhat language? What file name? Pin
cpGlenn27-Mar-19 20:22
MembercpGlenn27-Mar-19 20:22 
AnswerRe: What language? What file name? Pin
Igor Krupitsky28-Mar-19 3:59
mvaIgor Krupitsky28-Mar-19 3:59 
This is a VBS file. No tools are needed to run this.

Tesseract is required if you want to convert image files. The script will prompt you to install it.
MS Office is required to convert office documents.

Please look at the "Tagged as" section of the article. It says: VBScript, VBA, VBS.
GeneralRe: What language? What file name? Pin
cpGlenn28-Mar-19 12:35
MembercpGlenn28-Mar-19 12:35 

Posted 26 Mar 2019

Tagged as


