65.9K
CodeProject 正在变化。 阅读更多。
Home

将所有文件转换为可搜索的 PDF

starIconstarIconstarIconstarIconstarIcon

5.00/5 (5投票s)

2019年3月26日

CPOL
viewsIcon

13890

downloadIcon

384

这个程序可以将Office、文本和图像文件转换为PDF。

引言

这个程序可以将Office、文本和图像文件转换为PDF。要使用这个程序,请将文件或文件夹拖放到脚本文件上。子文件夹中的文件也会被转换。

Using the Code

VBS脚本使用MS Office将Excel、Word、文本和PowerPoint文档转换为PDF。

VBS脚本使用免费的 Tesseract 库(由Google开发)将图像转换为PDF。

Const sTesseractPath = "C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"  'Download Here: 
                                             'https://github.com/UB-Mannheim/tesseract/wiki
Const sFileSuffix = "_out" 'Appends at the end of output file name

Set fso = CreateObject("Scripting.FileSystemObject")
Set oShell = WScript.CreateObject ("WSCript.shell")
Dim iCount: iCount = 0
Dim oLog
Dim bLogUsed: bLogUsed = False
Dim sFolderPath: sFolderPath = GetFolderPath()
Dim excel, word, powerPoint
Set excel = Nothing
Set word = Nothing
Set powerPoint = Nothing

if WScript.Arguments.Count = 0 then
    MsgBox "Please drop office and image files or folders to convert them to searchable PDFs"

Else
    Set oLog = fso.CreateTextFile(WScript.ScriptFullName & ".log", True)

    For i = 0 to WScript.Arguments.Count -1
      sFile = WScript.Arguments(i)
      
      If fso.FileExists(sFile) Then        
        ProcessFile sFile
        
      ElseIf fso.FolderExists(sFile) Then  
        ProcessFolder sFile
      End If
      
    Next

    CloseOfficeApps
    oLog.Close    

    If bLogUsed = False Then
    'Delete unused Log
    fso.DeleteFile WScript.ScriptFullName & ".log"
    End If

    MsgBox "Created " & iCount & " PDFs" 
End if

Sub ProcessFolder(sFolder)
  Set oFolder = fso.GetFolder(sFolder)
  For Each oFile in oFolder.Files
    ProcessFile oFile.Path
  Next
  
   For Each oSubfolder in oFolder.SubFolders
    ProcessFolder oSubfolder.Path
   Next
End Sub

Sub ProcessFile(sFile)
  Dim iPos, sFileBase, sOutPdf, sOutPdfNoExt
  iPos = InStrRev(sFile,".")
  sFileBase = Mid(sFile,1,iPos - 1)
  sOutPdfNoExt = sFileBase & sFileSuffix
  sOutPdf = sOutPdfNoExt & ".pdf"

  If fso.FileExists(sOutPdf) Then    
    Msg sOutPdf & " already exists"
    Exit Sub
  End If
  
  sFileExt = LCASE(fso.GetExtensionName(sFile))

  Select Case sFileExt

    Case "xlsx", "xls", "csv"
      ExcelToPdf sFile, sOutPdf
     
    Case "docx", "doc", "txt", "rtf", "sql"
      WordToPdf sFile, sOutPdf
      
    Case "pptx", "ppt"
      PowerPointToPdf sFile, sOutPdf
      
    Case "bmp","pnm","png","jfif","jpeg","jpg","tiff","gif"
      ImgToPdf sFile, sOutPdfNoExt
      
    Case Else
      Msg "File type: " & sFileExt & " is not supported"
      
  End Select  

  If fso.FileExists(sOutPdf) Then    
    iCount = iCount + 1
  Else
    Msg sOutPdf & ".pdf could not be created"
  End If

End Sub

Sub ImgToPdf(sInFile, sOutPdf)
  If fso.FileExists(sTesseractPath) = False Then 
    MsgBox "Tesseract is not installed. Download Here: _
            https://github.com/UB-Mannheim/tesseract/wiki.  _
            If is installed, modify the first line of this script file to point it to tesseract.exe"
    oShell.Run "chrome -url https://github.com/UB-Mannheim/tesseract/wiki"
    WScript.Quit
  End If
  
  oShell.run """" & sTesseractPath & """ """ & sInFile & """ """ & sOutPdf & """ pdf", 1 , True   
End Sub

Sub ExcelToPdf(sFrom, sTo)

  If excel is Nothing Then
    Set excel = CreateObject("Excel.Application")
  End If
  
  excel.ScreenUpdating = false
  excel.DisplayAlerts = false

  Set workbook = excel.Workbooks.Open(sFrom)
  workbook.ExportAsFixedFormat 0, sTo

  workbook.Close()
  Set workbook = Nothing
End Sub

Sub WordToPdf(sFrom, sTo)

  If word is Nothing Then
    Set word = CreateObject("Word.Application")
  End If

  Set doc = word.Documents.Open(sFrom)
  doc.Activate()
  doc.SaveAs2 sTo, 17
  doc.Close()
  Set doc = Nothing
End Sub

Sub PowerPointToPdf(sFrom, sTo)

  If powerPoint is Nothing Then
    Set powerPoint = CreateObject("PowerPoint.Application")
  End If
  
  Const msoFalse = 0
  Set pres = powerPoint.Presentations.Open(sFrom, , , msoFalse)
  pres.SaveAs sTo, 32
  pres.Close
  Set pres = Nothing
End Sub

Sub CloseOfficeApps()
    If Not excel is Nothing Then
      excel.Quit()
      Set excel = Nothing
    End If

    If Not word is Nothing Then
      word.Quit()
      Set word = Nothing
    End If
    
    If Not powerPoint is Nothing Then
      powerPoint.Quit()
      Set powerPoint = Nothing
    End If
End Sub

Function GetFolderPath()
    Dim oFile 'As Scripting.File
    Set oFile = fso.GetFile(WScript.ScriptFullName)
    GetFolderPath = oFile.ParentFolder
End Function

Sub Msg(s)
  oLog.WriteLine Now & vbTab & s
  bLogUsed = True
End Sub

脚本会在每个PDF文件名前添加 (_out) 前缀。可以在 Line2 中更改此前缀。这里有一个脚本,可以将所有带有 (_out) 前缀的PDF文件移动到带有 (_out) 前缀的文件夹中。

Set fso = CreateObject("Scripting.FileSystemObject")
Dim sFileSuffix: sFileSuffix = "_out" 'Appends at the end of output file name
Dim sInFolder: sInFolder = ""
Dim sOutFolder: sOutFolder = ""

if WScript.Arguments.Count <> 1 then
    MsgBox "Please drop folder to move OCR PDF files to " & sFileSuffix & " folder"

Else
    If WScript.Arguments.Count = 1 Then
      sFolder = WScript.Arguments(i)      
      If fso.FolderExists(sFolder) Then  
        sInFolder = sFolder
        sOutFolder = sFolder & sFileSuffix 
        ProcessFolder sFolder        
        MsgBox "Done"
      End If
    End If
End if

Sub ProcessFolder(sFolder)

  iPrefixLen = Len(sFileSuffix) + 4
  sSuffix = Replace(sFolder,sInFolder, "")
  sTargetFolder = sOutFolder & "" & sSuffix

  If fso.FolderExists(sTargetFolder) = False Then
    fso.CreateFolder sTargetFolder
  End If

  Set oFolder = fso.GetFolder(sFolder)
  For Each oFile in oFolder.Files
    
    If Right(oFile.Path, iPrefixLen) = sFileSuffix & ".pdf" Then
      sOutFile = Mid(oFile.Name, 1, Len(oFile.Name) - iPrefixLen) & ".pdf"
      fso.MoveFile oFile.Path, sTargetFolder & "\" & sOutFile
    End If
  Next
  
  For Each oSubfolder in oFolder.SubFolders
    ProcessFolder oSubfolder.Path
  Next
End Sub

我使用这个脚本一段时间了,并决定分享它。希望对其他人有用。如果您想合并所有这些PDF文件,可以使用我之前创建的 PDF合并 应用程序。

© . All rights reserved.