将所有文件转换为可搜索的 PDF





5.00/5 (5投票s)
这个程序可以将Office、文本和图像文件转换为PDF。
引言
这个程序可以将Office、文本和图像文件转换为PDF。要使用这个程序,请将文件或文件夹拖放到脚本文件上。子文件夹中的文件也会被转换。
Using the Code
VBS脚本使用MS Office将Excel、Word、文本和PowerPoint文档转换为PDF。
VBS脚本使用免费的 Tesseract 库(由Google开发)将图像转换为PDF。
Const sTesseractPath = "C:\Program Files (x86)\Tesseract-OCR\tesseract.exe" 'Download Here:
'https://github.com/UB-Mannheim/tesseract/wiki
Const sFileSuffix = "_out" 'Appends at the end of output file name
Set fso = CreateObject("Scripting.FileSystemObject")
Set oShell = WScript.CreateObject ("WSCript.shell")
Dim iCount: iCount = 0
Dim oLog
Dim bLogUsed: bLogUsed = False
Dim sFolderPath: sFolderPath = GetFolderPath()
Dim excel, word, powerPoint
Set excel = Nothing
Set word = Nothing
Set powerPoint = Nothing
if WScript.Arguments.Count = 0 then
MsgBox "Please drop office and image files or folders to convert them to searchable PDFs"
Else
Set oLog = fso.CreateTextFile(WScript.ScriptFullName & ".log", True)
For i = 0 to WScript.Arguments.Count -1
sFile = WScript.Arguments(i)
If fso.FileExists(sFile) Then
ProcessFile sFile
ElseIf fso.FolderExists(sFile) Then
ProcessFolder sFile
End If
Next
CloseOfficeApps
oLog.Close
If bLogUsed = False Then
'Delete unused Log
fso.DeleteFile WScript.ScriptFullName & ".log"
End If
MsgBox "Created " & iCount & " PDFs"
End if
Sub ProcessFolder(sFolder)
Set oFolder = fso.GetFolder(sFolder)
For Each oFile in oFolder.Files
ProcessFile oFile.Path
Next
For Each oSubfolder in oFolder.SubFolders
ProcessFolder oSubfolder.Path
Next
End Sub
Sub ProcessFile(sFile)
Dim iPos, sFileBase, sOutPdf, sOutPdfNoExt
iPos = InStrRev(sFile,".")
sFileBase = Mid(sFile,1,iPos - 1)
sOutPdfNoExt = sFileBase & sFileSuffix
sOutPdf = sOutPdfNoExt & ".pdf"
If fso.FileExists(sOutPdf) Then
Msg sOutPdf & " already exists"
Exit Sub
End If
sFileExt = LCASE(fso.GetExtensionName(sFile))
Select Case sFileExt
Case "xlsx", "xls", "csv"
ExcelToPdf sFile, sOutPdf
Case "docx", "doc", "txt", "rtf", "sql"
WordToPdf sFile, sOutPdf
Case "pptx", "ppt"
PowerPointToPdf sFile, sOutPdf
Case "bmp","pnm","png","jfif","jpeg","jpg","tiff","gif"
ImgToPdf sFile, sOutPdfNoExt
Case Else
Msg "File type: " & sFileExt & " is not supported"
End Select
If fso.FileExists(sOutPdf) Then
iCount = iCount + 1
Else
Msg sOutPdf & ".pdf could not be created"
End If
End Sub
Sub ImgToPdf(sInFile, sOutPdf)
If fso.FileExists(sTesseractPath) = False Then
MsgBox "Tesseract is not installed. Download Here: _
https://github.com/UB-Mannheim/tesseract/wiki. _
If is installed, modify the first line of this script file to point it to tesseract.exe"
oShell.Run "chrome -url https://github.com/UB-Mannheim/tesseract/wiki"
WScript.Quit
End If
oShell.run """" & sTesseractPath & """ """ & sInFile & """ """ & sOutPdf & """ pdf", 1 , True
End Sub
Sub ExcelToPdf(sFrom, sTo)
If excel is Nothing Then
Set excel = CreateObject("Excel.Application")
End If
excel.ScreenUpdating = false
excel.DisplayAlerts = false
Set workbook = excel.Workbooks.Open(sFrom)
workbook.ExportAsFixedFormat 0, sTo
workbook.Close()
Set workbook = Nothing
End Sub
Sub WordToPdf(sFrom, sTo)
If word is Nothing Then
Set word = CreateObject("Word.Application")
End If
Set doc = word.Documents.Open(sFrom)
doc.Activate()
doc.SaveAs2 sTo, 17
doc.Close()
Set doc = Nothing
End Sub
Sub PowerPointToPdf(sFrom, sTo)
If powerPoint is Nothing Then
Set powerPoint = CreateObject("PowerPoint.Application")
End If
Const msoFalse = 0
Set pres = powerPoint.Presentations.Open(sFrom, , , msoFalse)
pres.SaveAs sTo, 32
pres.Close
Set pres = Nothing
End Sub
Sub CloseOfficeApps()
If Not excel is Nothing Then
excel.Quit()
Set excel = Nothing
End If
If Not word is Nothing Then
word.Quit()
Set word = Nothing
End If
If Not powerPoint is Nothing Then
powerPoint.Quit()
Set powerPoint = Nothing
End If
End Sub
Function GetFolderPath()
Dim oFile 'As Scripting.File
Set oFile = fso.GetFile(WScript.ScriptFullName)
GetFolderPath = oFile.ParentFolder
End Function
Sub Msg(s)
oLog.WriteLine Now & vbTab & s
bLogUsed = True
End Sub
脚本会在每个PDF文件名前添加 (_out
) 前缀。可以在 Line2
中更改此前缀。这里有一个脚本,可以将所有带有 (_out
) 前缀的PDF文件移动到带有 (_out
) 前缀的文件夹中。
Set fso = CreateObject("Scripting.FileSystemObject")
Dim sFileSuffix: sFileSuffix = "_out" 'Appends at the end of output file name
Dim sInFolder: sInFolder = ""
Dim sOutFolder: sOutFolder = ""
if WScript.Arguments.Count <> 1 then
MsgBox "Please drop folder to move OCR PDF files to " & sFileSuffix & " folder"
Else
If WScript.Arguments.Count = 1 Then
sFolder = WScript.Arguments(i)
If fso.FolderExists(sFolder) Then
sInFolder = sFolder
sOutFolder = sFolder & sFileSuffix
ProcessFolder sFolder
MsgBox "Done"
End If
End If
End if
Sub ProcessFolder(sFolder)
iPrefixLen = Len(sFileSuffix) + 4
sSuffix = Replace(sFolder,sInFolder, "")
sTargetFolder = sOutFolder & "" & sSuffix
If fso.FolderExists(sTargetFolder) = False Then
fso.CreateFolder sTargetFolder
End If
Set oFolder = fso.GetFolder(sFolder)
For Each oFile in oFolder.Files
If Right(oFile.Path, iPrefixLen) = sFileSuffix & ".pdf" Then
sOutFile = Mid(oFile.Name, 1, Len(oFile.Name) - iPrefixLen) & ".pdf"
fso.MoveFile oFile.Path, sTargetFolder & "\" & sOutFile
End If
Next
For Each oSubfolder in oFolder.SubFolders
ProcessFolder oSubfolder.Path
Next
End Sub
我使用这个脚本一段时间了,并决定分享它。希望对其他人有用。如果您想合并所有这些PDF文件,可以使用我之前创建的 PDF合并 应用程序。