場景
有些PDF加密(不知道密碼),不能複製,不能打印,不能另存為,只有讀的權限。
怎麼破?
用Java編寫一個小程式,透過Apache PDFBox讀取PDF中的文字並輸出為txt文件,親測有效!
工具
JDK 8、Eclipse、Maven,以及下列PDFBox相關的Maven依賴:
<!-- Keep all PDFBox 2.x artifacts on the same release; the sub-projects are released together. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.21</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>2.0.21</version>
</dependency>
<!-- NOTE(review): jempbox only exists in the 1.8.x line (xmpbox replaces it in 2.x); confirm it is still needed. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jempbox</artifactId>
<version>1.8.13</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>xmpbox</artifactId>
<version>2.0.21</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>preflight</artifactId>
<version>2.0.21</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>2.0.21</version>
</dependency>
Java代碼
/**
 * Extracts the text of every PDF file in a folder and writes it to a UTF-8
 * {@code .txt} file with the same base name.
 *
 * <p>Each document is opened with {@code PDDocument.load(File)}, i.e. without
 * supplying a password. A file that cannot be processed is reported on
 * standard error and the remaining files are still processed.
 *
 * @param args unused; the input and output folders are hard-coded below
 */
public static void main(String[] args)
{
    String pdfpath="F:\\PDF";// folder containing the PDF files
    String outputPath="F:\\PDF";// folder that receives the generated txt files

    File pdfDir=new File(pdfpath);
    // Only regular *.pdf files: input and output folder are the same, so without
    // this filter a second run would try to parse the generated .txt files
    // (and any sub-folders) as PDF documents.
    File[] files=pdfDir.listFiles(
            f -> f.isFile() && f.getName().toLowerCase(java.util.Locale.ROOT).endsWith(".pdf"));
    if(files==null)
    {
        // listFiles returns null when the path is not a directory or cannot be read.
        System.err.println("Cannot list PDF folder: "+pdfDir.getAbsolutePath());
        return;
    }

    for(File file:files)
    {
        // Strip only the trailing extension; the filter guarantees it is present.
        String name=file.getName();
        String baseName=name.substring(0,name.length()-".pdf".length());
        File txtFile=new File(outputPath,baseName+".txt");

        // try-with-resources closes the writer and the document even when extraction fails.
        try(PDDocument doc=PDDocument.load(file);
            Writer writer=new OutputStreamWriter(
                    new FileOutputStream(txtFile),java.nio.charset.StandardCharsets.UTF_8))
        {
            PDFTextStripper stripper=new PDFTextStripper();
            stripper.setSortByPosition(true);
            // PDFTextStripper page numbers are 1-based and the end page is inclusive.
            stripper.setStartPage(1);
            stripper.setEndPage(doc.getNumberOfPages());
            stripper.writeText(doc,writer);
        }
        catch(Exception e)
        {
            // Top-level boundary of a batch tool: report the failing file and carry on
            // with the next one instead of aborting the whole run.
            System.err.println("Failed to extract text from "+file.getAbsolutePath());
            e.printStackTrace();
        }
    }
}