PDFBox 2.0 及以后版本:抽取 PDF 文件内的图片
/**
 * Gathers the images found on every page of a PDF document.
 *
 * <p>Each page's resources are handed to {@code getImagesFromResources}, and the
 * per-page results are appended to one list in page iteration order.
 *
 * @param document the PDF document whose pages are scanned
 * @return a new list holding the images of all pages; empty when no page yields an image
 * @throws IOException if reading an image from the document fails
 */
public static List<RenderedImage> getImagesFromPDF(PDDocument document) throws IOException {
    List<RenderedImage> collected = new ArrayList<RenderedImage>();
    for (PDPage currentPage : document.getPages()) {
        PDResources pageResources = currentPage.getResources();
        List<RenderedImage> pageImages = getImagesFromResources(pageResources);
        collected.addAll(pageImages);
    }
    return collected;
}
/**
 * Collects the image XObjects that are direct entries of a resource dictionary.
 *
 * <p>Form XObjects are intentionally not descended into, so images that are only
 * reachable through a nested form are not returned. To include them, recurse with
 * the form's own resources.
 *
 * <p>As a side effect, the file suffix of every image found is printed to standard output.
 *
 * @param resources the resources to scan; may be {@code null} (a page without a
 *                  resource dictionary), in which case an empty list is returned
 * @return a new, mutable list of the images found; never {@code null}
 * @throws IOException if an XObject or its image data cannot be read
 */
private static List<RenderedImage> getImagesFromResources(PDResources resources) throws IOException {
    List<RenderedImage> images = new ArrayList<RenderedImage>();
    if (resources == null) {
        // No resource dictionary means there is nothing to extract; avoid a NullPointerException.
        return images;
    }
    for (COSName xObjectName : resources.getXObjectNames()) {
        PDXObject xObject = resources.getXObject(xObjectName);
        // Only image XObjects are collected; form XObjects (and anything else) are skipped.
        if (xObject instanceof PDImageXObject) {
            PDImageXObject obj = (PDImageXObject) xObject;
            // Print the image's file suffix.
            System.out.println(obj.getSuffix());
            images.add(obj.getImage());
        }
    }
    return images;
}
转载:https://blog.csdn.net/weixin_42435196/article/details/111245401
查看评论