/**
 * Small command-line utility that reports which pages of a PDF document
 * contain a given piece of text.
 *
 * <p>Usage: {@code Test file.pdf text}. The matching page indices are printed
 * to standard output as an array.
 */
public class Test {

    /** Name under which the single extraction region is registered with the stripper. */
    private static final String REGION_NAME = "region";

    /**
     * Extracts the text found inside a rectangular area of a page.
     *
     * @param page the page to read from
     * @param rect the area of the page whose text is wanted
     * @return the text the stripper reports for the area
     * @throws IOException if the stripper cannot be created or the extraction fails
     */
    String getText(PDPage page, Rectangle2D rect) throws IOException {
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        stripper.addRegion(REGION_NAME, rect);
        stripper.extractRegions(page);
        return stripper.getTextForRegion(REGION_NAME);
    }

    /**
     * Tells whether the text extracted from the page's bounding box contains
     * the given text. The comparison is an exact, case-sensitive substring match.
     *
     * @param page the page to search
     * @param text the text to look for
     * @return {@code true} if the extracted page text contains {@code text}
     * @throws UncheckedIOException if text extraction fails; the checked
     *         exception is wrapped so this method can be used inside a lambda
     */
    boolean contains(PDPage page, String text) {
        try {
            // Search area is the page's bounding box as reported by getBBox().
            PDRectangle pr = page.getBBox();
            Rectangle2D rect = new Rectangle2D.Float(
                    pr.getLowerLeftX(), pr.getLowerLeftY(),
                    pr.getWidth(), pr.getHeight());
            String textOnPage = getText(page, rect);
            return textOnPage.contains(text);
        } catch (IOException ex) {
            throw new UncheckedIOException(ex);
        }
    }

    /**
     * Finds all pages of a document that contain the given text.
     *
     * @param doc  the document to search
     * @param text the text to look for
     * @return the zero-based indices of the matching pages, in ascending order;
     *         empty if no page matches
     * @throws UncheckedIOException if text extraction fails on any page
     */
    int[] getPageNumbers(PDDocument doc, String text) {
        return IntStream.range(0, doc.getNumberOfPages())
                .filter(ix -> contains(doc.getPage(ix), text))
                .toArray();
    }

    /**
     * Program entry point.
     *
     * @param args exactly two arguments: the path of the PDF file and the text
     *             to search for
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Parameter: file.pdf text");
            // A usage error is a failure; report it with a non-zero status.
            System.exit(1);
        }
        Test test = new Test();
        // Both the stream and the document are declared as resources so they
        // are closed even when the search throws (including the
        // UncheckedIOException raised by contains()).
        try (FileInputStream fis = new FileInputStream(args[0]);
             PDDocument doc = PDDocument.load(fis)) {
            int[] pages = test.getPageNumbers(doc, args[1]);
            System.out.println(Arrays.toString(pages));
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}