diff --git a/src/main/java/ru/mcs/udk/UdkDjvuScanner.java b/src/main/java/ru/mcs/udk/UdkDjvuScanner.java
index e4a1a10..a72a940 100644
--- a/src/main/java/ru/mcs/udk/UdkDjvuScanner.java
+++ b/src/main/java/ru/mcs/udk/UdkDjvuScanner.java
@@ -1,25 +1,88 @@
 package ru.mcs.udk;
 
+import net.sourceforge.tess4j.TesseractException;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
 import java.io.File;
 import java.io.IOException;
 
+import static ru.mcs.udk.DocumentUtils.findUDK;
+
+/**
+ * Finds the UDK of a DjVu document by rendering its leading pages with the external
+ * {@code ddjvu} tool and running OCR on each rendered page.
+ */
 public class UdkDjvuScanner implements UDKScanner {
+    /** Number of leading pages that are rendered and searched for a UDK. */
+    private static final int PAGES_TO_SCAN = 6;
+
+    /** Directory that receives the rendered page images. */
+    private static final File TEMP_DIR = new File("temp");
+
+    /**
+     * Scans the leading pages of {@code djvuFile} until one yields a UDK, then prints a single
+     * {@code path;udk;;} record to standard output. The UDK field is empty when none is found.
+     *
+     * @param djvuFile DjVu document to scan
+     * @throws RuntimeException if {@code ddjvu} cannot be started, a rendered page cannot be read,
+     *                          or the thread is interrupted while waiting for {@code ddjvu}
+     */
     @Override
     public void getUDK(File djvuFile) {
-        for (int pageIndex = 0; pageIndex < 6; pageIndex++) {
-            String outputFile = String.format("%s_page_%d.png", djvuFile.getName(), pageIndex);
-            Process process = null;
-            try {
-                process = new ProcessBuilder("ddjvu",
-                        "-format=png",
-                        "-page=" + pageIndex,
-                        djvuFile.getAbsolutePath(),
-                        outputFile).start();
-                int exitCode = process.waitFor();
-            } catch (InterruptedException | IOException e) {
-                throw new RuntimeException(e);
-            }
-            System.out.printf("%s;%s;%s;%s\n", djvuFile.getPath(), "", "", "");
-        }
+        String udk = "";
+        // Stop at the first page that yields a UDK so later pages cannot overwrite it.
+        for (int pageIndex = 0; pageIndex < PAGES_TO_SCAN && udk.isEmpty(); pageIndex++) {
+            udk = scanPage(djvuFile, pageIndex);
+        }
+        System.out.printf("%s;%s;%s;%s\n", djvuFile.getPath(), udk, "", "");
     }
+
+    /**
+     * Renders one page to a TIFF file, runs OCR on it and looks for a UDK in the recognised text.
+     *
+     * @param djvuFile  DjVu document to render
+     * @param pageIndex page number handed to {@code ddjvu -page=}
+     * @return the UDK found on the page, or an empty string when there is none
+     */
+    private String scanPage(File djvuFile, int pageIndex) {
+        // Best effort: if the directory cannot be created, ddjvu fails and reports it via its exit code.
+        TEMP_DIR.mkdirs();
+        File pageImage = new File(TEMP_DIR, String.format("page_%d.tiff", pageIndex));
+        try {
+            // NOTE(review): ddjvu page specifications are 1-based according to its manual;
+            // confirm that -page=0 renders the intended page.
+            Process process = new ProcessBuilder("ddjvu",
+                    "-format=tiff",
+                    "-quality=90",
+                    "-page=" + pageIndex,
+                    djvuFile.getAbsolutePath(),
+                    pageImage.getPath())
+                    // Discard ddjvu's output so a full pipe buffer can never block waitFor().
+                    .redirectErrorStream(true)
+                    .redirectOutput(ProcessBuilder.Redirect.DISCARD)
+                    .start();
+            if (process.waitFor() != 0) {
+                return "";
+            }
+            // ImageIO.read returns null when no registered reader can decode the file.
+            BufferedImage image = ImageIO.read(pageImage);
+            if (image == null) {
+                return "";
+            }
+            String found = findUDK(DocumentUtils.getText(image));
+            return found == null || found.isBlank() ? "" : found;
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag before propagating so callers can still observe it.
+            Thread.currentThread().interrupt();
+            throw new RuntimeException(e);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        } catch (TesseractException e) {
+            // OCR failure on one page is not fatal: report it and let the caller try the next page.
+            System.err.printf("OCR failed for %s, page %d%n", djvuFile.getPath(), pageIndex);
+            e.printStackTrace();
+            return "";
+        }
+    }
 }
diff --git a/src/main/java/ru/mcs/udk/UdkPdfScanner.java b/src/main/java/ru/mcs/udk/UdkPdfScanner.java
index 713d511..a682312 100644
--- a/src/main/java/ru/mcs/udk/UdkPdfScanner.java
+++ b/src/main/java/ru/mcs/udk/UdkPdfScanner.java
@@ -27,7 +27,7 @@
             String text = stripper.getText(document);
             String udk = findUDK(text);
-            languageBook = isCyrillic(String.valueOf(file.getName())) ? "ru" : "en";
+            languageBook = isCyrillic(file.getName()) ? "ru" : "en";
             if ((udk == null || udk.isBlank()) && languageBook.equals("ru")) {
                 udk = getUdkByImage(document);