diff --git a/images/page_djvu_1.tiff b/images/page_djvu_1.tiff index 1fab802..8a1929d 100644 --- a/images/page_djvu_1.tiff +++ b/images/page_djvu_1.tiff Binary files differ diff --git a/images/page_djvu_2.tiff b/images/page_djvu_2.tiff index dbb2781..24e692d 100644 --- a/images/page_djvu_2.tiff +++ b/images/page_djvu_2.tiff Binary files differ diff --git a/images/page_djvu_3.tiff b/images/page_djvu_3.tiff index f9e0368..1ec559b 100644 --- a/images/page_djvu_3.tiff +++ b/images/page_djvu_3.tiff Binary files differ diff --git a/images/page_djvu_4.tiff b/images/page_djvu_4.tiff index 20f01df..9cfdbb4 100644 --- a/images/page_djvu_4.tiff +++ b/images/page_djvu_4.tiff Binary files differ diff --git a/images/page_djvu_5.tiff b/images/page_djvu_5.tiff index 5c9a2fb..6bd23d0 100644 --- a/images/page_djvu_5.tiff +++ b/images/page_djvu_5.tiff Binary files differ diff --git a/images/page_djvu_6.tiff b/images/page_djvu_6.tiff index d8968dc..35864f7 100644 --- a/images/page_djvu_6.tiff +++ b/images/page_djvu_6.tiff Binary files differ diff --git a/src/main/java/ru/mcs/udk/scanner/impl/DJVUScanner.java b/src/main/java/ru/mcs/udk/scanner/impl/DJVUScanner.java index 2f37e4d..bb69b63 100644 --- a/src/main/java/ru/mcs/udk/scanner/impl/DJVUScanner.java +++ b/src/main/java/ru/mcs/udk/scanner/impl/DJVUScanner.java @@ -21,8 +21,10 @@ DocumentInfo documentInfo = new DocumentInfo(); documentInfo.setError(""); documentInfo.setUdk(""); + documentInfo.setLanguage(""); + documentInfo.setFileSize(DocumentUtils.getSizeFile(djvuFile)); documentInfo.setDocumentFormat(DocumentFormat.DJVU); - String outputFile = "page_djvu_%d.tiff"; + String outputFile = "page_%d_djvu.tiff"; Process process; try { process = new ProcessBuilder("program/djvu/ddjvu.exe", @@ -35,7 +37,7 @@ int exitCode = process.waitFor(); if (exitCode == 0) { for (int index = 1; index <= 6; index++) { - BufferedImage image = ImageIO.read(new File(String.format("images/page_djvu_%s.tiff", index))); + BufferedImage image = ImageIO.read(new File(String.format("images/page_%s_djvu.tiff", index))); String text = DocumentUtils.getText(image); String udk = findUDK(text); if (udk != null && !udk.isEmpty()) { diff --git a/src/main/java/ru/mcs/udk/scanner/impl/PDFScanner.java b/src/main/java/ru/mcs/udk/scanner/impl/PDFScanner.java index af54cb5..7941c05 100644 --- a/src/main/java/ru/mcs/udk/scanner/impl/PDFScanner.java +++ b/src/main/java/ru/mcs/udk/scanner/impl/PDFScanner.java @@ -38,7 +38,7 @@ documentInfo.setError(""); documentInfo.setUdk(""); documentInfo.setDocumentFormat(DocumentFormat.PDF); - documentInfo.setFileSize(getSizeFile(file)); + documentInfo.setFileSize(DocumentUtils.getSizeFile(file)); try (PDDocument document = Loader.loadPDF(file)) { PDFTextStripper stripper = new PDFTextStripper(); // Устанавливаем диапазон страниц для анализа (первые семь страниц) @@ -60,12 +60,7 @@ return documentInfo; } - private String getSizeFile(File file) { - double sizeInMB = (double) file.length() / (1024 * 1024); // Размер в мегабайтах - DecimalFormat df = new DecimalFormat("#.##"); // Форматирование до двух знаков после запятой - return df.format(sizeInMB) + " Mb"; - } private static String getUdkByImage(PDDocument document) throws IOException, TesseractException { PDFRenderer renderer = new PDFRenderer(document); diff --git a/src/main/java/ru/mcs/udk/utils/DocumentUtils.java b/src/main/java/ru/mcs/udk/utils/DocumentUtils.java index 39078ec..8993313 100644 --- a/src/main/java/ru/mcs/udk/utils/DocumentUtils.java +++ b/src/main/java/ru/mcs/udk/utils/DocumentUtils.java @@ -5,6 +5,7 @@ import java.awt.image.BufferedImage; import java.io.File; +import java.text.DecimalFormat; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -53,7 +54,7 @@ if (matcher.find()) { return matcher.group(1); } - return null; + return ""; } public static boolean isCyrillic(String fileName) { @@ -62,4 +63,11 @@ Matcher matcher = pattern.matcher(fileName); return matcher.matches(); } + + public static String getSizeFile(File file) { + double sizeInMB = (double) file.length() / (1024 * 1024); // Размер в мегабайтах + DecimalFormat df = new DecimalFormat("#.##"); // Форматирование до двух знаков после запятой + + return df.format(sizeInMB) + " Mb"; + } }