package ru.mcs.udk.document.impl;
import net.sourceforge.tess4j.TesseractException;
import ru.mcs.udk.wrapper.DocumentInfo;
import ru.mcs.udk.document.DocumentScanner;
import ru.mcs.udk.utils.DocumentUtils;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import static ru.mcs.udk.utils.DocumentUtils.findUDK;
/**
 * {@link DocumentScanner} implementation for DjVu files.
 *
 * <p>Each of the leading pages is rendered to a TIFF image with the external {@code ddjvu}
 * command, the image is passed to {@link DocumentUtils#getText}, and the resulting text is
 * searched for a UDK with {@code DocumentUtils.findUDK}. Scanning stops at the first page
 * that yields a non-empty UDK.
 */
public class DJVUScanner implements DocumentScanner {

    /** First page passed to {@code ddjvu -page=} (the page loop is 1-based). */
    private static final int FIRST_PAGE = 1;

    /** Last page that is scanned; later pages are never examined. */
    private static final int LAST_PAGE = 6;

    /** Directory, relative to the working directory, that receives the rendered pages. */
    private static final String TEMP_DIR = "temp";

    /**
     * Searches the first pages of a DjVu file for a UDK.
     *
     * @param djvuFile the DjVu document to scan
     * @return a {@link DocumentInfo} whose UDK is the value found (or {@code ""} when nothing
     *     was found), whose error is the message of the last failure (or {@code ""}), and
     *     whose time is the elapsed wall-clock time in milliseconds; the language is set to
     *     {@code "ru"} only when a UDK was found
     */
    @Override
    public DocumentInfo getUDK(File djvuFile) {
        // Record when the search started so the elapsed time can be reported.
        long startTime = System.currentTimeMillis();
        DocumentInfo documentInfo = new DocumentInfo();
        documentInfo.setError("");
        documentInfo.setUdk("");

        for (int pageIndex = FIRST_PAGE; pageIndex <= LAST_PAGE; pageIndex++) {
            try {
                String udk = scanPage(djvuFile, pageIndex);
                // Only overwrite the "" default with a real hit, so the UDK never becomes null.
                if (udk != null && !udk.isEmpty()) {
                    documentInfo.setUdk(udk);
                    documentInfo.setLanguage("ru");
                    break;
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag for the caller and stop: further waitFor() calls
                // would fail immediately anyway.
                Thread.currentThread().interrupt();
                documentInfo.setError(describe(e));
                break;
            } catch (IOException | TesseractException e) {
                // Best effort: remember the failure and try the next page.
                documentInfo.setError(describe(e));
            }
        }

        long endTime = System.currentTimeMillis();
        documentInfo.setTime(endTime - startTime);
        return documentInfo;
    }

    /**
     * Renders one page with {@code ddjvu}, extracts its text and looks for a UDK in it.
     * The rendered TIFF is removed again before returning.
     *
     * @return the result of {@code findUDK} for the page text, or {@code null} when
     *     {@code ddjvu} exited with a non-zero code
     */
    private String scanPage(File djvuFile, int pageIndex)
            throws IOException, InterruptedException, TesseractException {
        File tempDir = new File(TEMP_DIR);
        // mkdirs() returns false when the directory already exists, hence the second check.
        if (!tempDir.mkdirs() && !tempDir.isDirectory()) {
            throw new IOException("Cannot create directory " + tempDir.getAbsolutePath());
        }
        File outputFile = new File(tempDir, String.format("page_%d.tiff", pageIndex));

        Process process = new ProcessBuilder("ddjvu",
                "-format=tiff",
                "-quality=90",
                "-page=" + pageIndex,
                djvuFile.getAbsolutePath(),
                outputFile.getPath()).start();
        try {
            int exitCode;
            try {
                exitCode = process.waitFor();
            } catch (InterruptedException e) {
                // Do not leave the external renderer running after we stop waiting for it.
                process.destroy();
                throw e;
            }
            if (exitCode != 0) {
                // The page could not be rendered; the caller moves on to the next page.
                return null;
            }

            BufferedImage image = ImageIO.read(outputFile);
            if (image == null) {
                // ImageIO.read yields null when no registered reader can decode the file.
                throw new IOException("No image reader available for " + outputFile.getPath());
            }
            return findUDK(DocumentUtils.getText(image));
        } finally {
            // Rendered pages are scratch data; do not let them pile up between runs.
            if (outputFile.exists() && !outputFile.delete()) {
                outputFile.deleteOnExit();
            }
        }
    }

    /** Returns the exception message, falling back to {@code toString()} when it is null. */
    private static String describe(Exception e) {
        String message = e.getMessage();
        return message != null ? message : e.toString();
    }
}