package io.annot8.components.tesseract.processors;

import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.ComponentTags;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.components.responses.ProcessorResponse;
import io.annot8.api.context.Context;
import io.annot8.api.data.Content;
import io.annot8.api.data.Item;
import io.annot8.api.settings.Description;
import io.annot8.common.components.AbstractProcessor;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.content.FileContent;
import io.annot8.common.data.content.Image;
import io.annot8.common.data.content.Text;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import net.sourceforge.tess4j.util.LoadLibs;
import org.apache.commons.io.FilenameUtils;

@ComponentDescription("Use Tesseract to extract text from images stored in FileContent, or directly from Image content")
@ComponentTags({"image", "text", "ocr", "tesseract"})
@ComponentName("Tesseract OCR")
@SettingsClass(Settings.class)
/* loaded from: input_file:io/annot8/components/tesseract/processors/OCR.class */
public class OCR extends AbstractProcessorDescriptor<Processor, Settings> {

    /* loaded from: input_file:io/annot8/components/tesseract/processors/OCR$Processor.class */
    public static class Processor extends AbstractProcessor {
        private final ITesseract instance;
        private final List<String> extensions;

        public Processor(List<String> list, ITesseract iTesseract) {
            this.extensions = list;
            this.instance = iTesseract;
        }

        public ProcessorResponse process(Item item) {
            item.getContents(FileContent.class).filter(fileContent -> {
                return this.extensions.contains(FilenameUtils.getExtension(((File) fileContent.getData()).getName()).toLowerCase());
            }).forEach(fileContent2 -> {
                createTextContent(item, (String) metrics().timer("ocr-file", new String[0]).record(() -> {
                    try {
                        return this.instance.doOCR((File) fileContent2.getData());
                    } catch (TesseractException e) {
                        log().error("Unable to extract text from File content {}", fileContent2.getId(), e);
                        return null;
                    }
                }), fileContent2);
            });
            item.getContents(Image.class).forEach(image -> {
                createTextContent(item, (String) metrics().timer("ocr-image", new String[0]).record(() -> {
                    try {
                        return this.instance.doOCR((BufferedImage) image.getData());
                    } catch (TesseractException e) {
                        log().error("Unable to extract text from Image content {}", image.getId(), e);
                        return null;
                    }
                }), image);
            });
            return ProcessorResponse.ok();
        }

        private Text createTextContent(Item item, String str, Content<?> content) {
            if (str == null || str.isBlank()) {
                return null;
            }
            return (Text) ((Content.Builder) ((Content.Builder) item.createContent(Text.class).withDescription("OCR from " + content.getId()).withData(str).withProperties(content.getProperties())).withProperty("parent", content.getId())).save();
        }
    }

    /* loaded from: input_file:io/annot8/components/tesseract/processors/OCR$Settings.class */
    /**
     * User-configurable settings for the Tesseract OCR processor.
     *
     * <p>A plain mutable bean: every property has a getter and a setter, and {@link #validate()}
     * reports whether the current values are complete enough to be used.
     */
    public static class Settings implements io.annot8.api.settings.Settings {
        private List<String> extensions = Arrays.asList("bmp", "gif", "jpg", "jpeg", "tif", "tiff");
        private List<String> configs = new ArrayList<>();
        // Default is whatever LoadLibs resolves for "tessdata"; evaluated on every construction.
        private String dataPath = LoadLibs.extractTessResources("tessdata").toString();
        private String language = "eng";
        // NOTE(review): 3 is presumably Tesseract's "default" engine mode - confirm against tess4j.
        private int ocrEngine = 3;
        // NOTE(review): -1 presumably means "leave page segmentation unset" - confirm against tess4j.
        private int pageSegmentation = -1;
        private Map<String, String> variables = new HashMap<>();

        /** @return the file extensions that will be OCR'd */
        @Description("List of file extensions (case insensitive) that will be OCR'd")
        public List<String> getExtensions() {
            return this.extensions;
        }

        /** @param list the file extensions that will be OCR'd */
        public void setExtensions(List<String> list) {
            this.extensions = list;
        }

        /** @return the Tesseract configs to apply; empty by default */
        @Description("List of Tesseract configs")
        public List<String> getConfigs() {
            return this.configs;
        }

        /** @param list the Tesseract configs to apply */
        public void setConfigs(List<String> list) {
            this.configs = list;
        }

        /** @return the path handed to Tesseract as its data path */
        @Description("Path to Tesseract models")
        public String getDataPath() {
            return this.dataPath;
        }

        /** @param str the path handed to Tesseract as its data path */
        public void setDataPath(String str) {
            this.dataPath = str;
        }

        /** @return the language handed to Tesseract; "eng" by default */
        @Description("Expected language of text")
        public String getLanguage() {
            return this.language;
        }

        /** @param str the language handed to Tesseract */
        public void setLanguage(String str) {
            this.language = str;
        }

        /** @return the numeric OCR engine mode handed to Tesseract */
        @Description("Tesseract engine to use")
        public int getOcrEngine() {
            return this.ocrEngine;
        }

        /** @param i the numeric OCR engine mode handed to Tesseract */
        public void setOcrEngine(int i) {
            this.ocrEngine = i;
        }

        /** @return the numeric page segmentation mode handed to Tesseract */
        @Description("Tesseract page segmentation setting")
        public int getPageSegmentation() {
            return this.pageSegmentation;
        }

        /** @param i the numeric page segmentation mode handed to Tesseract */
        public void setPageSegmentation(int i) {
            this.pageSegmentation = i;
        }

        /** @return additional Tesseract variables as name/value pairs; empty by default */
        @Description("Additional Tesseract variables")
        public Map<String, String> getVariables() {
            return this.variables;
        }

        /** @param map additional Tesseract variables as name/value pairs */
        public void setVariables(Map<String, String> map) {
            this.variables = map;
        }

        /**
         * Checks that the settings are complete.
         *
         * @return {@code true} only when the extension list is non-null and non-empty, the configs
         *     and variables are non-null, and the data path and language are non-null and non-empty
         */
        public boolean validate() {
            return this.extensions != null
                    && !this.extensions.isEmpty()
                    && this.configs != null
                    && this.dataPath != null
                    && !this.dataPath.isEmpty()
                    && this.language != null
                    && !this.language.isEmpty()
                    && this.variables != null;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds a {@link Processor} backed by a Tesseract engine configured from {@code settings}.
     *
     * <p>Configs are only applied when the list is non-empty. Data path, language, engine mode and
     * page segmentation mode are always applied, followed by every additional variable.
     *
     * @param context the component context; not used by this implementation
     * @param settings the OCR settings; its config list and variable map must be non-null
     * @return a processor using the configured extensions and the newly created engine
     */
    public Processor createComponent(Context context, Settings settings) {
        Tesseract tesseract = new Tesseract();

        List<String> configs = settings.getConfigs();
        if (!configs.isEmpty()) {
            tesseract.setConfigs(configs);
        }

        tesseract.setDatapath(settings.getDataPath());
        tesseract.setLanguage(settings.getLanguage());
        tesseract.setOcrEngineMode(settings.getOcrEngine());
        tesseract.setPageSegMode(settings.getPageSegmentation());

        // Each entry is forwarded as a (name, value) Tesseract variable.
        settings.getVariables().forEach(tesseract::setTessVariable);

        return new Processor(settings.getExtensions(), tesseract);
    }

    /**
     * Describes what this component consumes and produces.
     *
     * @return capabilities declaring that {@link FileContent} and {@link Image} content are
     *     processed and that {@link Text} content is created
     */
    public Capabilities capabilities() {
        return new SimpleCapabilities.Builder()
                .withProcessesContent(FileContent.class)
                .withProcessesContent(Image.class)
                .withCreatesContent(Text.class)
                .build();
    }
}
