package net.sf.okapi.steps.batchtranslation;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Map;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.Source;
import net.sf.okapi.common.BOMNewlineEncodingDetector;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.EventType;
import net.sf.okapi.common.ISegmenter;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.XMLWriter;
import net.sf.okapi.common.exceptions.OkapiException;
import net.sf.okapi.common.filters.IFilter;
import net.sf.okapi.common.filters.IFilterConfigurationMapper;
import net.sf.okapi.common.filterwriter.TMXWriter;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.MultiEvent;
import net.sf.okapi.common.resource.PipelineParameters;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.filters.openxml.Namespace;
import net.sf.okapi.lib.segmentation.SRXDocument;
import net.sf.okapi.lib.translation.QueryUtil;
import net.sf.okapi.lib.xliff2.Const;
import net.sf.okapi.tm.pensieve.common.TranslationUnit;
import net.sf.okapi.tm.pensieve.common.TranslationUnitVariant;
import net.sf.okapi.tm.pensieve.seeker.ITmSeeker;
import net.sf.okapi.tm.pensieve.seeker.TmSeekerFactory;
import net.sf.okapi.tm.pensieve.writer.ITmWriter;
import net.sf.okapi.tm.pensieve.writer.TmWriterFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:net/sf/okapi/steps/batchtranslation/BatchTranslator.class */
public class BatchTranslator {
    /** Logger bound to the runtime class of this instance. */
    private final Logger LOGGER = LoggerFactory.getLogger(getClass());
    /** Mapper used to create the filter matching each raw document's filter configuration. */
    private IFilterConfigurationMapper fcMapper;
    /** Filter for the document being processed; the previous instance is handed back to the mapper on each call. */
    private IFilter filter;
    /** Raw document currently being processed. */
    private RawDocument rawDoc;
    /** Converts text fragments to and from the coded-HTML form written to / read from the temporary files. */
    private QueryUtil qutil;
    /** Temporary HTML file holding the entries to send to the external translation command. */
    private File htmlSourceFile;
    /** File the external command is told to write to (${outputPath}); read back and then deleted. */
    private File htmlTargetFile;
    /** Temporary file backing {@link #store} for the current block of entries. */
    private File originalStoreFile;
    /** Step parameters; never null after construction. */
    private Parameters params;
    /** Writer of the TM being built; re-created for every document when the make-TM option is on. */
    private ITmWriter tmWriter;
    /** Writer of the TMX output; created once per batch when the make-TMX option is on. */
    private TMXWriter tmxWriter;
    /** Raw document describing the TMX output; sent down the pipeline by endBatch() when requested. */
    private RawDocument tmxRawDoc;
    /** Source locale of the document being processed. */
    private LocaleId srcLoc;
    /** Target locale of the document being processed. */
    private LocaleId trgLoc;
    /** Number of sub-documents started so far in the current document. */
    private int subDocId;
    /** Id of the sub-document being read, or 0 when outside any sub-document. */
    private int currentSubDocId;
    /** True once initialize() has run for the current batch; reset by closeAll(). */
    private boolean initDone;
    /** Attributes passed along with every translation unit written to the TMX output. */
    private Map<String, String> attributes;
    /** Store of the original fragments, written while extracting and read back in the same order when retrieving. */
    private SimpleStore store;
    /** Optional pre-existing TM queried to avoid re-translating known segments. */
    private ITmSeeker existingTm;
    /** Seeker opened on the TM being built, queried to avoid sending repeated segments. */
    private ITmSeeker currentTm;
    /** Segments of the current document found in the TM being built. */
    private int docInternalMatches;
    /** Same as docInternalMatches, accumulated over the batch. */
    private int totalInternalMatches;
    /** Segments of the current document found in the existing TM. */
    private int docExternalMatches;
    /** Same as docExternalMatches, accumulated over the batch. */
    private int totalExternalMatches;
    /** Segments of the current document sent to translation. */
    private int docEntries;
    /** Same as docEntries, accumulated over the batch. */
    private int totalEntries;
    /** Segmenter applied to source content, or null when segmentation is not requested. */
    private ISegmenter segmenter;
    /** Value substituted for the root-directory variable in configured paths and in the command line. */
    private String rootDir;
    /** Value substituted for the input-root-directory variable in configured paths and in the command line. */
    private String inputRootDir;

    /**
     * Creates a batch translator. No file or TM is opened here; that work is
     * deferred until the first document is processed.
     *
     * @param iFilterConfigurationMapper mapper used to create the filter for each input document
     * @param parameters step parameters; when null a default {@code Parameters} instance is used
     * @param str value for the root-directory variable in configured paths
     * @param str2 value for the input-root-directory variable in configured paths
     */
    public BatchTranslator(IFilterConfigurationMapper iFilterConfigurationMapper, Parameters parameters, String str, String str2) {
        fcMapper = iFilterConfigurationMapper;
        rootDir = str;
        inputRootDir = str2;
        // Fall back to default settings when the caller supplies none
        params = (parameters != null) ? parameters : new Parameters();
        qutil = new QueryUtil();
        initDone = false;
    }

    /**
     * Last-resort cleanup: releases the TMX writer and TM seekers if the owner
     * never called {@link #endBatch()}.
     */
    @Override
    protected void finalize() {
        closeAll();
    }

    /**
     * Finalizes the TMX output and closes both TM seekers, then marks the
     * translator as needing re-initialization.
     * <p>
     * Each resource is released even if closing a previous one fails, and each
     * field is cleared before its resource is closed so a failed close is not
     * retried on a later call. The first exception raised still propagates.
     */
    private void closeAll() {
        try {
            TMXWriter writer = this.tmxWriter;
            if (writer != null) {
                this.tmxWriter = null;
                try {
                    writer.writeEndDocument();
                } finally {
                    writer.close();
                }
                // Only finalize the output once the document was written and closed successfully
                this.tmxRawDoc.finalizeOutput();
            }
        } finally {
            try {
                ITmSeeker existing = this.existingTm;
                if (existing != null) {
                    this.existingTm = null;
                    existing.close();
                }
            } finally {
                try {
                    ITmSeeker current = this.currentTm;
                    if (current != null) {
                        this.currentTm = null;
                        current.close();
                    }
                } finally {
                    this.initDone = false;
                }
            }
        }
    }

    /**
     * Ends the batch: logs the batch-wide counters, releases all resources and,
     * when the send-TMX option is on, builds the events needed to push the
     * generated TMX document down the pipeline.
     *
     * @return a {@code MULTI_EVENT} wrapping the pipeline parameters and the
     *         batch-item/raw-document events for the TMX file, or null when the
     *         TMX is not to be sent or no TMX document was created
     */
    public Event endBatch() {
        this.LOGGER.info("");
        if (this.currentTm != null) {
            this.LOGGER.info("Total matches from TM being built = {}", this.totalInternalMatches);
            this.LOGGER.info("Total matches from existing TM = {}", this.totalExternalMatches);
        }
        this.LOGGER.info("Total entries sent to translation = {}", this.totalEntries);
        closeAll();

        if (!this.params.getSendTMX()) {
            return null;
        }
        // The TMX raw document only exists if a document was processed with the
        // make-TMX option on; without it there is nothing to send.
        if (this.tmxRawDoc == null) {
            this.LOGGER.warn("No TMX document was created: nothing to send.");
            return null;
        }

        PipelineParameters pipelineParameters = new PipelineParameters();
        pipelineParameters.setOutputURI(this.tmxRawDoc.getInputURI());
        pipelineParameters.setSourceLocale(this.tmxRawDoc.getSourceLocale());
        pipelineParameters.setTargetLocale(this.tmxRawDoc.getTargetLocale());
        pipelineParameters.setOutputEncoding(this.tmxRawDoc.getEncoding());
        pipelineParameters.setInputRawDocument(this.tmxRawDoc);
        pipelineParameters.setFilterConfigurationId(this.tmxRawDoc.getFilterConfigId());
        pipelineParameters.setBatchInputCount(1);

        ArrayList<Event> events = new ArrayList<Event>();
        events.add(new Event(EventType.PIPELINE_PARAMETERS, pipelineParameters));
        events.add(new Event(EventType.START_BATCH_ITEM));
        events.add(new Event(EventType.RAW_DOCUMENT, this.tmxRawDoc));
        events.add(new Event(EventType.END_BATCH_ITEM));
        return new Event(EventType.MULTI_EVENT, new MultiEvent(events));
    }

    /**
     * Performs the once-per-batch setup, using the locales of the first
     * document: opens the TMX output (if requested), prepares the TMX
     * attributes, resets the batch counters, opens the existing TM (if
     * requested) and compiles the segmentation rules (if requested).
     */
    private void initialize() {
        if (params.getMakeTMX()) {
            File tmxFile = new File(expandPathVariables(params.getTmxPath()));
            URI tmxUri = tmxFile.toURI();
            tmxRawDoc = new RawDocument(tmxUri, BOMNewlineEncodingDetector.UTF_8, srcLoc, trgLoc, "okf_tmx");
            String tmxOutputPath = tmxRawDoc.createOutputFile(tmxUri).getAbsolutePath();
            tmxWriter = new TMXWriter(tmxOutputPath);
            String creationTool = getClass().getCanonicalName();
            String dataType = params.getMarkAsMT() ? "MT-based" : null;
            tmxWriter.writeStartDocument(srcLoc, trgLoc, creationTool, "1", "sentence", dataType, "unknown");
        }

        // Attributes attached to every translation unit written to the TMX output
        attributes = new Hashtable<String, String>();
        if (params.getMarkAsMT()) {
            attributes.put(TMXWriter.CREATIONID, Util.MTFLAG);
        }
        if (!Util.isEmpty(params.getOrigin())) {
            attributes.put("Txt::Origin", params.getOrigin());
        }

        // Flagged as done before the optional TM and SRX resources are opened
        initDone = true;
        store = new SimpleStore();
        totalInternalMatches = 0;
        totalExternalMatches = 0;
        totalEntries = 0;

        if (params.getCheckExistingTm()) {
            existingTm = TmSeekerFactory.createFileBasedTmSeeker(expandPathVariables(params.getExistingTm()));
        }

        segmenter = null;
        if (params.getSegment()) {
            SRXDocument srxDocument = new SRXDocument();
            srxDocument.loadRules(expandPathVariables(params.getSrxPath()));
            segmenter = srxDocument.compileLanguageRules(srcLoc, null);
        }
    }

    /**
     * Expands the root-directory, input-root-directory and locale variables
     * of a configured path, in that order.
     *
     * @param path the path as configured in the parameters
     * @return the path with its variables replaced
     */
    private String expandPathVariables(String path) {
        String withRoot = Util.fillRootDirectoryVariable(path, rootDir);
        String withInputRoot = Util.fillInputRootDirectoryVariable(withRoot, inputRootDir);
        return LocaleId.replaceVariables(withInputRoot, srcLoc, trgLoc);
    }

    /**
     * Processes one input document: records its locales, performs the
     * once-per-batch initialization if still pending, obtains the filter for
     * the document's filter configuration and runs the extraction and
     * translation of its content.
     *
     * @param rawDocument the document to process
     * @throws OkapiException if no filter can be created for the document's
     *         filter configuration
     */
    public void processDocument(RawDocument rawDocument) {
        rawDoc = rawDocument;
        srcLoc = rawDocument.getSourceLocale();
        trgLoc = rawDocument.getTargetLocale();

        if (!initDone) {
            initialize();
        }

        // The previous filter instance is handed back to the mapper
        IFilter created = fcMapper.createFilter(rawDocument.getFilterConfigId(), filter);
        filter = created;
        if (created != null) {
            processInput();
            return;
        }
        throw new OkapiException(String.format("No filter available for the configuration '%s'.", rawDocument.getFilterConfigId()));
    }

    /**
     * Reads the current document through the filter, writes every segment that
     * is found neither in the TM being built nor in the existing TM to a
     * temporary HTML file, and runs the external translation command each time
     * a block of text units is complete (and once more for the last partial
     * block).
     * <p>
     * Whatever happens, the pending HTML writer, the filter and the TM writer
     * are closed and the per-document counters are logged and added to the
     * batch totals.
     *
     * @throws OkapiException wrapping any error raised while processing
     */
    private void processInput() {
        XMLWriter htmlWriter = null;
        try {
            this.filter.open(this.rawDoc);
            this.htmlSourceFile = File.createTempFile("~okapi-47_hft_", ".html");

            // Close any TM writer left open by a previous document
            if (this.tmWriter != null) {
                this.tmWriter.close();
                this.tmWriter = null;
            }
            if (this.params.getMakeTM()) {
                String tmDir = LocaleId.replaceVariables(
                    Util.fillInputRootDirectoryVariable(
                        Util.fillRootDirectoryVariable(this.params.getTmDirectory(), this.rootDir),
                        this.inputRootDir),
                    this.srcLoc, this.trgLoc);
                Util.createDirectories(tmDir + File.separator);
                // True when the TM directory has no "segments.gen" file yet
                boolean noSegmentsFile = !new File(tmDir + File.separator + "segments.gen").exists();
                this.tmWriter = TmWriterFactory.createFileBasedTmWriter(tmDir, noSegmentsFile);
                // Re-open the seeker on the TM being built
                if (this.currentTm != null) {
                    this.currentTm.close();
                }
                this.currentTm = TmSeekerFactory.createFileBasedTmSeeker(tmDir);
            }

            this.docInternalMatches = 0;
            this.docExternalMatches = 0;
            this.docEntries = 0;
            this.subDocId = 0;
            this.currentSubDocId = 0;
            int pendingUnits = 0; // text units with at least one entry in the current block
            int blockSize = this.params.getBlockSize();

            while (this.filter.hasNext()) {
                Event event = this.filter.next();
                switch (event.getEventType()) {
                    case START_SUBDOCUMENT:
                        this.currentSubDocId = ++this.subDocId;
                        break;
                    case END_SUBDOCUMENT:
                        this.currentSubDocId = 0;
                        break;
                    case TEXT_UNIT: {
                        ITextUnit textUnit = event.getTextUnit();
                        if (!textUnit.isTranslatable()) {
                            break;
                        }
                        TextContainer source = textUnit.getSource();
                        if (this.segmenter != null && this.segmenter.computeSegments(source) > 1) {
                            source.getSegments().create(this.segmenter.getRanges());
                        }
                        // Temporary files are created lazily, on the first translatable unit of a block
                        if (htmlWriter == null) {
                            htmlWriter = startTemporaryFiles();
                        }
                        boolean entryWritten = false;
                        for (Segment segment : source.getSegments()) {
                            if (this.currentTm != null && this.currentTm.searchFuzzy(segment.text, 95, 1, null).size() > 0) {
                                this.docInternalMatches++;
                                continue;
                            }
                            if (this.existingTm != null && this.existingTm.searchFuzzy(segment.text, 95, 1, null).size() > 0) {
                                this.docExternalMatches++;
                                continue;
                            }
                            // Not known yet: keep the original fragment and queue its coded HTML form
                            this.store.write(segment.text);
                            htmlWriter.writeStartElement(Namespace.PREFIX_P);
                            htmlWriter.writeAttributeString(Const.ATTR_ID, String.format("%d:%s:%s", this.currentSubDocId, textUnit.getId(), segment.id));
                            htmlWriter.writeRawXML(this.qutil.toCodedHTML(segment.text));
                            htmlWriter.writeEndElementLineBreak();
                            entryWritten = true;
                            this.docEntries++;
                        }
                        if (entryWritten) {
                            pendingUnits++;
                        }
                        break;
                    }
                    default:
                        break;
                }

                // Translate a full block. The "> 0" guard prevents a run without
                // temporary files when the configured block size is zero or negative.
                if (pendingUnits > 0 && pendingUnits >= blockSize) {
                    finishTemporaryFiles(htmlWriter);
                    htmlWriter = null; // already closed: must not be finished again on failure
                    runBatchTranslation();
                    retrieveTranslation();
                    pendingUnits = 0;
                }
            }

            // Close the last temporary files and translate the last partial block, if any
            finishTemporaryFiles(htmlWriter);
            htmlWriter = null;
            if (pendingUnits > 0) {
                runBatchTranslation();
                retrieveTranslation();
            }
        } catch (Throwable th) {
            // The temporary files may not exist yet when the failure happens early
            URI sourceUri = (this.htmlSourceFile != null) ? this.htmlSourceFile.toURI() : null;
            URI targetUri = (this.htmlTargetFile != null) ? this.htmlTargetFile.toURI() : null;
            // The message is passed as an argument so a '%' in it cannot break the formatting
            throw new OkapiException(String.format("Error when processing a file.\nSource='%s'\nTarget='%s'\n%s", sourceUri, targetUri, th.getMessage()), th);
        } finally {
            if (htmlWriter != null) {
                finishTemporaryFiles(htmlWriter);
            }
            if (this.filter != null) {
                this.filter.close();
            }
            if (this.tmWriter != null) {
                this.tmWriter.close();
            }
            if (this.currentTm != null) {
                this.LOGGER.info("Existing matches from TM being built = {}", this.docInternalMatches);
                this.LOGGER.info("Existing matches from existing TM = {}", this.docExternalMatches);
            }
            this.LOGGER.info("Entries sent to translation = {}", this.docEntries);
            this.totalInternalMatches += this.docInternalMatches;
            this.totalExternalMatches += this.docExternalMatches;
            this.totalEntries += this.docEntries;
        }
    }

    /**
     * Starts a new block of entries: opens the temporary HTML source file and
     * writes its header, derives the target and store file names from the
     * source file name, removes any stale target file and creates the store.
     *
     * @return the writer on the temporary HTML source file, positioned after
     *         the header
     */
    private XMLWriter startTemporaryFiles() {
        XMLWriter htmlWriter = new XMLWriter(htmlSourceFile.getPath());
        htmlWriter.writeStartElement("html");
        // Content-type declaration element (presumably the HTML <meta> tag; confirm the constant's value)
        htmlWriter.writeStartElement(Const.ELEM_CUSTPROP);
        htmlWriter.writeAttributeString("http-equiv", "Content-Type");
        htmlWriter.writeAttributeString("content", "text/html; charset=UTF-8");
        htmlWriter.writeEndElementLineBreak();

        // Both companion files sit next to the source file and share its base name
        String sourcePath = htmlSourceFile.getAbsolutePath();
        String basePath = Util.getDirectoryName(sourcePath) + File.separator + Util.getFilename(sourcePath, false);

        File target = new File(basePath + ".trg.html");
        htmlTargetFile = target;
        if (target.exists()) {
            target.delete();
        }

        File storeFile = new File(basePath + ".ori.bin");
        originalStoreFile = storeFile;
        store.create(storeFile);
        return htmlWriter;
    }

    /**
     * Completes the current block: ends and closes the HTML document when a
     * writer is given, then closes the store of original fragments if there is one.
     *
     * @param xMLWriter the writer on the temporary HTML source file, or null
     *        when no file was started for this block
     */
    private void finishTemporaryFiles(XMLWriter xMLWriter) {
        boolean hasWriter = (xMLWriter != null);
        if (hasWriter) {
            // Ends the element still open, then the document itself
            xMLWriter.writeEndElement();
            xMLWriter.writeEndDocument();
            xMLWriter.close();
        }
        SimpleStore fragmentStore = store;
        if (fragmentStore == null) {
            return;
        }
        fragmentStore.close();
    }

    /**
     * Runs the external translation command on the current temporary HTML file
     * and waits for it to complete.
     * <p>
     * The configured command line has its variables replaced in this order:
     * {@code ${inputURI}}, {@code ${inputPath}}, {@code ${outputPath}}, the
     * root and input-root directory variables (left untouched when the
     * corresponding directory is null), {@code ${srcLangName}},
     * {@code ${trgLangName}} and finally the locale variables.
     *
     * @throws OkapiException if the command cannot be started or if the wait
     *         is interrupted (the thread's interrupt flag is restored)
     */
    private void runBatchTranslation() {
        String command = this.params.getCommand();
        try {
            // NOTE(review): ${inputURI} receives File.toString(), i.e. a path rather than a URI; kept as is.
            command = command.replace("${inputURI}", this.htmlSourceFile.toString())
                .replace("${inputPath}", this.htmlSourceFile.getPath())
                .replace("${outputPath}", this.htmlTargetFile.getPath());
            // String.replace() rejects a null replacement: skip the variables that have no value
            if (this.rootDir != null) {
                command = command.replace(Util.ROOT_DIRECTORY_VAR, this.rootDir);
            }
            if (this.inputRootDir != null) {
                command = command.replace(Util.INPUT_ROOT_DIRECTORY_VAR, this.inputRootDir);
            }
            command = command
                .replace("${srcLangName}", this.rawDoc.getSourceLocale().toJavaLocale().getDisplayLanguage(Locale.ENGLISH))
                .replace("${trgLangName}", this.rawDoc.getTargetLocale().toJavaLocale().getDisplayLanguage(Locale.ENGLISH));
            command = LocaleId.replaceVariables(command, this.srcLoc, this.trgLoc);

            this.LOGGER.info("Command line: {}", command);
            Process process = Runtime.getRuntime().exec(command);
            // Both streams are consumed while waiting for the process
            StreamGobbler errorGobbler = new StreamGobbler(process.getErrorStream(), "err");
            StreamGobbler outputGobbler = new StreamGobbler(process.getInputStream(), "out");
            errorGobbler.start();
            outputGobbler.start();

            int exitCode = process.waitFor();
            if (exitCode != 0) {
                this.LOGGER.warn("The batch translation command ended with exit code {}.", exitCode);
            }
        } catch (IOException e) {
            throw new OkapiException("Error during the batch translation.\nCommand line was:\n" + command, e);
        } catch (InterruptedException e2) {
            // Restore the interrupt status so callers up the stack can see it
            Thread.currentThread().interrupt();
            throw new OkapiException("Program interrupted.", e2);
        }
    }

    /**
     * Reads the translated HTML file produced by the external command and
     * pairs each identified entry, in document order, with the next original
     * fragment of the store. Each pair is indexed in the TM being built and/or
     * written to the TMX output, depending on which writers are open.
     * <p>
     * An entry that cannot be converted or written is logged and skipped.
     * Reading stops when the store has no more fragments. The parsed document
     * cache is cleared, the target file deleted and the store closed whether
     * or not the retrieval succeeds.
     *
     * @throws OkapiException if the translated file cannot be read
     */
    private void retrieveTranslation() {
        Source translated = null;
        try {
            this.store.openForRead(this.originalStoreFile);
            translated = new Source(this.htmlTargetFile.toURI().toURL());
            translated.fullSequentialParse();

            for (Element entry : translated.getAllElements(Namespace.PREFIX_P)) {
                // Only elements carrying an id are entries
                String entryId = entry.getAttributeValue(Const.ATTR_ID);
                if (entryId == null) {
                    continue;
                }
                TextFragment original = this.store.readNext();
                if (original == null) {
                    break; // no more original fragments to pair with
                }
                try {
                    String targetText = this.qutil.fromCodedHTML(entry.getContent().toString(), original, true);
                    TextFragment translation = new TextFragment(targetText, original.getCodes());
                    if (this.tmWriter != null) {
                        this.tmWriter.indexTranslationUnit(new TranslationUnit(
                            new TranslationUnitVariant(this.srcLoc, original),
                            new TranslationUnitVariant(this.trgLoc, translation)));
                    }
                    if (this.tmxWriter != null) {
                        this.tmxWriter.writeTU(original, translation, null, this.attributes);
                    }
                } catch (Throwable th) {
                    // The id is only used for diagnostics, so it is reported as found
                    // rather than parsed: a malformed id must not abort the retrieval.
                    this.LOGGER.warn("Skipping entry '{}'.\n{}", entryId, th.getMessage());
                }
            }
        } catch (IOException e) {
            throw new OkapiException(String.format("Error reading the translations.\nSource='%s'\nTarget='%s'", this.htmlSourceFile.toURI(), this.htmlTargetFile.toURI()), e);
        } finally {
            if (translated != null) {
                translated.clearCache();
            }
            this.htmlTargetFile.delete();
            this.store.close();
        }
    }
}
