package net.sf.okapi.steps.segmentation;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.ISegmenter;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.RenumberingUtil;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.annotation.DeepenSegmentationAnnotaton;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.resource.Custom;
import net.sf.okapi.common.resource.ISegments;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextPart;
import net.sf.okapi.lib.segmentation.SRXDocument;
import net.sf.okapi.lib.xliff2.Const;
import net.sf.okapi.steps.segmentation.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Pipeline step that segments the source and/or target content of text units
 * using rules loaded through {@link SRXDocument}.
 *
 * <p>Segmenters are compiled in {@link #handleStartBatchItem(Event)} and applied to
 * each text unit in {@link #handleTextUnit(Event)}. What gets segmented, and how
 * existing segmentation is treated, is driven by the step's {@link Parameters}.
 */
@UsingParameters(Parameters.class)
public class SegmentationStep extends BasePipelineStep {
    private final Logger logger = LoggerFactory.getLogger(getClass());
    private Parameters params = new Parameters();
    /** Segmenter compiled for the source locale; stays {@code null} until source rules are compiled. */
    private ISegmenter srcSeg = null;
    /** Segmenters compiled for the target locales, keyed by locale. */
    private final Map<LocaleId, ISegmenter> trgSegs = new HashMap<LocaleId, ISegmenter>();
    private LocaleId sourceLocale;
    /** May be {@code null}; the handlers treat that as "no target to process". */
    private List<LocaleId> targetLocales;
    /**
     * Reset at the start of each batch and checked by {@link #handleStartBatchItem(Event)}.
     * NOTE(review): nothing in this class ever sets it to {@code true}, so the segmenters
     * are rebuilt for every batch item - confirm whether that is intended.
     */
    private boolean initDone;
    private String rootDir;
    private String inputRootDir;

    /**
     * Sets the locale used to compile the source segmenter.
     *
     * @param localeId the source locale
     */
    @Override
    @StepParameterMapping(parameterType = StepParameterType.SOURCE_LOCALE)
    public void setSourceLocale(LocaleId localeId) {
        this.sourceLocale = localeId;
    }

    /**
     * Sets the locales whose targets are processed by this step.
     *
     * @param list the target locales (stored as-is, not copied)
     */
    @StepParameterMapping(parameterType = StepParameterType.TARGET_LOCALES)
    public void setTargetLocales(List<LocaleId> list) {
        this.targetLocales = list;
    }

    /**
     * Sets the value used to fill the root directory variable in the SRX paths.
     *
     * @param str the root directory
     */
    @StepParameterMapping(parameterType = StepParameterType.ROOT_DIRECTORY)
    public void setRootDirectory(String str) {
        this.rootDir = str;
    }

    /**
     * Sets the value used to fill the input root directory variable in the SRX paths.
     *
     * @param str the input root directory
     */
    @StepParameterMapping(parameterType = StepParameterType.INPUT_ROOT_DIRECTORY)
    public void setInputRootDirectory(String str) {
        this.inputRootDir = str;
    }

    /**
     * @return the source locale last set on this step
     */
    @Override
    public LocaleId getSourceLocale() {
        return this.sourceLocale;
    }

    /**
     * @return the target locales last set on this step, possibly {@code null}
     */
    public List<LocaleId> getTargetLocales() {
        return this.targetLocales;
    }

    /**
     * @return the root directory last set on this step
     */
    public String getRootDirectory() {
        return this.rootDir;
    }

    /**
     * @return the input root directory last set on this step
     */
    public String getInputRootDirectory() {
        return this.inputRootDir;
    }

    /**
     * @return the display name of this step
     */
    @Override
    public String getName() {
        return "Segmentation";
    }

    /**
     * @return a short description of what this step does
     */
    @Override
    public String getDescription() {
        return "Apply SRX segmentation to the text units content of a document. Expects: filter events. Sends back: filter events.";
    }

    /**
     * @return the parameters currently driving this step
     */
    @Override
    public Parameters getParameters() {
        return this.params;
    }

    /**
     * Replaces the parameters of this step.
     *
     * @param iParameters the new parameters; must be a {@link Parameters} instance
     * @throws ClassCastException if the argument is not a {@link Parameters}
     */
    @Override
    public void setParameters(IParameters iParameters) {
        this.params = (Parameters) iParameters;
    }

    /**
     * Clears the initialisation flag at the start of a batch.
     *
     * @param event the start-batch event
     * @return the same event, unchanged
     */
    @Override
    public Event handleStartBatch(Event event) {
        this.initDone = false;
        return event;
    }

    /**
     * Loads the SRX rules and compiles the source segmenter and one target segmenter
     * per target locale, according to the step parameters.
     *
     * <p>The same {@link SRXDocument} instance is used for source and target. When the
     * target rules come from a path identical to the path already loaded for the source,
     * they are not loaded a second time.
     *
     * @param event the start-batch-item event
     * @return the same event, unchanged
     */
    @Override
    public Event handleStartBatchItem(Event event) {
        if (this.initDone) {
            return event;
        }

        SRXDocument srxDoc = new SRXDocument();
        String loadedSourcePath = null;

        if (this.params.getSegmentSource()) {
            if (this.params.getSourceSrxStream() == null) {
                loadedSourcePath = resolveSrxPath(this.params.getSourceSrxPath());
                srxDoc.loadRules(loadedSourcePath);
            } else {
                srxDoc.loadRules(this.params.getSourceSrxStream());
            }
            logSrxWarning(srxDoc);
            // Only override the trimming options when the parameters ask for something
            // other than TRIM_DEFAULT.
            if (this.params.getTrimSrcLeadingWS() != Parameters.TRIM_DEFAULT) {
                srxDoc.setTrimLeadingWhitespaces(this.params.getTrimSrcLeadingWS() == Parameters.TRIM_YES);
            }
            if (this.params.getTrimSrcTrailingWS() != Parameters.TRIM_DEFAULT) {
                srxDoc.setTrimTrailingWhitespaces(this.params.getTrimSrcTrailingWS() == Parameters.TRIM_YES);
            }
            srxDoc.setTreatIsolatedCodesAsWhitespace(this.params.isTreatIsolatedCodesAsWhitespace());
            this.srcSeg = srxDoc.compileLanguageRules(this.sourceLocale, null);
        }

        if (this.params.getSegmentTarget()) {
            if (this.params.getTargetSrxStream() == null) {
                String targetPath = resolveSrxPath(this.params.getTargetSrxPath());
                // Skip the reload when the very same file was already loaded for the source.
                boolean alreadyLoaded = !Util.isEmpty(loadedSourcePath) && loadedSourcePath.equals(targetPath);
                if (!alreadyLoaded) {
                    srxDoc.loadRules(targetPath);
                }
            } else {
                srxDoc.loadRules(this.params.getTargetSrxStream());
            }
            logSrxWarning(srxDoc);
            if (this.params.getTrimTrgLeadingWS() != Parameters.TRIM_DEFAULT) {
                srxDoc.setTrimLeadingWhitespaces(this.params.getTrimTrgLeadingWS() == Parameters.TRIM_YES);
            }
            if (this.params.getTrimTrgTrailingWS() != Parameters.TRIM_DEFAULT) {
                srxDoc.setTrimTrailingWhitespaces(this.params.getTrimTrgTrailingWS() == Parameters.TRIM_YES);
            }
            srxDoc.setTreatIsolatedCodesAsWhitespace(this.params.isTreatIsolatedCodesAsWhitespace());
            // handleTextUnit() accepts a null list of target locales, so do the same here
            // instead of failing with a NullPointerException.
            if (this.targetLocales != null) {
                for (LocaleId targetLocale : this.targetLocales) {
                    this.trgSegs.put(targetLocale, srxDoc.compileLanguageRules(targetLocale, null));
                }
            }
        }
        return event;
    }

    /**
     * When segmentation is active and segmented output is forced, sets the
     * {@code outputSegmentationType} option on the filter parameters of the document.
     *
     * @param event the start-document event
     * @return the same event
     */
    @Override
    public Event handleStartDocument(Event event) {
        boolean segmenting = this.params.getSegmentSource() || this.params.getSegmentTarget();
        if (!segmenting || !this.params.getForcesegmentedOutput()) {
            return event;
        }
        IParameters filterParams = event.getStartDocument().getFilterParameters();
        if (filterParams != null) {
            // NOTE(review): 3 is presumably the filter's "segmented" output type - confirm.
            filterParams.setInteger("outputSegmentationType", 3);
        }
        return event;
    }

    /**
     * Switches the segmentation strategy to {@code DEEPEN_EXISTING} when the custom
     * resource carries a {@link DeepenSegmentationAnnotaton}.
     *
     * @param event the custom event; its resource is cast to {@link Custom}
     * @return the same event
     */
    @Override
    protected Event handleCustom(Event event) {
        Custom custom = (Custom) event.getResource();
        if (custom.getAnnotation(DeepenSegmentationAnnotaton.class) != null) {
            this.params.setSegmentationStrategy(Parameters.SegmStrategy.DEEPEN_EXISTING);
        }
        return event;
    }

    /**
     * Segments the source and the targets of a text unit.
     *
     * <p>The text unit is left untouched when its {@code canResegment} property equals
     * {@link Const#VALUE_NO}, when it is not translatable, or when neither source nor
     * target segmentation is enabled.
     *
     * @param event the text-unit event
     * @return the same event
     */
    @Override
    public Event handleTextUnit(Event event) {
        ITextUnit textUnit = event.getTextUnit();

        boolean resegmentationForbidden = textUnit.getProperty("canResegment") != null
                && textUnit.getProperty("canResegment").getValue().equals(Const.VALUE_NO);
        if (resegmentationForbidden || !textUnit.isTranslatable()) {
            return event;
        }
        if (!this.params.getSegmentSource() && !this.params.getSegmentTarget()) {
            return event;
        }

        if (this.params.getSegmentSource()) {
            segmentSource(textUnit);
        }
        if (this.targetLocales != null) {
            for (LocaleId targetLocale : this.targetLocales) {
                processTarget(textUnit, targetLocale);
            }
        }
        return event;
    }

    /**
     * Fills the root directory and input root directory variables of an SRX path.
     *
     * @param rawPath the path as stored in the parameters
     * @return the path after both substitutions
     */
    private String resolveSrxPath(String rawPath) {
        String withRoot = Util.fillRootDirectoryVariable(rawPath, this.rootDir);
        return Util.fillInputRootDirectoryVariable(withRoot, this.inputRootDir);
    }

    /** Logs the warning held by the SRX document, if it reports one. */
    private void logSrxWarning(SRXDocument srxDoc) {
        if (srxDoc.hasWarning()) {
            this.logger.warn(srxDoc.getWarning());
        }
    }

    /**
     * Applies the source segmenter to a text unit according to the segmentation strategy,
     * then renumbers the source codes when requested and no target locales are set.
     */
    private void segmentSource(ITextUnit textUnit) {
        if (this.params.getSegmentationStrategy() == Parameters.SegmStrategy.OVERWRITE_EXISTING
                || !textUnit.getSource().hasBeenSegmented()) {
            textUnit.createSourceSegmentation(this.srcSeg);
        } else if (this.params.getSegmentationStrategy() == Parameters.SegmStrategy.DEEPEN_EXISTING) {
            deepenSegmentation(textUnit.getSource(), this.srcSeg);
        }
        if (this.params.getRenumberCodes() && this.targetLocales == null) {
            RenumberingUtil.renumberCodesForSegmentation(textUnit.getSource());
        }
    }

    /**
     * Runs the per-locale work for one target: optional segmentation, optional creation
     * of the target through {@code createTarget}, optional code renumbering and optional
     * segment check, in that order.
     */
    private void processTarget(ITextUnit textUnit, LocaleId targetLocale) {
        TextContainer target = textUnit.getTarget(targetLocale);
        ISegmenter segmenter = this.trgSegs.get(targetLocale);

        if (this.params.getSegmentTarget() && target != null) {
            if (this.params.getSegmentationStrategy() == Parameters.SegmStrategy.OVERWRITE_EXISTING
                    || !target.hasBeenSegmented()) {
                segmenter.computeSegments(target);
                target.getSegments().create(segmenter.getRanges());
            } else if (this.params.getSegmentationStrategy() == Parameters.SegmStrategy.DEEPEN_EXISTING) {
                deepenSegmentation(target, segmenter);
            }
        }
        if (this.params.getCopySource()) {
            // NOTE(review): 7 is presumably a "copy everything" creation flag and false
            // presumably means "do not overwrite an existing target" - confirm.
            target = textUnit.createTarget(targetLocale, false, 7);
        }
        if (this.params.getRenumberCodes()) {
            RenumberingUtil.renumberTextUnitCodes(textUnit, targetLocale);
        }
        if (this.params.getCheckSegments() && target != null) {
            verifySegments(textUnit, target, targetLocale);
        }
    }

    /**
     * Logs a warning when source and target differ in segment count or, if the counts
     * match, for each source segment id that has no counterpart in the target.
     */
    private void verifySegments(ITextUnit textUnit, TextContainer target, LocaleId targetLocale) {
        if (target.getSegments().count() != textUnit.getSource().getSegments().count()) {
            this.logger.warn("Text unit id='{}': Source ({}) and target ({}) do not have the same number of segments.", new Object[]{textUnit.getId(), this.sourceLocale, targetLocale});
            return;
        }
        ISegments targetSegments = target.getSegments();
        for (Segment sourceSegment : textUnit.getSource().getSegments()) {
            if (targetSegments.get(sourceSegment.id) == null) {
                this.logger.warn("Text unit id='{}': No match found for source segment id='{}' in target language '{}'", new Object[]{textUnit.getId(), sourceSegment.id, targetLocale});
            }
        }
    }

    /**
     * Re-segments every existing segment of a container with the given segmenter and
     * substitutes the resulting parts for the original segment.
     *
     * <p>Logs an error and does nothing when either argument is {@code null}.
     */
    private void deepenSegmentation(TextContainer container, ISegmenter segmenter) {
        if (container == null || segmenter == null) {
            this.logger.error("Parameter cannot be null");
            return;
        }
        // Iterate from the last part to the first: replacing a part only adds entries at
        // or after the current index, so the indices still to be visited are unaffected.
        for (int index = container.count() - 1; index >= 0; index--) {
            TextPart part = container.get(index);
            if (!part.isSegment()) {
                continue;
            }
            TextContainer resegmented = new TextContainer(part);
            segmenter.computeSegments(resegmented);
            resegmented.getSegments().create(segmenter.getRanges());
            replacePart(container, index, resegmented);
        }
    }

    /**
     * Inserts all parts of {@code replacement} at {@code position} and then removes the
     * part that was originally there.
     */
    private void replacePart(TextContainer container, int position, TextContainer replacement) {
        // Inserting in reverse order at the same index keeps the parts in their original order.
        for (int k = replacement.count() - 1; k >= 0; k--) {
            container.insert(position, replacement.get(k));
        }
        // The original part is now located right after the inserted ones.
        container.remove(position + replacement.count());
    }
}
