package net.sf.okapi.filters.versifiedtxt;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.BOMNewlineEncodingDetector;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.encoder.EncoderManager;
import net.sf.okapi.common.exceptions.OkapiBadFilterInputException;
import net.sf.okapi.common.exceptions.OkapiIOException;
import net.sf.okapi.common.filters.AbstractFilter;
import net.sf.okapi.common.filters.EventBuilder;
import net.sf.okapi.common.filters.FilterConfiguration;
import net.sf.okapi.common.filterwriter.GenericFilterWriter;
import net.sf.okapi.common.filterwriter.IFilterWriter;
import net.sf.okapi.common.resource.AlignmentStatus;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.StartSubDocument;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.skeleton.GenericSkeleton;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Filter for "versified text" documents.
 *
 * <p>The format recognized by this class is line oriented:
 * <ul>
 *   <li>{@code |b<name>} starts a book; the name becomes the document name.</li>
 *   <li>{@code |c<name>} starts a chapter; every chapter is emitted as a sub-document.</li>
 *   <li>{@code |v<id>} (optionally followed by {@code (name)} and/or {@code +|}) starts a verse;
 *       the lines up to the next marker become one text unit.</li>
 *   <li>A line containing only {@code <TARGET>} separates the source text of a verse from its
 *       target text (bilingual files).</li>
 *   <li>Trados-style segments {@code {0>source<}NN{>target<0}} are supported as an alternative
 *       bilingual layout.</li>
 *   <li>{@code {n}}, {@code <n>} and {@code </n>} (n being digits) are turned into placeholder
 *       codes.</li>
 * </ul>
 *
 * <p>Instances keep per-document parsing state and are therefore meant to process one document at
 * a time; {@link #open(RawDocument, boolean)} resets that state.
 */
@UsingParameters
public class VersifiedTextFilter extends AbstractFilter {
    /** Literal tokens that are replaced by a single character before a text unit is built. */
    private static final Map<String, String> REPLACABLES = new HashMap<>();

    /** Read-ahead limit used for {@code mark()} and size hint for the line buffers. */
    private static final int BUFFER_SIZE = 2800;

    private static final String VERSIFIED_ID = "^([0-9]+)$";
    private static final Pattern VERSIFIED_ID_COMPILED = Pattern.compile(VERSIFIED_ID);

    public static final String VERSIFIED_TXT_MIME_TYPE = "text/x-versified-txt";

    private static final String VERSE = "^[ \\t]*\\|v([^ ]+)[ \\t]*(\\(([^()]+)\\))?(\\+\\|)?[ \\t]*$";
    private static final Pattern VERSE_COMPILED = Pattern.compile(VERSE);

    /** Flags used for the Trados patterns (numeric value 104). */
    private static final int TRADOS_FLAGS = Pattern.UNICODE_CASE | Pattern.DOTALL | Pattern.MULTILINE;

    private static final String TRADOS_SEGMENTS = "\\{0>(.*?)<\\}[0-9]+\\{>(.*?)<0\\}";
    private static final Pattern TRADOS_SEGMENTS_COMPILED = Pattern.compile(TRADOS_SEGMENTS, TRADOS_FLAGS);

    /**
     * Flag-less variant used for whole-line detection; it behaves exactly like
     * {@code String.matches(TRADOS_SEGMENTS)} without recompiling the expression for every line.
     */
    private static final Pattern TRADOS_LINE_COMPILED = Pattern.compile(TRADOS_SEGMENTS);

    private static final String TRADOS_LEAVINGS = "(\\{0>)|(<0\\})|(<\\}[0-9]+\\{>)|(<\\})|(\\{>)";
    private static final Pattern TRADOS_LEAVINGS_COMPILED = Pattern.compile(TRADOS_LEAVINGS, TRADOS_FLAGS);

    private static final String CHAPTER = "^[ \t]*\\|c.+[ \t]*$";
    private static final Pattern CHAPTER_COMPILED = Pattern.compile(CHAPTER);

    private static final String BOOK = "^[ \t]*\\|b.+[ \t]*$";
    private static final Pattern BOOK_COMPILED = Pattern.compile(BOOK);

    private static final String TARGET = "^[ \t]*<TARGET>[ \t]*$";
    private static final Pattern TARGET_COMPILED = Pattern.compile(TARGET);

    private static final String PLACEHOLDER = "(\\{|</?)([0-9]+)(\\}|>)";
    private static final Pattern PLACEHOLDER_PATTERN = Pattern.compile(PLACEHOLDER);

    static {
        REPLACABLES.put("{tab}", "\t");
        REPLACABLES.put("{nb}", "\u00a0");   // no-break space
        REPLACABLES.put("{em}", "\u2014");   // em dash
        REPLACABLES.put("{en}", "\u2013");   // en dash
        REPLACABLES.put("{emsp}", "\u2003"); // em space
        REPLACABLES.put("{ensp}", "\u2002"); // en space
    }

    /** Last value returned by {@code read()}; -1 means end of input, -2 means nothing read yet. */
    private int currentChar;
    private EventBuilder eventBuilder;
    private EncoderManager encoderManager;
    private boolean hasUtf8Bom;
    private boolean hasUtf8Encoding;
    private BufferedReader versifiedFileReader;
    private RawDocument currentRawDocument;
    private BOMNewlineEncodingDetector detector;
    /** Non-null once a chapter sub-document has been started for the current document. */
    private StartSubDocument startSubDocument;
    private Parameters params;
    /** Accumulates the characters of the line currently being read. */
    private StringBuilder filterBuffer;
    private final Logger LOGGER = LoggerFactory.getLogger(getClass());
    private String newline = Util.LINEBREAK_UNIX;
    private String currentChapter = "";
    private String currentBook = "";
    private boolean foundVerse = false;
    private boolean foundBook = false;
    /** True when the head of the document contains a line that is a Trados segment. */
    private boolean trados = false;

    /** Creates the filter and registers its single default configuration. */
    public VersifiedTextFilter() {
        setMimeType(VERSIFIED_TXT_MIME_TYPE);
        setMultilingual(false);
        setFilterWriter(new GenericFilterWriter(createSkeletonWriter(), getEncoderManager()));
        setName("okf_versifiedtxt");
        setDisplayName("Versified Text Filter");
        addConfiguration(new FilterConfiguration(getName(), VERSIFIED_TXT_MIME_TYPE, getClass().getName(),
                "Versified Text", "Versified Text Documents"));
        setParameters(new Parameters());
    }

    /** Delegates to the superclass implementation. */
    @Override
    public IFilterWriter createFilterWriter() {
        return super.createFilterWriter();
    }

    /**
     * Opens a document with skeleton generation enabled.
     *
     * @param rawDocument the document to parse
     */
    @Override
    public void open(RawDocument rawDocument) {
        open(rawDocument, true);
    }

    /**
     * Opens a document: closes whatever was open before, resets all per-document state, resolves
     * the encoding and newline type, and inspects the head of the input to decide whether the file
     * is bilingual ({@code <TARGET>} layout or Trados layout).
     *
     * @param rawDocument the document to parse
     * @param generateSkeleton passed through to {@code setOptions}
     * @throws OkapiIOException if the head of the document cannot be read
     */
    @Override
    public void open(RawDocument rawDocument, boolean generateSkeleton) {
        close();
        resetDocumentState();
        this.currentRawDocument = rawDocument;
        if (rawDocument.getInputURI() != null) {
            setDocumentName(rawDocument.getInputURI().getPath());
        }

        this.detector = new BOMNewlineEncodingDetector(rawDocument.getStream(), rawDocument.getEncoding());
        this.detector.detectAndRemoveBom();
        setEncoding(rawDocument.getEncoding());
        this.hasUtf8Bom = this.detector.hasUtf8Bom();
        this.hasUtf8Encoding = this.detector.hasUtf8Encoding();
        this.newline = this.detector.getNewlineType().toString();
        setNewlineType(this.newline);

        String encoding = getEncoding();
        if (this.detector.isDefinitive()) {
            encoding = this.detector.getEncoding();
            this.LOGGER.debug("Overridding user set encoding (if any). Setting auto-detected encoding ({}).", encoding);
        } else if (getEncoding().equals("null")) {
            // "null" is the textual marker for "no encoding specified".
            encoding = this.detector.getEncoding();
            this.LOGGER.debug("Default encoding and detected encoding not found. Using best guess encoding ({})", encoding);
        }
        rawDocument.setEncoding(encoding);
        setEncoding(encoding);
        setOptions(rawDocument.getSourceLocale(), rawDocument.getTargetLocale(), encoding, generateSkeleton);

        this.versifiedFileReader = new BufferedReader(rawDocument.getReader());
        detectBilingualLayout();

        if (this.eventBuilder == null) {
            this.eventBuilder = new EventBuilder();
        } else {
            this.eventBuilder.reset(null, this);
        }
    }

    /**
     * Resets every field that describes the document being parsed, so that a filter instance that
     * is reused does not carry flags or an open sub-document over from the previous document.
     */
    private void resetDocumentState() {
        this.currentChapter = "";
        this.currentBook = "";
        this.currentChar = -2;
        this.filterBuffer = new StringBuilder(BUFFER_SIZE - 1);
        this.foundVerse = false;
        this.foundBook = false;
        this.startSubDocument = null;
        this.trados = false;
        setMultilingual(false);
    }

    /**
     * Looks at the first lines of the input for a {@code <TARGET>} line or a line that is a Trados
     * segment and sets the multilingual/Trados flags accordingly. The reader is left positioned at
     * the start of the document.
     *
     * <p>At most {@code BUFFER_SIZE - 1} characters are consumed before {@code reset()}, so the
     * mark can never be invalidated by a long line. The line budget (line length + 2 per line,
     * stop at {@code BUFFER_SIZE}) guarantees that a line cut off at the end of the head is never
     * tested.
     *
     * @throws OkapiIOException if reading, marking or resetting the reader fails
     */
    private void detectBilingualLayout() {
        String line = "";
        try {
            this.versifiedFileReader.mark(BUFFER_SIZE);
            char[] head = new char[BUFFER_SIZE - 1];
            int filled = 0;
            while (filled < head.length) {
                int count = this.versifiedFileReader.read(head, filled, head.length - filled);
                if (count == -1) {
                    break;
                }
                filled += count;
            }
            this.versifiedFileReader.reset();

            // Fully qualified to avoid touching the import list of the file.
            BufferedReader headReader = new BufferedReader(new java.io.StringReader(new String(head, 0, filled)));
            int budget = 0;
            while ((line = headReader.readLine()) != null) {
                budget += line.length() + 2;
                if (budget >= BUFFER_SIZE) {
                    break;
                }
                if (TARGET_COMPILED.matcher(line).matches()) {
                    setMultilingual(true);
                    this.trados = false;
                    break;
                }
                if (TRADOS_LINE_COMPILED.matcher(line).matches()) {
                    setMultilingual(true);
                    this.trados = true;
                    break;
                }
            }
        } catch (IOException e) {
            throw new OkapiIOException("IO error detecting if file is multilingual: " + (line == null ? "unkown line" : line), e);
        }
    }

    /**
     * Closes the current raw document and the reader, if any. A failure to close the reader is
     * logged as a warning and not propagated.
     */
    @Override
    public void close() {
        if (this.currentRawDocument != null) {
            this.currentRawDocument.close();
        }
        if (this.versifiedFileReader != null) {
            try {
                this.versifiedFileReader.close();
            } catch (IOException e) {
                this.LOGGER.warn("Error closing the versified text buffered reader.", e);
            }
        }
    }

    /**
     * Returns the encoder manager, creating it on first use with a mapping from this filter's MIME
     * type to the default encoder.
     */
    @Override
    public EncoderManager getEncoderManager() {
        if (this.encoderManager == null) {
            this.encoderManager = new EncoderManager();
            this.encoderManager.setMapping(VERSIFIED_TXT_MIME_TYPE, "net.sf.okapi.common.encoder.DefaultEncoder");
        }
        return this.encoderManager;
    }

    /** Returns the parameters object currently set on this filter. */
    @Override
    public IParameters getParameters() {
        return this.params;
    }

    /**
     * Sets the parameters of this filter.
     *
     * @param iParameters must be an instance of this package's {@code Parameters}
     */
    @Override
    public void setParameters(IParameters iParameters) {
        this.params = (Parameters) iParameters;
    }

    /** Delegates to the event builder. */
    @Override
    public boolean hasNext() {
        return this.eventBuilder.hasNext();
    }

    /**
     * Returns the next event. Queued events are served first; otherwise the input is read line by
     * line until a line produces at least one event or the end of input is reached.
     *
     * @throws OkapiIOException if reading the input fails
     * @throws OkapiBadFilterInputException if the end of input is reached and no verse marker was
     *         seen
     */
    @Override
    public Event next() {
        String line = null;
        if (this.eventBuilder.hasQueuedEvents()) {
            return this.eventBuilder.next();
        }
        while (this.currentChar != -1 && !isCanceled()) {
            try {
                this.currentChar = this.versifiedFileReader.read();
                if (!isLineBoundary(this.currentChar)) {
                    this.filterBuffer.append((char) this.currentChar);
                    continue;
                }

                line = Util.trimEnd(this.filterBuffer.toString(), Util.LINEBREAK_DOS);
                this.filterBuffer = new StringBuilder(BUFFER_SIZE - 1);
                if (this.currentChar == -1 && line.isEmpty()) {
                    break;
                }
                this.newline = handleNewline();
                if (this.currentChar == -1) {
                    // The last line of the input has no terminator to write back.
                    this.newline = "";
                }

                Matcher verse = VERSE_COMPILED.matcher(line);
                if (verse.matches()) {
                    handleDocumentPart(line + this.newline);
                    handleVerse(this.versifiedFileReader, line, verse.group(1), verse.group(3));
                    this.foundVerse = true;
                } else if (BOOK_COMPILED.matcher(line).matches()) {
                    this.currentBook = markerValue(line, "|b");
                    setDocumentName(this.currentBook);
                    this.eventBuilder.addFilterEvent(createStartFilterEvent());
                    handleDocumentPart(line + this.newline);
                    this.foundBook = true;
                } else if (CHAPTER_COMPILED.matcher(line).matches()) {
                    this.currentChapter = markerValue(line, "|c");
                    if (this.startSubDocument != null) {
                        this.eventBuilder.endSubDocument();
                    }
                    handleSubDocument(this.currentChapter);
                    handleDocumentPart(line + this.newline);
                } else {
                    handleDocumentPart(line + this.newline);
                }

                if (this.eventBuilder.hasQueuedEvents()) {
                    break;
                }
            } catch (IOException e) {
                throw new OkapiIOException("IO error reading versified file at: " + (line == null ? "unkown line" : line), e);
            }
        }

        if (this.currentChar == -1) {
            if (this.startSubDocument != null) {
                this.eventBuilder.endSubDocument();
            }
            this.eventBuilder.flushRemainingTempEvents();
            if (!this.foundBook) {
                // No |b line was seen, so the start event has not been sent yet.
                this.eventBuilder.addFilterEvent(createStartFilterEvent());
                this.LOGGER.warn("Missing book marker at start of document: |b");
            }
            this.eventBuilder.addFilterEvent(createEndFilterEvent());
            if (!this.foundVerse) {
                throw new OkapiBadFilterInputException("There are no verse codes in this document");
            }
        }
        return this.eventBuilder.next();
    }

    /** Tells whether {@code c} ends a line: carriage return, line feed or end of input (-1). */
    private static boolean isLineBoundary(int c) {
        return c == '\r' || c == '\n' || c == -1;
    }

    /**
     * Returns the text that follows {@code marker} on a marker line. The marker patterns allow
     * leading blanks, so the value starts after the marker itself rather than at a fixed column.
     */
    private static String markerValue(String line, String marker) {
        return line.substring(line.indexOf(marker) + marker.length());
    }

    /** Returns whether the detector found a UTF-8 byte order mark when the document was opened. */
    @Override
    protected boolean isUtf8Bom() {
        return this.hasUtf8Bom;
    }

    /** Returns whether the detector reported UTF-8 encoding when the document was opened. */
    @Override
    protected boolean isUtf8Encoding() {
        return this.hasUtf8Encoding;
    }

    /**
     * Returns the line terminator matching the detected newline type. For CRLF one additional
     * character is consumed from the reader (the second half of the terminator).
     *
     * @throws IOException if that extra read fails
     */
    private String handleNewline() throws IOException {
        String terminator = Util.LINEBREAK_UNIX;
        switch (this.detector.getNewlineType()) {
            case CR:
                terminator = Util.LINEBREAK_MAC;
                break;
            case CRLF:
                terminator = Util.LINEBREAK_DOS;
                this.versifiedFileReader.read();
                break;
            case LF:
                terminator = Util.LINEBREAK_UNIX;
                break;
        }
        return terminator;
    }

    /** Starts a sub-document named {@code name} and remembers it as the open one. */
    private void handleSubDocument(String name) {
        this.startSubDocument = this.eventBuilder.startSubDocument();
        this.startSubDocument.setName(name);
    }

    /**
     * Reads the body of a verse (everything up to the next verse, book or chapter marker, or the
     * end of input) and emits it as one text unit. When a marker line is met the reader is reset
     * to the start of that line so that {@link #next()} processes it.
     *
     * @param bufferedReader reader used for mark/reset (the caller passes the filter's reader)
     * @param verseLine the verse marker line; currently unused
     * @param verseId first capture group of the verse marker
     * @param verseName optional name given in parentheses on the marker line, or null
     * @throws IOException if marking the reader fails
     * @throws OkapiIOException if reading the verse body fails
     */
    private void handleVerse(BufferedReader bufferedReader, String verseLine, String verseId, String verseName)
            throws IOException {
        String line = null;
        StringBuilder sourceLines = new StringBuilder(BUFFER_SIZE);
        StringBuilder targetLines = new StringBuilder(BUFFER_SIZE);
        boolean hasTarget = false;

        bufferedReader.mark(BUFFER_SIZE);
        while (this.currentChar != -1) {
            try {
                this.currentChar = this.versifiedFileReader.read();
                if (!isLineBoundary(this.currentChar)) {
                    this.filterBuffer.append((char) this.currentChar);
                    continue;
                }

                line = Util.trimEnd(this.filterBuffer.toString(), Util.LINEBREAK_DOS);
                this.filterBuffer = new StringBuilder(BUFFER_SIZE - 1);
                this.newline = handleNewline();

                if (VERSE_COMPILED.matcher(line).matches()
                        || BOOK_COMPILED.matcher(line).matches()
                        || CHAPTER_COMPILED.matcher(line).matches()) {
                    // The marker belongs to the next structure: rewind and let next() handle it.
                    bufferedReader.reset();
                    break;
                }
                if (TARGET_COMPILED.matcher(line).matches()) {
                    hasTarget = true;
                } else {
                    StringBuilder sink = hasTarget ? targetLines : sourceLines;
                    sink.append(line).append(Util.LINEBREAK_UNIX);
                    bufferedReader.mark(BUFFER_SIZE);
                }
            } catch (IOException e) {
                throw new OkapiIOException("IO error reading versified file at: " + (line == null ? "unkown line" : line), e);
            }
        }

        // Strip the line feeds added above; how many depends on whether another marker follows
        // and on whether the verse has a target part.
        String source = chopNewline(sourceLines.toString());
        String target = chopNewline(targetLines.toString());
        if (this.currentChar != -1) {
            if (hasTarget) {
                source = chopNewline(source);
                target = chopNewline(target);
            } else {
                source = chopNewline(chopNewline(source));
            }
        } else if (hasTarget) {
            target = Util.trimEnd(target, Util.LINEBREAK_UNIX);
        } else {
            source = Util.trimEnd(source, Util.LINEBREAK_UNIX);
        }
        if (hasTarget && this.currentChar == -1 && chopNewline(target).isEmpty()) {
            target = "";
        }

        this.eventBuilder.startTextUnit();
        ITextUnit textUnit = buildTextUnit(source, target, hasTarget, this.trados);

        GenericSkeleton skeleton = new GenericSkeleton();
        skeleton.addContentPlaceholder(textUnit);
        if (hasTarget) {
            skeleton.add(this.newline + "<TARGET>" + this.newline);
            skeleton.addContentPlaceholder(textUnit, getTrgLoc());
        }
        if (this.currentChar != -1) {
            skeleton.add(this.newline + this.newline);
        }
        textUnit.setSkeleton(skeleton);

        Matcher numericId = VERSIFIED_ID_COMPILED.matcher(verseId);
        if (verseName != null) {
            textUnit.setName(verseName);
            textUnit.setId(verseId);
        } else if (numericId.matches()) {
            String delimiter = Code.EXTENDED_CODE_TYPE_VALUE_DELIMITER;
            textUnit.setName(this.currentBook + delimiter + this.currentChapter + delimiter + numericId.group(1));
            // The chapter prefix is followed by a delimiter unless the chapter is the empty string.
            String chapterDelimiter = (this.currentChapter == null || !this.currentChapter.isEmpty()) ? delimiter : "";
            textUnit.setId(this.currentChapter + chapterDelimiter + numericId.group(1));
        } else {
            textUnit.setName(verseId);
            textUnit.setId(verseId);
        }
        textUnit.setMimeType(getMimeType());
        this.eventBuilder.endTextUnit();
    }

    /**
     * Replaces every token of {@code REPLACABLES} found in {@code str} by its character.
     *
     * @return the converted text; {@code str} itself when it is null or empty
     */
    private String replacePlacebles(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        String result = str;
        for (Map.Entry<String, String> entry : REPLACABLES.entrySet()) {
            result = result.replace(entry.getKey(), entry.getValue());
        }
        return result;
    }

    /**
     * Fills the text unit currently being built by the event builder.
     *
     * @param source source text of the verse
     * @param target target text of the verse (only used when {@code hasTarget} is true and the
     *        document is not in Trados layout)
     * @param hasTarget whether a {@code <TARGET>} line was found in the verse
     * @param tradosLayout whether the document was detected as a Trados bilingual file
     * @return the text unit that was filled
     */
    private ITextUnit buildTextUnit(String source, String target, boolean hasTarget, boolean tradosLayout) {
        ITextUnit textUnit = this.eventBuilder.peekTempEvent().getTextUnit();
        String convertedSource = replacePlacebles(source);
        String convertedTarget = replacePlacebles(target);
        if (tradosLayout) {
            return buildTextUnitForTrados(convertedSource);
        }
        buildTextUnitForNonTrados(convertedSource, true);
        if (hasTarget) {
            buildTextUnitForNonTrados(convertedTarget, false);
        }
        return textUnit;
    }

    /**
     * Builds source and target segments from Trados segment markup. When {@code str} contains no
     * complete segment it is treated as monolingual text and a warning is logged.
     *
     * @throws OkapiBadFilterInputException if {@code str} has no complete segment but still
     *         contains fragments of Trados markers
     */
    private ITextUnit buildTextUnitForTrados(String str) {
        ITextUnit textUnit = this.eventBuilder.peekTempEvent().getTextUnit();
        Matcher segments = TRADOS_SEGMENTS_COMPILED.matcher(str);
        if (segments.find()) {
            textUnit.createTarget(getTrgLoc(), true, 0);
            segments.reset();
            int segmentNumber = 0;
            while (segments.find()) {
                segmentNumber++;
                String segmentId = Integer.toString(segmentNumber);
                Segment sourceSegment = new Segment(segmentId, buildTextFragment(segments.group(1)));
                Segment targetSegment = new Segment(segmentId, buildTextFragment(segments.group(2)));
                textUnit.getSource().append(sourceSegment);
                textUnit.getTarget(getTrgLoc()).append(targetSegment);
            }
            textUnit.getTarget(getTrgLoc()).getSegments().setAlignmentStatus(AlignmentStatus.ALIGNED);
        } else {
            if (TRADOS_LEAVINGS_COMPILED.matcher(str).find()) {
                throw new OkapiBadFilterInputException("Trados segment markers found in source or target text: " + str);
            }
            buildTextUnitForNonTrados(str, true);
            this.LOGGER.warn("In a Trados bilingual document but found no segment markers. Treating as monlingual text: {}", str);
        }
        return textUnit;
    }

    /**
     * Converts {@code str} into a text fragment in which every placeholder is a code.
     *
     * <p>The text is walked match by match instead of via {@code Pattern.split}, because split
     * drops trailing empty strings and would therefore lose placeholders at the end of the text
     * (or all of them when the text consists of placeholders only).
     */
    private TextFragment buildTextFragment(String str) {
        TextFragment fragment = new TextFragment();
        Matcher placeholder = PLACEHOLDER_PATTERN.matcher(str);
        int consumed = 0;
        boolean sawPlaceholder = false;
        while (placeholder.find()) {
            sawPlaceholder = true;
            fragment.append(str.substring(consumed, placeholder.start()));
            fragment.append(toPlaceholderCode(placeholder));
            consumed = placeholder.end();
        }
        if (!sawPlaceholder || consumed < str.length()) {
            fragment.append(str.substring(consumed));
        }
        return fragment;
    }

    /**
     * Appends {@code str} to the current text unit, as source or as target content, converting
     * placeholders into codes.
     *
     * @param str text to append
     * @param isSource true to append to the source, false to append to the target locale
     * @throws OkapiBadFilterInputException if {@code str} contains Trados marker fragments
     */
    private void buildTextUnitForNonTrados(String str, boolean isSource) {
        if (TRADOS_LEAVINGS_COMPILED.matcher(str).find()) {
            throw new OkapiBadFilterInputException("Trados segment markers found in source or target text: " + str);
        }
        this.eventBuilder.setTargetLocale(isSource ? null : getTrgLoc());

        // Same match-by-match walk as buildTextFragment, so trailing placeholders are kept.
        Matcher placeholder = PLACEHOLDER_PATTERN.matcher(str);
        int consumed = 0;
        boolean sawPlaceholder = false;
        while (placeholder.find()) {
            sawPlaceholder = true;
            this.eventBuilder.addToTextUnit(str.substring(consumed, placeholder.start()));
            this.eventBuilder.addToTextUnit(toPlaceholderCode(placeholder));
            consumed = placeholder.end();
        }
        if (!sawPlaceholder || consumed < str.length()) {
            this.eventBuilder.addToTextUnit(str.substring(consumed));
        }
    }

    /**
     * Creates the placeholder code for the current match of {@code PLACEHOLDER_PATTERN}: the
     * matched text is used both as type and as data, the digits as id.
     */
    private static Code toPlaceholderCode(Matcher placeholder) {
        String raw = placeholder.group();
        Code code = new Code(TextFragment.TagType.PLACEHOLDER, raw, raw);
        code.setId(Integer.parseInt(placeholder.group(2)));
        return code;
    }

    /** Queues {@code str} as a document part. */
    private void handleDocumentPart(String str) {
        this.eventBuilder.addDocumentPart(str);
    }

    /**
     * Removes one trailing line feed from {@code str}, if there is one.
     *
     * @return {@code str} without its last character when that character is a line feed;
     *         otherwise {@code str} unchanged (including null and empty input)
     */
    private String chopNewline(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        int last = str.length() - 1;
        if (str.charAt(last) == '\n') {
            return str.substring(0, last);
        }
        return str;
    }
}
