package net.sf.okapi.steps.whitespacecorrection;

import java.util.EnumSet;
import java.util.Iterator;
import java.util.Set;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextPart;

/* loaded from: input_file:net/sf/okapi/steps/whitespacecorrection/WhitespaceCorrector.class */
/**
 * Adjusts the whitespace that follows sentence punctuation in the target of a
 * text unit when the source and target locales differ in whether they are
 * space delimited.
 *
 * <p>Japanese and Chinese are treated as non-space-delimited languages (see
 * {@link #isSpaceDelimitedLanguage(LocaleId)}); every other locale is treated
 * as space delimited.
 *
 * <ul>
 *   <li>Space-delimited source, non-space-delimited target: whitespace that
 *       follows a full-width / ideographic punctuation character in the
 *       target is removed.</li>
 *   <li>Non-space-delimited source, space-delimited target: a single space is
 *       appended to a target part when the matching source part ends with a
 *       full-width / ideographic punctuation character and the target part
 *       does not already end with whitespace.</li>
 * </ul>
 *
 * <p>NOTE(review): the public {@code EnumSet} constants are mutable and
 * shared; callers should copy them (for example with {@code EnumSet.copyOf})
 * before modifying them.
 */
public class WhitespaceCorrector {
    /** Whitespace characters that break or advance a line. */
    public static final EnumSet<Whitespace> VERTICAL_WHITESPACE = EnumSet.of(
            Whitespace.LINE_FEED,
            Whitespace.LINE_TABULATION,
            Whitespace.FORM_FEED,
            Whitespace.CARRIAGE_RETURN,
            Whitespace.NEXT_LINE,
            Whitespace.LINE_SEPARATOR,
            Whitespace.PARAGRAPH_SEPARATOR);

    /** Non-breaking space characters. */
    public static final EnumSet<Whitespace> NONBREAKING_SPACES = EnumSet.of(
            Whitespace.NO_BREAK_SPACE,
            Whitespace.ZERO_WIDTH_NON_BREAKING_SPACE,
            Whitespace.NAORROW_NO_BREAK_SPACE);

    /** The plain space character only. */
    public static final EnumSet<Whitespace> SPACE = EnumSet.of(Whitespace.SPACE);

    /** Every whitespace character known to {@link Whitespace}. */
    public static final EnumSet<Whitespace> ALL_WHITESPACE = EnumSet.allOf(Whitespace.class);

    /**
     * Every whitespace character that is not in {@link #VERTICAL_WHITESPACE},
     * {@link #NONBREAKING_SPACES}, {@link #SPACE} or {@link #HORIZONTAL_TABS}.
     */
    public static final EnumSet<Whitespace> OTHER = EnumSet.complementOf(EnumSet.of(
            Whitespace.LINE_FEED,
            Whitespace.LINE_TABULATION,
            Whitespace.FORM_FEED,
            Whitespace.CARRIAGE_RETURN,
            Whitespace.NEXT_LINE,
            Whitespace.LINE_SEPARATOR,
            Whitespace.PARAGRAPH_SEPARATOR,
            Whitespace.NO_BREAK_SPACE,
            Whitespace.ZERO_WIDTH_NON_BREAKING_SPACE,
            Whitespace.NAORROW_NO_BREAK_SPACE,
            Whitespace.SPACE,
            Whitespace.CHARACTER_TABULATION));

    /** The horizontal tab character only. */
    public static final EnumSet<Whitespace> HORIZONTAL_TABS = EnumSet.of(Whitespace.CHARACTER_TABULATION);

    /** The character appended when a trailing whitespace has to be added. */
    protected static final char WHITESPACE = ' ';

    protected LocaleId sourceLocale;
    protected LocaleId targetLocale;
    /** Punctuation marks after which whitespace is corrected. */
    protected Set<Punctuation> punctuation;
    /** Characters regarded as whitespace by this corrector. */
    protected Set<Whitespace> whitespace;

    /**
     * Punctuation marks, each pairing the form that is followed by whitespace
     * (the ASCII character) with the forms that are not (the ideographic and
     * full-width characters).
     */
    public enum Punctuation {
        /** '.' paired with U+3002 (ideographic full stop) and U+FF0E (full-width full stop). */
        FULL_STOP('.', '\u3002', '\uFF0E'),
        /** ',' paired with U+3001 (ideographic comma) and U+FF0C (full-width comma). */
        COMMA(',', '\u3001', '\uFF0C'),
        /** '!' paired with U+FF01 (full-width exclamation mark). */
        EXCLAMATION_MARK('!', '\uFF01'),
        /** '?' paired with U+FF1F (full-width question mark). */
        QUESTION_MARK('?', '\uFF1F');

        private final char[] whitespaceNonAcceptingForm;
        private final char whitespaceAcceptingForm;

        Punctuation(char acceptingForm, char... nonAcceptingForms) {
            this.whitespaceAcceptingForm = acceptingForm;
            // Defensive copy so the enum never shares the caller's array.
            this.whitespaceNonAcceptingForm = nonAcceptingForms.clone();
        }

        /**
         * @return the form of this punctuation mark that is followed by whitespace
         */
        public char getWhitespaceAcceptingForm() {
            return this.whitespaceAcceptingForm;
        }

        /**
         * @return a fresh copy of the forms of this punctuation mark that are
         *         not followed by whitespace
         */
        public char[] getWhitespaceNonAcceptingForm() {
            return this.whitespaceNonAcceptingForm.clone();
        }
    }

    /**
     * Whitespace characters recognised by the corrector.
     *
     * <p>Several constant names are misspelled ({@code FOUR_PER_EM_SPACER},
     * {@code PUNCUATION_SPACE}, {@code THIS_SPACE},
     * {@code NAORROW_NO_BREAK_SPACE}); they are kept unchanged because they
     * are part of the public API.
     */
    public enum Whitespace {
        LINE_FEED('\n'),
        /** U+000B. */
        LINE_TABULATION('\u000B'),
        FORM_FEED('\f'),
        CARRIAGE_RETURN('\r'),
        /** U+0085. */
        NEXT_LINE('\u0085'),
        /** U+2028. */
        LINE_SEPARATOR('\u2028'),
        /** U+2029. */
        PARAGRAPH_SEPARATOR('\u2029'),
        CHARACTER_TABULATION('\t'),
        SPACE(' '),
        /** U+00A0. */
        NO_BREAK_SPACE('\u00A0'),
        /** U+2000. */
        EN_QUAD('\u2000'),
        /** U+2001. */
        EM_QUAD('\u2001'),
        /** U+2002. */
        EN_SPACE('\u2002'),
        /** U+2003. */
        EM_SPACE('\u2003'),
        /** U+2004. */
        THREE_PER_EM_SPACE('\u2004'),
        /** U+2005 (name sic). */
        FOUR_PER_EM_SPACER('\u2005'),
        /** U+2006. */
        SIX_PER_EM_SPACE('\u2006'),
        /** U+2007. */
        FIGURE_SPACE('\u2007'),
        /** U+2008 (name sic). */
        PUNCUATION_SPACE('\u2008'),
        /** U+2009 (name sic). */
        THIS_SPACE('\u2009'),
        /** U+200A. */
        HAIR_SPACE('\u200A'),
        /** U+202F (name sic). */
        NAORROW_NO_BREAK_SPACE('\u202F'),
        /** U+205F. */
        MEDIUM_MATHEMATICAL_SPACE('\u205F'),
        /** U+3000. */
        IDEOGRAPHIC_SPACE('\u3000'),
        /** U+200B. */
        ZERO_WIDTH_SPACE('\u200B'),
        /** U+FEFF. */
        ZERO_WIDTH_NON_BREAKING_SPACE('\uFEFF');

        private final char whitespace;

        Whitespace(char c) {
            this.whitespace = c;
        }

        /**
         * @return the character represented by this constant
         */
        public char getWhitespace() {
            return this.whitespace;
        }
    }

    /**
     * Creates a corrector for one locale pair.
     *
     * @param sourceLocale locale of the source text
     * @param targetLocale locale of the target text to correct
     * @param punctuation  punctuation marks after which whitespace is corrected;
     *                     stored by reference, not copied
     * @param whitespace   characters regarded as whitespace; stored by
     *                     reference, not copied
     */
    public WhitespaceCorrector(LocaleId sourceLocale, LocaleId targetLocale,
            Set<Punctuation> punctuation, Set<Whitespace> whitespace) {
        this.sourceLocale = sourceLocale;
        this.targetLocale = targetLocale;
        this.punctuation = punctuation;
        this.whitespace = whitespace;
    }

    /**
     * Tells whether a locale's language separates words with spaces.
     *
     * @param localeId the locale to test
     * @return {@code false} when the locale has the same language as
     *         {@code LocaleId.JAPANESE} or {@code LocaleId.CHINA_CHINESE},
     *         {@code true} otherwise
     */
    static boolean isSpaceDelimitedLanguage(LocaleId localeId) {
        return !(LocaleId.JAPANESE.sameLanguageAs(localeId)
                || LocaleId.CHINA_CHINESE.sameLanguageAs(localeId));
    }

    /**
     * Corrects the whitespace of the target of the given text unit in place.
     * Nothing is changed when both locales are space delimited or both are not.
     *
     * @param iTextUnit the text unit whose target is corrected
     * @return the same text unit instance that was passed in
     */
    public ITextUnit correctWhitespace(ITextUnit iTextUnit) {
        boolean sourceSpaceDelimited = isSpaceDelimitedLanguage(this.sourceLocale);
        boolean targetSpaceDelimited = isSpaceDelimitedLanguage(this.targetLocale);
        if (sourceSpaceDelimited && !targetSpaceDelimited) {
            removeTrailingWhitespace(iTextUnit);
        } else if (!sourceSpaceDelimited && targetSpaceDelimited) {
            addTrailingWhitespace(iTextUnit);
        }
        return iTextUnit;
    }

    /**
     * Removes, in every part of the target, the whitespace that directly
     * follows a non-space-delimited punctuation character.
     *
     * @param iTextUnit the text unit whose target is modified
     */
    protected void removeTrailingWhitespace(ITextUnit iTextUnit) {
        TextContainer target = iTextUnit.getTarget(this.targetLocale);
        if (target == null) {
            // No target for this locale (presumably possible per the Okapi
            // API - verify): nothing to correct.
            return;
        }
        for (TextPart part : target.getParts()) {
            part.setContent(findAndRemoveWhitespacesAfterPunctuation(part.getContent()));
        }
    }

    /**
     * Appends a single {@link #WHITESPACE} to each target part whose matching
     * source part ends with a non-space-delimited punctuation character,
     * unless the target part is empty or already ends with whitespace.
     *
     * <p>Source and target parts are paired by position; surplus parts on
     * either side are ignored.
     *
     * @param iTextUnit the text unit whose target is modified
     */
    protected void addTrailingWhitespace(ITextUnit iTextUnit) {
        TextContainer source = iTextUnit.getSource();
        TextContainer target = iTextUnit.getTarget(this.targetLocale);
        if (target == null) {
            // No target for this locale: nothing to correct.
            return;
        }
        Iterator<TextPart> sourceParts = source.getParts().iterator();
        Iterator<TextPart> targetParts = target.getParts().iterator();
        while (sourceParts.hasNext() && targetParts.hasNext()) {
            TextPart sourcePart = sourceParts.next();
            TextPart targetPart = targetParts.next();
            String sourceText = sourcePart.getContent().getText();
            if (sourceText.isEmpty() || !isNonSpaceDelimitedPunctuation(lastChar(sourceText))) {
                continue;
            }
            String targetText = targetPart.getContent().getText();
            // An empty target has no last character to inspect; skip it
            // instead of failing with StringIndexOutOfBoundsException.
            if (targetText.isEmpty() || isWhitespace(lastChar(targetText))) {
                continue;
            }
            targetPart.getContent().append(WHITESPACE);
        }
    }

    /**
     * @param c the character to test
     * @return {@code true} if {@code c} is one of the configured whitespace characters
     */
    protected boolean isWhitespace(char c) {
        for (Whitespace candidate : this.whitespace) {
            if (c == candidate.whitespace) {
                return true;
            }
        }
        return false;
    }

    /** Returns the last character of a non-empty string. */
    private char lastChar(String str) {
        return str.charAt(str.length() - 1);
    }

    /**
     * @param c the character to test
     * @return {@code true} if {@code c} is the whitespace-accepting form of
     *         one of the configured punctuation marks
     */
    protected boolean isSpaceDelimitedPunctuation(char c) {
        for (Punctuation mark : this.punctuation) {
            if (c == mark.whitespaceAcceptingForm) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param c the character to test
     * @return {@code true} if {@code c} is one of the whitespace-non-accepting
     *         forms of one of the configured punctuation marks
     */
    protected boolean isNonSpaceDelimitedPunctuation(char c) {
        for (Punctuation mark : this.punctuation) {
            // Read the field directly: the public getter clones the array.
            for (char form : mark.whitespaceNonAcceptingForm) {
                if (form == c) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Builds a copy of the given fragment in which every run of whitespace
     * that directly follows a non-space-delimited punctuation character has
     * been dropped. Inline codes are carried over to the copy.
     *
     * @param textFragment the fragment to read; it is not modified
     * @return a new fragment holding the corrected content
     */
    private TextFragment findAndRemoveWhitespacesAfterPunctuation(TextFragment textFragment) {
        TextFragment result = new TextFragment();
        char[] coded = textFragment.getCodedText().toCharArray();
        for (int i = 0; i < coded.length; i++) {
            char current = coded[i];
            if (TextFragment.isMarker(current)) {
                // A marker is followed by a character encoding the code
                // index; consume both and re-append the referenced code.
                i++;
                result.append(textFragment.getCode(TextFragment.toIndex(coded[i])));
                continue;
            }
            result.append(current);
            if (isNonSpaceDelimitedPunctuation(current)
                    && i + 1 < coded.length
                    && isWhitespace(coded[i + 1])) {
                // Jump to the end of the whitespace run; the loop increment
                // then moves on to the first character after it.
                i = getLastWhitespacePosition(coded, i + 1);
            }
        }
        return result;
    }

    /**
     * Finds the end of a whitespace run.
     *
     * @param cArr the characters to scan
     * @param i    index of a character already known to be whitespace
     * @return the index of the last consecutive whitespace character at or after {@code i}
     */
    private int getLastWhitespacePosition(char[] cArr, int i) {
        int last = i;
        while (last + 1 < cArr.length && isWhitespace(cArr[last + 1])) {
            last++;
        }
        return last;
    }
}
