package be.rlab.nlp;

import be.rlab.nlp.model.Language;
import be.rlab.nlp.model.Token;
import java.io.StringReader;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

/* compiled from: Normalizer.kt */
@Metadata(mv = {1, 7, 1}, k = 1, xi = 48, d1 = {"��@\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0010\u000e\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000b\n��\n\u0002\u0018\u0002\n\u0002\b\u0007\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u000f\n\u0002\u0010\b\n\u0002\b\t\b\u0086\b\u0018�� -2\u00020\u0001:\u0001-B[\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u0012\b\b\u0002\u0010\u0006\u001a\u00020\u0007\u0012\b\b\u0002\u0010\b\u001a\u00020\t\u0012\b\b\u0002\u0010\n\u001a\u00020\u0007\u0012\b\b\u0002\u0010\u000b\u001a\u00020\u0007\u0012\b\b\u0002\u0010\f\u001a\u00020\u0007\u0012\b\b\u0002\u0010\r\u001a\u00020\u0007\u0012\b\b\u0002\u0010\u000e\u001a\u00020\u0003¢\u0006\u0002\u0010\u000fJ\u0006\u0010\u0016\u001a\u00020��J\u0006\u0010\u0017\u001a\u00020��J\u0006\u0010\u0006\u001a\u00020��J\t\u0010\u0018\u001a\u00020\u0003HÂ\u0003J\t\u0010\u0019\u001a\u00020\u0005HÂ\u0003J\t\u0010\u001a\u001a\u00020\u0007HÂ\u0003J\t\u0010\u001b\u001a\u00020\tHÂ\u0003J\t\u0010\u001c\u001a\u00020\u0007HÂ\u0003J\t\u0010\u001d\u001a\u00020\u0007HÂ\u0003J\t\u0010\u001e\u001a\u00020\u0007HÂ\u0003J\t\u0010\u001f\u001a\u00020\u0007HÂ\u0003J\t\u0010 
\u001a\u00020\u0003HÂ\u0003Jc\u0010!\u001a\u00020��2\b\b\u0002\u0010\u0002\u001a\u00020\u00032\b\b\u0002\u0010\u0004\u001a\u00020\u00052\b\b\u0002\u0010\u0006\u001a\u00020\u00072\b\b\u0002\u0010\b\u001a\u00020\t2\b\b\u0002\u0010\n\u001a\u00020\u00072\b\b\u0002\u0010\u000b\u001a\u00020\u00072\b\b\u0002\u0010\f\u001a\u00020\u00072\b\b\u0002\u0010\r\u001a\u00020\u00072\b\b\u0002\u0010\u000e\u001a\u00020\u0003HÆ\u0001J\u0013\u0010\"\u001a\u00020\u00072\b\u0010#\u001a\u0004\u0018\u00010\u0001HÖ\u0003J\u000e\u0010\b\u001a\u00020��2\u0006\u0010\b\u001a\u00020\tJ\t\u0010$\u001a\u00020%HÖ\u0001J\u000e\u0010\u000e\u001a\u00020��2\u0006\u0010&\u001a\u00020\u0003J\u0006\u0010'\u001a\u00020��J\u0006\u0010(\u001a\u00020��J\u0006\u0010)\u001a\u00020��J\u0006\u0010*\u001a\u00020\u0003J\u0006\u0010\n\u001a\u00020��J\u0006\u0010\u000b\u001a\u00020��J\u0006\u0010\f\u001a\u00020��J\u0006\u0010+\u001a\u00020��J\t\u0010,\u001a\u00020\u0003HÖ\u0001R\u000e\u0010\u0006\u001a\u00020\u0007X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\b\u001a\u00020\tX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u000e\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0004\u001a\u00020\u0005X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\n\u001a\u00020\u0007X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u000b\u001a\u00020\u0007X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\f\u001a\u00020\u0007X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0010\u001a\u00020\u0011X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\r\u001a\u00020\u0007X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0012\u001a\u00020\u0013X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0014\u001a\u00020\u0015X\u0082\u0004¢\u0006\u0002\n��¨\u0006."}, d2 = {"Lbe/rlab/nlp/Normalizer;", "", "text", "", "language", "Lbe/rlab/nlp/model/Language;", "caseSensitive", "", "form", "Ljava/text/Normalizer$Form;", "removeDiacritics", "removePunctuation", "removeStopWords", "stemming", "joinWith", 
"(Ljava/lang/String;Lbe/rlab/nlp/model/Language;ZLjava/text/Normalizer$Form;ZZZZLjava/lang/String;)V", "stemmer", "Lbe/rlab/nlp/MultiLanguageStemmer;", "stopWordTokenizer", "Lbe/rlab/nlp/StopWordTokenizer;", "wordTokenizer", "Lbe/rlab/nlp/WordTokenizer;", "applyStemming", "caseInsensitive", "component1", "component2", "component3", "component4", "component5", "component6", "component7", "component8", "component9", "copy", "equals", "other", "hashCode", "", "joinText", "keepDiacritics", "keepPunctuation", "keepStopWords", "normalize", "skipStemming", "toString", "Companion", "kotlin-search"})
/* loaded from: input_file:be/rlab/nlp/Normalizer.class */
/**
 * Immutable, fluent text normalizer (decompiled from the Kotlin data class in {@code Normalizer.kt}).
 *
 * <p>An instance holds the text to normalize plus a set of switches. Every configuration method
 * returns a modified copy; {@link #normalize()} runs the pipeline and returns the result.
 *
 * <p>Note: inside this class the simple name {@code Normalizer} refers to this class, so the JDK
 * normalizer and its {@code Form} enum are always written fully qualified as
 * {@code java.text.Normalizer}.
 */
public final class Normalizer {

    /** Text that {@link #normalize()} operates on. */
    @NotNull
    private final String text;

    /** Language used to create the stop-word tokenizer and the stemmer. */
    @NotNull
    private final Language language;

    /** When {@code false}, {@link #normalize()} lower-cases the text with {@link Locale#ROOT}. */
    private final boolean caseSensitive;

    /** Unicode normalization form handed to {@code java.text.Normalizer.normalize}. */
    @NotNull
    private final java.text.Normalizer.Form form;

    /** When {@code true}, matches of {@link #REGEX_UNACCENT} are removed from the text. */
    private final boolean removeDiacritics;

    /** Flag passed to the {@link WordTokenizer} constructor. */
    private final boolean removePunctuation;

    /** When {@code true}, the text is additionally run through the stop-word tokenizer. */
    private final boolean removeStopWords;

    /** When {@code true}, every word of the result is passed through the stemmer. */
    private final boolean stemming;

    /** Separator placed between words of the result (also used to split words before stemming). */
    @NotNull
    private final String joinWith;

    @NotNull
    private final WordTokenizer wordTokenizer;

    @NotNull
    private final StopWordTokenizer stopWordTokenizer;

    @NotNull
    private final MultiLanguageStemmer stemmer;

    @NotNull
    public static final Companion Companion = new Companion(null);

    /** Matches runs of characters in the Unicode "Combining Diacritical Marks" block. */
    @NotNull
    private static final Regex REGEX_UNACCENT = new Regex("\\p{InCombiningDiacriticalMarks}+");

    /* compiled from: Normalizer.kt */
    @Metadata(mv = {1, 7, 1}, k = 1, xi = 48, d1 = {"��$\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000e\n��\n\u0002\u0018\u0002\n��\b\u0086\u0003\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002J\u0016\u0010\u0005\u001a\u00020\u00062\u0006\u0010\u0007\u001a\u00020\b2\u0006\u0010\t\u001a\u00020\nR\u000e\u0010\u0003\u001a\u00020\u0004X\u0082\u0004¢\u0006\u0002\n��¨\u0006\u000b"}, d2 = {"Lbe/rlab/nlp/Normalizer$Companion;", "", "()V", "REGEX_UNACCENT", "Lkotlin/text/Regex;", "new", "Lbe/rlab/nlp/Normalizer;", "text", "", "language", "Lbe/rlab/nlp/model/Language;", "kotlin-search"})
    /* loaded from: input_file:be/rlab/nlp/Normalizer$Companion.class */
    public static final class Companion {
        private Companion() {
        }

        /**
         * Creates a normalizer for {@code str} in {@code language} with every optional setting
         * left at its default.
         *
         * @param str the text to normalize
         * @param language the language of the text
         * @return a new normalizer using the default configuration
         */
        @NotNull
        /* renamed from: new, reason: not valid java name */
        public final Normalizer m5new(@NotNull String str, @NotNull Language language) {
            Intrinsics.checkNotNullParameter(str, "text");
            Intrinsics.checkNotNullParameter(language, "language");
            // Mask 508 = 4|8|16|32|64|128|256: all seven optional arguments take their defaults,
            // so the placeholder values passed here are ignored.
            return new Normalizer(str, language, false, null, false, false, false, false, null, 508, null);
        }

        public /* synthetic */ Companion(DefaultConstructorMarker defaultConstructorMarker) {
            this();
        }
    }

    /**
     * Creates a fully configured normalizer.
     *
     * @param str the text to normalize
     * @param language language used to create the stop-word tokenizer and the stemmer
     * @param z {@code caseSensitive}: keep the original letter case
     * @param form Unicode normalization form applied first
     * @param z2 {@code removeDiacritics}: strip combining diacritical marks
     * @param z3 {@code removePunctuation}: flag forwarded to the word tokenizer
     * @param z4 {@code removeStopWords}: run the stop-word tokenizer
     * @param z5 {@code stemming}: stem every resulting word
     * @param str2 {@code joinWith}: separator placed between words
     */
    public Normalizer(@NotNull String str, @NotNull Language language, boolean z, @NotNull java.text.Normalizer.Form form, boolean z2, boolean z3, boolean z4, boolean z5, @NotNull String str2) {
        Intrinsics.checkNotNullParameter(str, "text");
        Intrinsics.checkNotNullParameter(language, "language");
        Intrinsics.checkNotNullParameter(form, "form");
        Intrinsics.checkNotNullParameter(str2, "joinWith");
        this.text = str;
        this.language = language;
        this.caseSensitive = z;
        this.form = form;
        this.removeDiacritics = z2;
        this.removePunctuation = z3;
        this.removeStopWords = z4;
        this.stemming = z5;
        this.joinWith = str2;
        this.wordTokenizer = new WordTokenizer(this.removePunctuation);
        this.stopWordTokenizer = StopWordTokenizer.Companion.m9new(this.language);
        this.stemmer = MultiLanguageStemmer.Companion.m2new(this.language);
    }

    /**
     * Default-argument bridge generated by the Kotlin compiler. Each bit set in {@code i} means
     * "the corresponding argument was omitted, use its default":
     * caseSensitive=false (4), form=NFD (8), removeDiacritics=true (16), removePunctuation=true (32),
     * removeStopWords=false (64), stemming=true (128), joinWith=" " (256).
     */
    public /* synthetic */ Normalizer(String str, Language language, boolean z, java.text.Normalizer.Form form, boolean z2, boolean z3, boolean z4, boolean z5, String str2, int i, DefaultConstructorMarker defaultConstructorMarker) {
        this(
                str,
                language,
                (i & 4) != 0 ? false : z,
                (i & 8) != 0 ? java.text.Normalizer.Form.NFD : form,
                (i & 16) != 0 ? true : z2,
                (i & 32) != 0 ? true : z3,
                (i & 64) != 0 ? false : z4,
                (i & 128) != 0 ? true : z5,
                (i & 256) != 0 ? " " : str2);
    }

    // The builder-style methods below all delegate to copy$default. The mask is 511 minus the bit
    // of the single property being overridden, so every other property is taken from this instance.

    /** Returns a copy that preserves letter case. */
    @NotNull
    public final Normalizer caseSensitive() {
        return copy$default(this, null, null, true, null, false, false, false, false, null, 507, null);
    }

    /** Returns a copy that lower-cases the text. */
    @NotNull
    public final Normalizer caseInsensitive() {
        return copy$default(this, null, null, false, null, false, false, false, false, null, 507, null);
    }

    /**
     * Returns a copy that uses the given Unicode normalization form.
     *
     * @param form the normalization form to apply
     */
    @NotNull
    public final Normalizer form(@NotNull java.text.Normalizer.Form form) {
        Intrinsics.checkNotNullParameter(form, "form");
        return copy$default(this, null, null, false, form, false, false, false, false, null, 503, null);
    }

    /** Returns a copy with diacritics removal enabled. */
    @NotNull
    public final Normalizer removeDiacritics() {
        return copy$default(this, null, null, false, null, true, false, false, false, null, 495, null);
    }

    /** Returns a copy with diacritics removal disabled. */
    @NotNull
    public final Normalizer keepDiacritics() {
        return copy$default(this, null, null, false, null, false, false, false, false, null, 495, null);
    }

    /** Returns a copy with stop-word removal enabled. */
    @NotNull
    public final Normalizer removeStopWords() {
        return copy$default(this, null, null, false, null, false, false, true, false, null, 447, null);
    }

    /** Returns a copy with stop-word removal disabled. */
    @NotNull
    public final Normalizer keepStopWords() {
        return copy$default(this, null, null, false, null, false, false, false, false, null, 447, null);
    }

    /** Returns a copy whose word tokenizer is created with {@code removePunctuation = true}. */
    @NotNull
    public final Normalizer removePunctuation() {
        return copy$default(this, null, null, false, null, false, true, false, false, null, 479, null);
    }

    /** Returns a copy whose word tokenizer is created with {@code removePunctuation = false}. */
    @NotNull
    public final Normalizer keepPunctuation() {
        return copy$default(this, null, null, false, null, false, false, false, false, null, 479, null);
    }

    /** Returns a copy with stemming enabled. */
    @NotNull
    public final Normalizer applyStemming() {
        return copy$default(this, null, null, false, null, false, false, false, true, null, 383, null);
    }

    /** Returns a copy with stemming disabled. */
    @NotNull
    public final Normalizer skipStemming() {
        return copy$default(this, null, null, false, null, false, false, false, false, null, 383, null);
    }

    /**
     * Returns a copy that joins the resulting words with {@code str}.
     *
     * @param str the separator text
     */
    @NotNull
    public final Normalizer joinWith(@NotNull String str) {
        Intrinsics.checkNotNullParameter(str, "joinText");
        return copy$default(this, null, null, false, null, false, false, false, false, str, 255, null);
    }

    /**
     * Runs the configured pipeline over the text, in this order:
     * <ol>
     *   <li>Unicode normalization with the configured form;</li>
     *   <li>removal of combining diacritical marks, if enabled;</li>
     *   <li>lower-casing with {@link Locale#ROOT}, unless case sensitive;</li>
     *   <li>word tokenization, the tokens being joined with {@code joinWith};</li>
     *   <li>stop-word tokenization of that joined text, if enabled;</li>
     *   <li>stemming of each {@code joinWith}-separated word, if enabled.</li>
     * </ol>
     *
     * @return the normalized text
     */
    @NotNull
    public final String normalize() {
        String normalizedText = java.text.Normalizer.normalize(this.text, this.form);

        // A single pass is enough: once the marks are stripped a second replace finds nothing.
        if (this.removeDiacritics) {
            normalizedText = REGEX_UNACCENT.replace(normalizedText, "");
        }
        if (!this.caseSensitive) {
            normalizedText = normalizedText.toLowerCase(Locale.ROOT);
        }

        List<CharSequence> words = new ArrayList<>();
        for (Token token : this.wordTokenizer.tokenize(new StringReader(normalizedText))) {
            words.add(token.toString());
        }
        normalizedText = String.join(this.joinWith, words);

        if (this.removeStopWords) {
            // The stop-word tokenizer is fed the already joined text, as in the original pipeline.
            List<CharSequence> keptWords = new ArrayList<>();
            for (Token token : this.stopWordTokenizer.tokenize(new StringReader(normalizedText))) {
                keptWords.add(token.toString());
            }
            normalizedText = String.join(this.joinWith, keptWords);
        }

        if (this.stemming) {
            // NOTE(review): words are recovered by splitting on joinWith, so an empty joinWith
            // presumably does not split on word boundaries - confirm this is intended.
            List<CharSequence> stems = new ArrayList<>();
            for (String word : StringsKt.split(normalizedText, new String[]{this.joinWith}, false, 0)) {
                stems.add(this.stemmer.stem(word));
            }
            normalizedText = String.join(this.joinWith, stems);
        }
        return normalizedText;
    }

    // component1..component9: destructuring accessors generated for the Kotlin data class.

    private final String component1() {
        return this.text;
    }

    private final Language component2() {
        return this.language;
    }

    private final boolean component3() {
        return this.caseSensitive;
    }

    private final java.text.Normalizer.Form component4() {
        return this.form;
    }

    private final boolean component5() {
        return this.removeDiacritics;
    }

    private final boolean component6() {
        return this.removePunctuation;
    }

    private final boolean component7() {
        return this.removeStopWords;
    }

    private final boolean component8() {
        return this.stemming;
    }

    private final String component9() {
        return this.joinWith;
    }

    /**
     * Creates a new normalizer from the given property values (Kotlin data-class {@code copy}).
     * Parameters have the same meaning and order as in the public constructor.
     *
     * @return a new normalizer; this instance is not modified
     */
    @NotNull
    public final Normalizer copy(@NotNull String str, @NotNull Language language, boolean z, @NotNull java.text.Normalizer.Form form, boolean z2, boolean z3, boolean z4, boolean z5, @NotNull String str2) {
        Intrinsics.checkNotNullParameter(str, "text");
        Intrinsics.checkNotNullParameter(language, "language");
        Intrinsics.checkNotNullParameter(form, "form");
        Intrinsics.checkNotNullParameter(str2, "joinWith");
        return new Normalizer(str, language, z, form, z2, z3, z4, z5, str2);
    }

    /**
     * Default-argument bridge for {@link #copy}: for every bit set in {@code i} the matching
     * argument is replaced by the current value held by {@code normalizer}.
     */
    public static /* synthetic */ Normalizer copy$default(Normalizer normalizer, String str, Language language, boolean z, java.text.Normalizer.Form form, boolean z2, boolean z3, boolean z4, boolean z5, String str2, int i, Object obj) {
        if ((i & 1) != 0) {
            str = normalizer.text;
        }
        if ((i & 2) != 0) {
            language = normalizer.language;
        }
        if ((i & 4) != 0) {
            z = normalizer.caseSensitive;
        }
        if ((i & 8) != 0) {
            form = normalizer.form;
        }
        if ((i & 16) != 0) {
            z2 = normalizer.removeDiacritics;
        }
        if ((i & 32) != 0) {
            z3 = normalizer.removePunctuation;
        }
        if ((i & 64) != 0) {
            z4 = normalizer.removeStopWords;
        }
        if ((i & 128) != 0) {
            z5 = normalizer.stemming;
        }
        if ((i & 256) != 0) {
            str2 = normalizer.joinWith;
        }
        return normalizer.copy(str, language, z, form, z2, z3, z4, z5, str2);
    }

    @Override
    @NotNull
    public String toString() {
        return "Normalizer(text=" + this.text + ", language=" + this.language + ", caseSensitive=" + this.caseSensitive + ", form=" + this.form + ", removeDiacritics=" + this.removeDiacritics + ", removePunctuation=" + this.removePunctuation + ", removeStopWords=" + this.removeStopWords + ", stemming=" + this.stemming + ", joinWith=" + this.joinWith + ')';
    }

    /**
     * Combines the nine data-class properties with the usual 31-multiplier scheme; a boolean
     * contributes 1 when {@code true} and 0 when {@code false}.
     */
    @Override
    public int hashCode() {
        int result = this.text.hashCode();
        result = 31 * result + this.language.hashCode();
        result = 31 * result + (this.caseSensitive ? 1 : 0);
        result = 31 * result + this.form.hashCode();
        result = 31 * result + (this.removeDiacritics ? 1 : 0);
        result = 31 * result + (this.removePunctuation ? 1 : 0);
        result = 31 * result + (this.removeStopWords ? 1 : 0);
        result = 31 * result + (this.stemming ? 1 : 0);
        result = 31 * result + this.joinWith.hashCode();
        return result;
    }

    /** Two normalizers are equal when all nine data-class properties are equal. */
    @Override
    public boolean equals(@Nullable Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Normalizer)) {
            return false;
        }
        Normalizer normalizer = (Normalizer) obj;
        return Intrinsics.areEqual(this.text, normalizer.text) && this.language == normalizer.language && this.caseSensitive == normalizer.caseSensitive && this.form == normalizer.form && this.removeDiacritics == normalizer.removeDiacritics && this.removePunctuation == normalizer.removePunctuation && this.removeStopWords == normalizer.removeStopWords && this.stemming == normalizer.stemming && Intrinsics.areEqual(this.joinWith, normalizer.joinWith);
    }
}
