package fr.vsct.tock.nlp.stanford;

import edu.stanford.nlp.international.french.process.FrenchTokenizer;
import edu.stanford.nlp.international.spanish.process.SpanishTokenizer;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import fr.vsct.tock.nlp.model.TokenizerContext;
import fr.vsct.tock.nlp.model.service.engine.NlpTokenizer;
import fr.vsct.tock.nlp.model.service.engine.TokenizerModelHolder;
import fr.vsct.tock.shared.PropertiesKt;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import kotlin.Metadata;
import kotlin.TypeCastException;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function0;
import kotlin.jvm.functions.Function2;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.StringsKt;
import mu.KLogger;
import mu.KotlinLogging;
import org.jetbrains.annotations.NotNull;

/* compiled from: StanfordTokenizer.kt */
@Metadata(mv = {1, 1, 7}, bv = {1, 0, 2}, k = 1, d1 = {"��8\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0010\u0011\n��\n\u0002\u0018\u0002\n\u0002\b\u0004\b��\u0018�� \u00142\u00020\u0001:\u0001\u0014B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003¢\u0006\u0002\u0010\u0004J\u0014\u0010\n\u001a\b\u0012\u0004\u0012\u00020\f0\u000b2\u0006\u0010\r\u001a\u00020\fJ#\u0010\u000e\u001a\b\u0012\u0004\u0012\u00020\f0\u000f2\u0006\u0010\u0010\u001a\u00020\u00112\u0006\u0010\u0012\u001a\u00020\fH\u0016¢\u0006\u0002\u0010\u0013R\u0017\u0010\u0005\u001a\b\u0012\u0004\u0012\u00020\u00070\u0006¢\u0006\b\n��\u001a\u0004\b\b\u0010\t¨\u0006\u0015"}, d2 = {"Lfr/vsct/tock/nlp/stanford/StanfordTokenizer;", "Lfr/vsct/tock/nlp/model/service/engine/NlpTokenizer;", "model", "Lfr/vsct/tock/nlp/model/service/engine/TokenizerModelHolder;", "(Lfr/vsct/tock/nlp/model/service/engine/TokenizerModelHolder;)V", "tokenizerFactory", "Ledu/stanford/nlp/process/TokenizerFactory;", "Ledu/stanford/nlp/ling/CoreLabel;", "getTokenizerFactory", "()Ledu/stanford/nlp/process/TokenizerFactory;", "splitSeparators", "", "", "word", "tokenize", "", "context", "Lfr/vsct/tock/nlp/model/TokenizerContext;", "text", "(Lfr/vsct/tock/nlp/model/TokenizerContext;Ljava/lang/String;)[Ljava/lang/String;", "Companion", "tock-nlp-model-stanford"})
/* loaded from: input_file:fr/vsct/tock/nlp/stanford/StanfordTokenizer.class */
public final class StanfordTokenizer extends NlpTokenizer {

    /** Stanford tokenizer factory used by {@link #tokenize}; assigned once in the constructor. */
    @NotNull
    private final TokenizerFactory<CoreLabel> tokenizerFactory;

    /** Singleton instance of the nested {@code Companion} class (Kotlin companion object). */
    public static final Companion Companion = new Companion(null);

    /**
     * Logger shared by all instances.
     *
     * <p>The function handed to {@code KotlinLogging.logger} does nothing and returns
     * {@code Unit.INSTANCE}. The decompiled form declared an {@code Object invoke()} bridge plus
     * a renamed void method, which is not a valid override of {@code Function0<Unit>.invoke()};
     * a single {@code Unit invoke()} is the equivalent legal Java.
     */
    private static final KLogger logger = KotlinLogging.INSTANCE.logger(new Function0<Unit>() { // from class: fr.vsct.tock.nlp.stanford.StanfordTokenizer$Companion$logger$1
        @Override
        public Unit invoke() {
            return Unit.INSTANCE;
        }
    });

    /**
     * Separators on which Stanford tokens are further split by {@code splitSeparators}.
     *
     * <p>Read from the {@code tock_stanford_tokens_separators} property, with
     * {@code "-", "'", "/", " ", "#"} passed as the fallback list. The trailing
     * {@code (String) null, 4, (Object) null} arguments are the Kotlin default-argument bridge:
     * mask {@code 4} asks for the declared default of the third parameter
     * (NOTE(review): that default is defined in {@code PropertiesKt} and is not visible here).
     */
    private static final List<String> separators = PropertiesKt.listProperty$default("tock_stanford_tokens_separators", CollectionsKt.listOf(new String[]{"-", "'", "/", " ", "#"}), (String) null, 4, (Object) null);

    /* compiled from: StanfordTokenizer.kt */
    @Metadata(mv = {1, 1, 7}, bv = {1, 0, 2}, k = 1, d1 = {"��0\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\b\u0086\u0003\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002J\u0016\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u000e0\r2\u0006\u0010\u000f\u001a\u00020\u0010H\u0002R\u0014\u0010\u0003\u001a\u00020\u0004X\u0082\u0004¢\u0006\b\n��\u001a\u0004\b\u0005\u0010\u0006R\u001a\u0010\u0007\u001a\b\u0012\u0004\u0012\u00020\t0\bX\u0082\u0004¢\u0006\b\n��\u001a\u0004\b\n\u0010\u000b¨\u0006\u0011"}, d2 = {"Lfr/vsct/tock/nlp/stanford/StanfordTokenizer$Companion;", "", "()V", "logger", "Lmu/KLogger;", "getLogger", "()Lmu/KLogger;", "separators", "", "", "getSeparators", "()Ljava/util/List;", "getTokenizerFactory", "Ledu/stanford/nlp/process/TokenizerFactory;", "Ledu/stanford/nlp/ling/CoreLabel;", "language", "Ljava/util/Locale;", "tock-nlp-model-stanford"})
    /* loaded from: input_file:fr/vsct/tock/nlp/stanford/StanfordTokenizer$Companion.class */
    public static final class Companion {
        /* JADX INFO: Access modifiers changed from: private */
        public final KLogger getLogger() {
            return StanfordTokenizer.logger;
        }

        /* JADX INFO: Access modifiers changed from: private */
        public final List<String> getSeparators() {
            return StanfordTokenizer.separators;
        }

        /* JADX INFO: Access modifiers changed from: private */
        /* JADX WARN: Can't fix incorrect switch cases order, some code will duplicate */
        /* JADX WARN: Failed to find 'out' block for switch in B:4:0x0028. Please report as an issue. */
        public final TokenizerFactory<CoreLabel> getTokenizerFactory(final Locale locale) {
            getLogger().trace(new Function0<String>() { // from class: fr.vsct.tock.nlp.stanford.StanfordTokenizer$Companion$getTokenizerFactory$1
                @NotNull
                public final String invoke() {
                    return "getting tokenizer for : " + locale;
                }

                /* JADX INFO: Access modifiers changed from: package-private */
                /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                {
                    super(0);
                }
            });
            String language = locale.getLanguage();
            if (language != null) {
                switch (language.hashCode()) {
                    case 3241:
                        if (language.equals("en")) {
                            TokenizerFactory<CoreLabel> newCoreLabelTokenizerFactory = PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory("");
                            Intrinsics.checkExpressionValueIsNotNull(newCoreLabelTokenizerFactory, "PTBTokenizer.PTBTokenize…LabelTokenizerFactory(\"\")");
                            return newCoreLabelTokenizerFactory;
                        }
                        break;
                    case 3246:
                        if (language.equals("es")) {
                            TokenizerFactory<CoreLabel> newCoreLabelTokenizerFactory2 = SpanishTokenizer.SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
                            Intrinsics.checkExpressionValueIsNotNull(newCoreLabelTokenizerFactory2, "SpanishTokenizer.Spanish…reLabelTokenizerFactory()");
                            return newCoreLabelTokenizerFactory2;
                        }
                        break;
                    case 3276:
                        if (language.equals("fr")) {
                            TokenizerFactory<CoreLabel> newTokenizerFactory = FrenchTokenizer.FrenchTokenizerFactory.newTokenizerFactory();
                            newTokenizerFactory.setOptions("untokenizable=noneDelete");
                            Intrinsics.checkExpressionValueIsNotNull(newTokenizerFactory, "FrenchTokenizer.FrenchTo…                        }");
                            return newTokenizerFactory;
                        }
                        break;
                }
            }
            TokenizerFactory<CoreLabel> newCoreLabelTokenizerFactory3 = PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory("");
            Intrinsics.checkExpressionValueIsNotNull(newCoreLabelTokenizerFactory3, "PTBTokenizer.PTBTokenize…LabelTokenizerFactory(\"\")");
            return newCoreLabelTokenizerFactory3;
        }

        private Companion() {
        }

        public /* synthetic */ Companion(DefaultConstructorMarker defaultConstructorMarker) {
            this();
        }
    }

    /**
     * Exposes the Stanford tokenizer factory held by this tokenizer.
     *
     * @return the factory stored in the {@code tokenizerFactory} field
     */
    @NotNull
    public final TokenizerFactory<CoreLabel> getTokenizerFactory() {
        final TokenizerFactory<CoreLabel> factory = this.tokenizerFactory;
        return factory;
    }

    /**
     * Tokenizes {@code str} with the Stanford tokenizer, then splits every resulting word on
     * the configured separators via {@link #splitSeparators(String)}.
     *
     * <p>When no token is produced:
     * <ul>
     *   <li>if the trimmed text is empty, an empty array is returned;</li>
     *   <li>otherwise a warning is logged and the trimmed text is returned as the only token.</li>
     * </ul>
     *
     * <p>The decompiled original declared its intermediate lists as {@code ArrayList} while
     * assigning {@code List} values to them, used raw types, and carried null checks that
     * could never fire; this version uses typed {@code List<String>} locals and returns the
     * same arrays.
     *
     * @param tokenizerContext tokenizer context; only checked for non-null here
     * @param str text to tokenize
     * @return the tokens, in order of appearance
     */
    @NotNull
    public String[] tokenize(@NotNull TokenizerContext tokenizerContext, @NotNull final String str) {
        Intrinsics.checkParameterIsNotNull(tokenizerContext, "context");
        Intrinsics.checkParameterIsNotNull(str, "text");

        List<CoreLabel> labels = this.tokenizerFactory.getTokenizer(new StringReader(str)).tokenize();
        List<String> tokens = new ArrayList<String>();
        for (CoreLabel label : labels) {
            String word = label.word();
            Intrinsics.checkExpressionValueIsNotNull(word, "word");
            tokens.addAll(splitSeparators(word));
        }

        if (!tokens.isEmpty()) {
            return tokens.toArray(new String[tokens.size()]);
        }

        // Kotlin's trim (not String.trim) is kept so the whitespace definition is unchanged.
        final String trimmed = StringsKt.trim((CharSequence) str).toString();
        if (trimmed.length() == 0) {
            return new String[0];
        }

        Companion.getLogger().warn(new Function0<String>() { // from class: fr.vsct.tock.nlp.stanford.StanfordTokenizer$tokenize$1
            @Override
            @NotNull
            public final String invoke() {
                return "empty token list for " + str + ", do not split";
            }
        });
        return new String[] {trimmed};
    }

    /* JADX WARN: Type inference failed for: r0v1, types: [fr.vsct.tock.nlp.stanford.StanfordTokenizer$splitSeparators$1] */
    @NotNull
    public final List<String> splitSeparators(@NotNull final String str) {
        Intrinsics.checkParameterIsNotNull(str, "word");
        ?? r0 = new Function2<List<? extends String>, String, List<? extends String>>() { // from class: fr.vsct.tock.nlp.stanford.StanfordTokenizer$splitSeparators$1
            @NotNull
            public final List<String> invoke(@NotNull List<String> list, @NotNull String str2) {
                List listOf;
                Intrinsics.checkParameterIsNotNull(list, "words");
                Intrinsics.checkParameterIsNotNull(str2, "separator");
                ArrayList arrayList = new ArrayList();
                for (String str3 : list) {
                    if (str3.length() == 1) {
                        listOf = CollectionsKt.listOf(str3);
                    } else {
                        if (StringsKt.indexOf$default(str3, str2, 0, false, 6, (Object) null) != -1) {
                            List<String> split$default = StringsKt.split$default(str, new String[]{str2}, false, 0, 6, (Object) null);
                            ArrayList arrayList2 = new ArrayList(CollectionsKt.collectionSizeOrDefault(split$default, 10));
                            int i = 0;
                            for (String str4 : split$default) {
                                int i2 = i;
                                i++;
                                String[] strArr = new String[2];
                                strArr[0] = i2 != 0 ? str2 : null;
                                strArr[1] = str4.length() > 0 ? str4 : null;
                                arrayList2.add(CollectionsKt.listOfNotNull(strArr));
                            }
                            ArrayList arrayList3 = arrayList2;
                            ArrayList arrayList4 = new ArrayList();
                            Iterator it = arrayList3.iterator();
                            while (it.hasNext()) {
                                CollectionsKt.addAll(arrayList4, (List) it.next());
                            }
                            return arrayList4;
                        }
                        listOf = CollectionsKt.listOf(str3);
                    }
                    CollectionsKt.addAll(arrayList, listOf);
                }
                return arrayList;
            }

            /* JADX INFO: Access modifiers changed from: package-private */
            /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
            {
                super(2);
            }
        };
        List<String> listOf = CollectionsKt.listOf(str);
        Iterator it = Companion.getSeparators().iterator();
        while (it.hasNext()) {
            listOf = r0.invoke(listOf, (String) it.next());
        }
        return listOf;
    }

    /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
    public StanfordTokenizer(@NotNull TokenizerModelHolder tokenizerModelHolder) {
        super(tokenizerModelHolder);
        Intrinsics.checkParameterIsNotNull(tokenizerModelHolder, "model");
        this.tokenizerFactory = Companion.getTokenizerFactory(tokenizerModelHolder.getLanguage());
    }
}
