package ai.tock.nlp.stanford;

import ai.tock.nlp.core.Application;
import ai.tock.nlp.core.configuration.NlpApplicationConfiguration;
import ai.tock.nlp.core.configuration.NlpModelConfiguration;
import ai.tock.nlp.core.sample.SampleEntity;
import ai.tock.nlp.core.sample.SampleExpression;
import ai.tock.nlp.model.EntityBuildContext;
import ai.tock.nlp.model.EntityContext;
import ai.tock.nlp.model.IntentContext;
import ai.tock.nlp.model.TokenizerContext;
import ai.tock.nlp.model.service.engine.EntityModelHolder;
import ai.tock.nlp.model.service.engine.IntentModelHolder;
import ai.tock.nlp.model.service.engine.NlpEngineModelBuilder;
import ai.tock.nlp.model.service.engine.Tokenizer;
import ai.tock.nlp.model.service.engine.TokenizerModelHolder;
import ai.tock.shared.PropertiesKt;
import edu.stanford.nlp.classify.Classifier;
import edu.stanford.nlp.classify.ColumnDataClassifier;
import edu.stanford.nlp.classify.Dataset;
import edu.stanford.nlp.classify.GeneralDataset;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import kotlin.Metadata;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function0;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.StringsKt;
import mu.KLogger;
import mu.KotlinLogging;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

/* compiled from: StanfordModelBuilder.kt */
@Metadata(mv = {1, 5, 1}, k = 1, xi = 48, d1 = {"��T\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0007\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010 \n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\bÀ\u0002\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002J&\u0010\u0010\u001a\u00020\u00112\u0006\u0010\u0012\u001a\u00020\u00132\u0006\u0010\u0014\u001a\u00020\u00152\f\u0010\u0016\u001a\b\u0012\u0004\u0012\u00020\u00180\u0017H\u0016J&\u0010\u0019\u001a\u00020\u001a2\u0006\u0010\u0012\u001a\u00020\u001b2\u0006\u0010\u0014\u001a\u00020\u00152\f\u0010\u0016\u001a\b\u0012\u0004\u0012\u00020\u00180\u0017H\u0016J&\u0010\u001c\u001a\u00020\u001d2\u0006\u0010\u0012\u001a\u00020\u001e2\u0006\u0010\u0014\u001a\u00020\u00152\f\u0010\u0016\u001a\b\u0012\u0004\u0012\u00020\u00180\u0017H\u0016J+\u0010\u001f\u001a\u00020\u00042\u0006\u0010\u0012\u001a\u00020\u00132\u0006\u0010\u0014\u001a\u00020\u00152\f\u0010\u0016\u001a\b\u0012\u0004\u0012\u00020\u00180\u0017H��¢\u0006\u0002\b R\u000e\u0010\u0003\u001a\u00020\u0004X\u0086T¢\u0006\u0002\n��R\u000e\u0010\u0005\u001a\u00020\u0004X\u0086T¢\u0006\u0002\n��R\u0014\u0010\u0006\u001a\u00020\u0007X\u0096\u0004¢\u0006\b\n��\u001a\u0004\b\b\u0010\tR\u0014\u0010\n\u001a\u00020\u0007X\u0096\u0004¢\u0006\b\n��\u001a\u0004\b\u000b\u0010\tR\u0014\u0010\f\u001a\u00020\u0007X\u0096\u0004¢\u0006\b\n��\u001a\u0004\b\r\u0010\tR\u000e\u0010\u000e\u001a\u00020\u000fX\u0082\u0004¢\u0006\u0002\n��¨\u0006!"}, d2 = {"Lai/tock/nlp/stanford/StanfordModelBuilder;", "Lai/tock/nlp/model/service/engine/NlpEngineModelBuilder;", "()V", "ADJACENT_ENTITY_MARKER", "", "TAB", "defaultEntityClassifierConfiguration", "Lai/tock/nlp/core/configuration/NlpModelConfiguration;", "getDefaultEntityClassifierConfiguration", 
"()Lai/tock/nlp/core/configuration/NlpModelConfiguration;", "defaultIntentClassifierConfiguration", "getDefaultIntentClassifierConfiguration", "defaultTokenizerConfiguration", "getDefaultTokenizerConfiguration", "logger", "Lmu/KLogger;", "buildEntityModel", "Lai/tock/nlp/model/service/engine/EntityModelHolder;", "context", "Lai/tock/nlp/model/EntityBuildContext;", "configuration", "Lai/tock/nlp/core/configuration/NlpApplicationConfiguration;", "expressions", "", "Lai/tock/nlp/core/sample/SampleExpression;", "buildIntentModel", "Lai/tock/nlp/model/service/engine/IntentModelHolder;", "Lai/tock/nlp/model/IntentContext;", "buildTokenizerModel", "Lai/tock/nlp/model/service/engine/TokenizerModelHolder;", "Lai/tock/nlp/model/TokenizerContext;", "getEntityTrainData", "getEntityTrainData$tock_nlp_model_stanford", "tock-nlp-model-stanford"})
/* loaded from: input_file:ai/tock/nlp/stanford/StanfordModelBuilder.class */
/*
 * Builds the Stanford-based tokenizer, intent and entity models from sample expressions.
 *
 * Singleton: the constructor is private and the only instance is exposed through INSTANCE.
 */
public final class StanfordModelBuilder implements NlpEngineModelBuilder {

    /** Separator written between a token and its label in the entity training data. */
    @NotNull
    public static final String TAB = "\t";

    /**
     * Prefix added to the role of an entity that directly follows another entity already
     * labelled with the same role, so that the two entities get distinct labels.
     */
    @NotNull
    public static final String ADJACENT_ENTITY_MARKER = "__near__";

    /** The single instance of this builder. */
    @NotNull
    public static final StanfordModelBuilder INSTANCE = new StanfordModelBuilder();

    // NOTE(review): kotlin-logging presumably derives the logger name from the class of the
    // function object passed here, so it is kept as a class nested in StanfordModelBuilder.
    @NotNull
    private static final KLogger logger = KotlinLogging.INSTANCE.logger(new Function0<Unit>() {
        @Override
        public Unit invoke() {
            return Unit.INSTANCE;
        }
    });

    // NOTE(review): the trailing (mask, marker) arguments of the three constructors below are the
    // Kotlin default-argument bridge; 14 presumably selects the defaults for every parameter
    // except the properties - confirm against NlpModelConfiguration.
    @NotNull
    private static final NlpModelConfiguration defaultEntityClassifierConfiguration = new NlpModelConfiguration(PropertiesKt.loadProperties("/stanford/crfclassifier.properties"), (String) null, false, false, 14, (DefaultConstructorMarker) null);

    @NotNull
    private static final NlpModelConfiguration defaultIntentClassifierConfiguration = new NlpModelConfiguration(PropertiesKt.loadProperties("/stanford/intentClassifier.properties"), (String) null, false, false, 14, (DefaultConstructorMarker) null);

    @NotNull
    private static final NlpModelConfiguration defaultTokenizerConfiguration = new NlpModelConfiguration(PropertiesKt.loadProperties("/stanford/tokenizer.properties"), (String) null, false, false, 14, (DefaultConstructorMarker) null);

    private StanfordModelBuilder() {
    }

    /**
     * Creates the tokenizer model holder for the language of the given context.
     * The sample expressions are only null-checked; nothing is trained from them.
     *
     * @param tokenizerContext context providing the language
     * @param nlpApplicationConfiguration configuration stored in the returned holder
     * @param list sample expressions (unused apart from the null check)
     * @return a new holder for the context language and the configuration
     */
    @NotNull
    public TokenizerModelHolder buildTokenizerModel(@NotNull TokenizerContext tokenizerContext, @NotNull NlpApplicationConfiguration nlpApplicationConfiguration, @NotNull List<SampleExpression> list) {
        Intrinsics.checkNotNullParameter(tokenizerContext, "context");
        Intrinsics.checkNotNullParameter(nlpApplicationConfiguration, "configuration");
        Intrinsics.checkNotNullParameter(list, "expressions");
        return new TokenizerModelHolder(tokenizerContext.getLanguage(), nlpApplicationConfiguration);
    }

    /** @return the configuration loaded from {@code /stanford/crfclassifier.properties} */
    @NotNull
    public NlpModelConfiguration getDefaultEntityClassifierConfiguration() {
        return defaultEntityClassifierConfiguration;
    }

    /** @return the configuration loaded from {@code /stanford/intentClassifier.properties} */
    @NotNull
    public NlpModelConfiguration getDefaultIntentClassifierConfiguration() {
        return defaultIntentClassifierConfiguration;
    }

    /** @return the configuration loaded from {@code /stanford/tokenizer.properties} */
    @NotNull
    public NlpModelConfiguration getDefaultTokenizerConfiguration() {
        return defaultTokenizerConfiguration;
    }

    /**
     * Trains an intent classifier: every sample expression becomes one datum built from the
     * line "intent name, tab, '$', expression text".
     *
     * @param intentContext context providing the application
     * @param nlpApplicationConfiguration configuration whose intent properties configure the classifier
     * @param list sample expressions to train on
     * @return a holder wrapping the column data classifier and the trained classifier
     */
    @NotNull
    public IntentModelHolder buildIntentModel(@NotNull IntentContext intentContext, @NotNull NlpApplicationConfiguration nlpApplicationConfiguration, @NotNull List<SampleExpression> list) {
        Intrinsics.checkNotNullParameter(intentContext, "context");
        Intrinsics.checkNotNullParameter(nlpApplicationConfiguration, "configuration");
        Intrinsics.checkNotNullParameter(list, "expressions");
        ColumnDataClassifier columnDataClassifier = new ColumnDataClassifier(nlpApplicationConfiguration.getIntentConfiguration().getProperties());
        GeneralDataset<String, String> dataset = new Dataset<>();
        for (SampleExpression expression : list) {
            // NOTE(review): the literal '$' after the tab is reproduced as found - verify that
            // the classification side builds its lines the same way.
            dataset.add(columnDataClassifier.makeDatumFromLine(expression.getIntent().getName() + "\t$" + expression.getText()));
        }
        Classifier<String, String> classifier = columnDataClassifier.makeClassifier(dataset);
        Application application = intentContext.getApplication();
        Intrinsics.checkNotNullExpressionValue(classifier, "classifier");
        return new IntentModelHolder(application, new StanfordIntentModel(columnDataClassifier, classifier), nlpApplicationConfiguration, (Instant) null, 8, (DefaultConstructorMarker) null);
    }

    /**
     * Trains a CRF entity classifier on the training data generated from the sample expressions.
     * When training fails, the training data is logged and the exception is rethrown unchanged.
     *
     * @param entityBuildContext context used to generate the training data
     * @param nlpApplicationConfiguration configuration whose entity properties configure the classifier
     * @param list sample expressions to train on
     * @return a holder wrapping the trained CRF classifier
     */
    @NotNull
    public EntityModelHolder buildEntityModel(@NotNull EntityBuildContext entityBuildContext, @NotNull NlpApplicationConfiguration nlpApplicationConfiguration, @NotNull List<SampleExpression> list) {
        Intrinsics.checkNotNullParameter(entityBuildContext, "context");
        Intrinsics.checkNotNullParameter(nlpApplicationConfiguration, "configuration");
        Intrinsics.checkNotNullParameter(list, "expressions");
        CRFClassifier crfClassifier = new CRFClassifier(nlpApplicationConfiguration.getEntityConfiguration().getProperties());
        final String trainingData = getEntityTrainData$tock_nlp_model_stanford(entityBuildContext, nlpApplicationConfiguration, list);
        try {
            Collection documents = crfClassifier.makeObjectBankFromString(trainingData, crfClassifier.defaultReaderAndWriter());
            Intrinsics.checkNotNullExpressionValue(documents, "crfClassifier.makeObjectBankFromString(\n                trainingData,\n                crfClassifier.defaultReaderAndWriter()\n            )");
            crfClassifier.train(documents);
            return new EntityModelHolder(crfClassifier, nlpApplicationConfiguration, (Instant) null, 4, (DefaultConstructorMarker) null);
        } catch (Exception e) {
            logger.error(() -> "error with train data: \n " + trainingData);
            throw e;
        }
    }

    /**
     * Generates the entity training data for the given expressions.
     *
     * <p>Each token of an expression is written on its own line as "token, tab, label", where the
     * label is {@code O} for a token that belongs to no entity. An empty line is written after
     * each expression. Expressions whose text contains a newline or a tab are skipped with a
     * warning, and an expression that raises an exception is logged and does not stop the others.
     *
     * @param entityBuildContext context providing the language and the tokenizer context
     * @param nlpApplicationConfiguration configuration used to obtain the tokenizer
     * @param list sample expressions to convert
     * @return the accumulated training data
     */
    @NotNull
    public final String getEntityTrainData$tock_nlp_model_stanford(@NotNull EntityBuildContext entityBuildContext, @NotNull NlpApplicationConfiguration nlpApplicationConfiguration, @NotNull List<SampleExpression> list) {
        Intrinsics.checkNotNullParameter(entityBuildContext, "context");
        Intrinsics.checkNotNullParameter(nlpApplicationConfiguration, "configuration");
        Intrinsics.checkNotNullParameter(list, "expressions");
        Tokenizer tokenizer = StanfordEngineProvider.Companion.getStanfordTokenizer(new TokenizerModelHolder(entityBuildContext.getLanguage(), nlpApplicationConfiguration));
        TokenizerContext tokenizerContext = new TokenizerContext((EntityContext) entityBuildContext);
        final StringBuilder sb = new StringBuilder();
        // Both maps are reused across expressions and cleared before each one.
        HashMap<Integer, SampleEntity> entityByToken = new HashMap<>();
        HashMap<SampleEntity, String> labelByEntity = new HashMap<>();
        for (SampleExpression expression : list) {
            try {
                final String text = expression.getText();
                if (text.contains("\n") || text.contains(TAB)) {
                    logger.warn(() -> "expression " + text + " contains \\n or \\t!!! - skipped");
                    continue;
                }
                entityByToken.clear();
                labelByEntity.clear();
                String[] tokens = tokenizer.tokenize(tokenizerContext, text);
                indexEntityTokens(tokenizer, tokenizerContext, expression, text, tokens, entityByToken);
                for (int index = 0; index < tokens.length; index++) {
                    String label = labelFor(index, entityByToken, labelByEntity);
                    sb.append(tokens[index]);
                    sb.append(TAB);
                    StringsKt.appendln(sb.append(label));
                }
                // Empty line between two expressions.
                StringsKt.appendln(sb);
                logger.trace(() -> text + " ->\n" + sb);
            } catch (Exception e) {
                logger.error("error with " + expression, e);
            }
        }
        return sb.toString();
    }

    /**
     * Records, for every entity of the expression, which token indexes it covers.
     * The first token index of an entity is the number of tokens of the text that precedes it.
     */
    private static void indexEntityTokens(Tokenizer tokenizer, TokenizerContext tokenizerContext, SampleExpression expression, final String text, String[] tokens, HashMap<Integer, SampleEntity> entityByToken) {
        for (SampleEntity entity : expression.getEntities()) {
            int firstToken = entity.getStart() == 0
                    ? 0
                    : tokenizer.tokenize(tokenizerContext, text.substring(0, entity.getStart())).length;
            int endToken = firstToken + tokenizer.tokenize(tokenizerContext, text.substring(entity.getStart(), entity.getEnd())).length;
            if (firstToken >= tokens.length || endToken > tokens.length) {
                logger.warn(() -> "entity mismatch for " + text);
                // NOTE(review): only the remaining entities are skipped; the tokens of the
                // expression are still emitted by the caller - confirm this is intended.
                break;
            }
            // A plain for loop so that an entity producing no token labels nothing.
            for (int index = firstToken; index < endToken; index++) {
                entityByToken.put(index, entity);
            }
        }
    }

    /**
     * Returns the label of the token at the given index: {@code O} outside any entity, otherwise
     * the entity role, prefixed with ADJACENT_ENTITY_MARKER when the previous token belongs to an
     * entity already labelled with that same role. The label chosen for an entity is cached so
     * that all its tokens share it (the token at index 0 is labelled without being cached).
     */
    private static String labelFor(int index, HashMap<Integer, SampleEntity> entityByToken, HashMap<SampleEntity, String> labelByEntity) {
        SampleEntity entity = entityByToken.get(index);
        if (entity == null) {
            return "O";
        }
        if (index == 0) {
            return entity.getDefinition().getRole();
        }
        String known = labelByEntity.get(entity);
        if (known != null) {
            return known;
        }
        String role = entity.getDefinition().getRole();
        String label = role;
        SampleEntity previous = entityByToken.get(index - 1);
        if (previous != null && Intrinsics.areEqual(labelByEntity.get(previous), role)) {
            label = ADJACENT_ENTITY_MARKER + previous.getDefinition().getRole();
        }
        labelByEntity.put(entity, label);
        return label;
    }

    /** @return the default application configuration provided by the interface default implementation */
    @NotNull
    public NlpApplicationConfiguration defaultNlpApplicationConfiguration() {
        return NlpEngineModelBuilder.DefaultImpls.defaultNlpApplicationConfiguration(this);
    }
}
