package com.gengoai.hermes;

import com.gengoai.apollo.model.topic.MalletLDA;
import com.gengoai.collection.counter.Counter;
import com.gengoai.hermes.corpus.DocumentCollection;
import com.gengoai.hermes.extraction.NGramExtractor;
import com.gengoai.hermes.extraction.TermExtractor;
import com.gengoai.hermes.extraction.lyre.LyreDSL;
import com.gengoai.hermes.extraction.lyre.LyreExpression;
import com.gengoai.hermes.ml.HStringDataSetGenerator;
import com.gengoai.hermes.tools.HermesCLI;
import java.lang.invoke.SerializedLambda;

/* loaded from: input_file:com/gengoai/hermes/CorpusExample.class */
/**
 * Command-line example that walks through several corpus-level operations on a
 * {@link DocumentCollection}: term / document / bigram frequency counts, a boolean
 * query, random sampling, filtering, and estimating a small LDA topic model.
 *
 * <p>All results are written to {@code System.out}.
 *
 * <p>NOTE(review): this source was recovered from a class file, so generic type
 * arguments (e.g. on {@code Counter}) were erased; the raw types are kept rather
 * than guessed.
 */
public class CorpusExample extends HermesCLI {
    /** Number of highest-ranked entries printed for every counter. */
    private static final int TOP_N = 10;

    /**
     * Program entry point: creates the example and hands the command-line arguments
     * to {@code run}.
     *
     * @param strArr the command-line arguments
     * @throws Exception if {@code run} fails
     */
    public static void main(String[] strArr) throws Exception {
        new CorpusExample().run(strArr);
    }

    /**
     * Runs the example: loads and annotates the example documents, then prints a
     * series of corpus statistics and the topics of a two-topic LDA model.
     *
     * <p>NOTE(review): presumably invoked by {@code HermesCLI.run} - the superclass is
     * not visible here, confirm before adding {@code @Override}.
     *
     * @throws Exception if any corpus or model operation fails
     */
    public void programLogic() throws Exception {
        // One-document-per-line text resource, annotated with tokens, sentences and lemmas.
        DocumentCollection annotate = DocumentCollection.create("text_opl::classpath:com/gengoai/hermes/example_docs.txt").annotate(new AnnotatableType[]{Types.TOKEN, Types.SENTENCE, Types.LEMMA});

        // Terms are rendered through the lemma expression so that counts are per lemma.
        TermExtractor build = TermExtractor.builder().toString(LyreDSL.lemma).build();

        System.out.println("Top 10 by Term Frequency");
        printTopEntries(annotate.termCount(build));
        System.out.println();

        // Each entry is printed with its document frequency; previously the term-frequency
        // counter was consulted here, so the values did not match the ranking shown.
        System.out.println("Top 10 by Document Frequency");
        printTopEntries(annotate.documentCount(build));
        System.out.println();

        System.out.println("Top 10 Bigrams");
        printTopEntries(annotate.nGramCount(NGramExtractor.bigrams().ignoreStopwords().toLowerCase().build()));
        System.out.println();

        System.out.println(annotate.query("younger AND (brother OR sister)").size() + " documents match (younger AND (brother OR sister))");
        System.out.println();

        // Print a single sampled document.
        System.out.println(annotate.sample(1).iterator().next());
        System.out.println();

        System.out.println(annotate.filter(document -> document.tokenLength() > 4).size() + " documents have more than four words.");
        System.out.println();
        System.out.println();

        // Small, quiet LDA configuration: 2 topics, 10 iterations.
        MalletLDA malletLDA = new MalletLDA(parameters -> {
            parameters.verbose.set(false);
            parameters.K.set(2);
            parameters.maxIterations.set(10);
        });
        // Sentence-level data set whose "input" is the filtered, lower-cased token sequence.
        malletLDA.estimate(annotate.asDataSet(HStringDataSetGenerator.builder(Types.SENTENCE).tokenSequence("input", LyreExpression.parse("filter(lower(@TOKEN), isContentWord)")).build()));
        for (int i = 0; i < malletLDA.getNumberOfTopics(); i++) {
            System.out.println("Topic " + i);
            printTopEntries(malletLDA.getTopic(i).getFeatureDistribution());
            System.out.println();
        }
    }

    /**
     * Prints the {@value #TOP_N} top entries of a counter, one per line, formatted as
     * {@code item: count}.
     *
     * <p>The value printed for each item is looked up in the same counter that produced
     * the ranking, so the ranking and the displayed values cannot get out of sync.
     *
     * @param counter the counter whose top entries are printed
     */
    private static void printTopEntries(Counter counter) {
        // NOTE(review): itemsByCount(false) is presumably "descending by count" - confirm
        // against the Counter API.
        counter.topN(TOP_N).itemsByCount(false).forEach(item -> System.out.println(item + ": " + counter.get(item)));
    }

    // The decompiler-emitted copy of the synthetic $deserializeLambda$ method was removed:
    // it is generated by the Java compiler for serializable lambdas (such as the filter
    // predicate above) and its decompiled form was not valid Java source.
}
