package org.carrot2.clustering.lingo;

import com.carrotsearch.hppc.BitSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.carrot2.attrs.AttrComposite;
import org.carrot2.attrs.AttrDouble;
import org.carrot2.attrs.AttrInteger;
import org.carrot2.attrs.AttrObject;
import org.carrot2.attrs.AttrString;
import org.carrot2.clustering.Cluster;
import org.carrot2.clustering.ClusteringAlgorithm;
import org.carrot2.clustering.Document;
import org.carrot2.clustering.SharedInfrastructure;
import org.carrot2.language.LanguageComponents;
import org.carrot2.language.LexicalData;
import org.carrot2.language.Stemmer;
import org.carrot2.language.Tokenizer;
import org.carrot2.text.preprocessing.CompletePreprocessingPipeline;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.vsm.ReducedVectorSpaceModelContext;
import org.carrot2.text.vsm.TermDocumentMatrixBuilder;
import org.carrot2.text.vsm.TermDocumentMatrixReducer;
import org.carrot2.text.vsm.VectorSpaceModelContext;

/* loaded from: input_file:org/carrot2/clustering/lingo/LingoClusteringAlgorithm.class */
/**
 * Clustering algorithm registered under the name {@value #NAME}.
 *
 * <p>Processing steps performed by {@link #cluster(Stream, LanguageComponents)}: the input
 * documents are preprocessed, term-document and term-phrase matrices are built and reduced,
 * cluster labels are built, documents are assigned to them, clusters are merged and the result
 * is reordered by a weighted combination of score and size.
 *
 * <p>The public component fields ({@link #preprocessing}, {@link #matrixBuilder},
 * {@link #matrixReducer}, {@link #clusterBuilder}) are exposed as attributes through the
 * getter/setter pairs registered in the constructor.
 */
public class LingoClusteringAlgorithm extends AttrComposite implements ClusteringAlgorithm {
    public static final String NAME = "Lingo";

    /**
     * Language components this algorithm declares as required. The same instance is handed out by
     * {@link #requiredLanguageComponents()}, so callers should treat it as read-only.
     */
    private static final Set<Class<?>> REQUIRED_LANGUAGE_COMPONENTS =
            new HashSet<>(Arrays.asList(Stemmer.class, Tokenizer.class, LexicalData.class, LabelFormatter.class));

    /**
     * Size-score sorting ratio in the range [0, 1] (default 0), passed to
     * {@link SharedInfrastructure#reorderByWeightedScoreAndSize} when ordering the final clusters.
     */
    public AttrDouble scoreWeight = this.attributes.register(
            "scoreWeight",
            AttrDouble.builder().label2("Size-score sorting ratio").min(0.0d).max(1.0d).defaultValue(Double.valueOf(0.0d)));

    /**
     * Desired cluster count in the range [2, 100] (default 30). The value is not used directly:
     * it is scaled by {@link #computeClusterCount(int, int)} before being passed to the matrix
     * reducer.
     */
    public AttrInteger desiredClusterCount = this.attributes.register(
            "desiredClusterCount",
            AttrInteger.builder().label2("Desired cluster count").min(2).max(100).defaultValue(30));

    /** Input preprocessing component. */
    public CompletePreprocessingPipeline preprocessing;

    /** Builds the term-document and term-phrase matrices. */
    public TermDocumentMatrixBuilder matrixBuilder;

    /** Reduces the term-document matrix. */
    public TermDocumentMatrixReducer matrixReducer;

    /** Builds cluster labels, assigns documents to them and merges clusters. */
    public ClusterBuilder clusterBuilder;

    /** Query hint forwarded to the preprocessing pipeline. */
    public final AttrString queryHint;

    /**
     * Registers the component attributes and the query hint. The registration order is
     * {@code preprocessing}, {@code matrixBuilder}, {@code matrixReducer}, {@code clusterBuilder},
     * {@code queryHint}, following the two attributes registered by the field initializers.
     */
    public LingoClusteringAlgorithm() {
        this.attributes.register(
                "preprocessing",
                AttrObject.builder(CompletePreprocessingPipeline.class)
                        .label2("Input preprocessing components")
                        .getset(() -> this.preprocessing, value -> this.preprocessing = value)
                        .defaultValue(CompletePreprocessingPipeline::new));
        this.attributes.register(
                "matrixBuilder",
                AttrObject.builder(TermDocumentMatrixBuilder.class)
                        .label2("Term-document matrix builder")
                        .getset(() -> this.matrixBuilder, value -> this.matrixBuilder = value)
                        .defaultValue(TermDocumentMatrixBuilder::new));
        this.attributes.register(
                "matrixReducer",
                AttrObject.builder(TermDocumentMatrixReducer.class)
                        .label2("Term-document matrix reducer")
                        .getset(() -> this.matrixReducer, value -> this.matrixReducer = value)
                        .defaultValue(TermDocumentMatrixReducer::new));
        this.attributes.register(
                "clusterBuilder",
                AttrObject.builder(ClusterBuilder.class)
                        .label2("Cluster label supplier")
                        .getset(() -> this.clusterBuilder, value -> this.clusterBuilder = value)
                        .defaultValue(ClusterBuilder::new));
        this.queryHint = this.attributes.register("queryHint", SharedInfrastructure.queryHintAttribute());
    }

    /**
     * Returns the language component classes this algorithm requires.
     *
     * @return the shared set of required component classes
     */
    @Override // org.carrot2.clustering.ClusteringAlgorithm
    public Set<Class<?>> requiredLanguageComponents() {
        return REQUIRED_LANGUAGE_COMPONENTS;
    }

    /**
     * Clusters the given documents.
     *
     * <p>The stream is fully consumed into a list first, because the document count is needed to
     * compute the cluster count and documents are later looked up by index. If preprocessing
     * yields no labels, no clusters are built and the (empty) list is still passed through the
     * final reordering step.
     *
     * @param stream documents to cluster
     * @param languageComponents language components handed to the preprocessing pipeline
     * @param <T> document type
     * @return clusters reordered by weighted score and size according to {@link #scoreWeight}
     */
    @Override // org.carrot2.clustering.ClusteringAlgorithm
    public <T extends Document> List<Cluster<T>> cluster(Stream<? extends T> stream, LanguageComponents languageComponents) {
        List<T> documents = stream.collect(Collectors.toList());
        PreprocessingContext context =
                this.preprocessing.preprocess(documents.stream(), this.queryHint.get(), languageComponents);

        List<Cluster<T>> clusters = new ArrayList<>();
        if (context.hasLabels()) {
            VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(context);
            ReducedVectorSpaceModelContext reducedVsmContext = new ReducedVectorSpaceModelContext(vsmContext);
            LingoProcessingContext lingoContext = new LingoProcessingContext(reducedVsmContext);

            // Read the builder field once so the matrices and the term weighting used for
            // labelling come from the same instance.
            TermDocumentMatrixBuilder builder = this.matrixBuilder;
            builder.buildTermDocumentMatrix(vsmContext);
            builder.buildTermPhraseMatrix(vsmContext);

            this.matrixReducer.reduce(
                    reducedVsmContext,
                    computeClusterCount(this.desiredClusterCount.get().intValue(), documents.size()));

            this.clusterBuilder.buildLabels(lingoContext, builder.termWeighting);
            this.clusterBuilder.assignDocuments(lingoContext);
            this.clusterBuilder.merge(lingoContext);

            appendClusters(lingoContext, context, documents, clusters);
        }
        return SharedInfrastructure.reorderByWeightedScoreAndSize(clusters, this.scoreWeight.get().doubleValue());
    }

    /**
     * Converts the label, score and document arrays of the processing context into
     * {@link Cluster} instances and appends them to {@code target}.
     */
    private static <T extends Document> void appendClusters(
            LingoProcessingContext lingoContext,
            PreprocessingContext context,
            List<T> documents,
            List<Cluster<T>> target) {
        LabelFormatter labelFormatter =
                lingoContext.preprocessingContext.languageComponents.get(LabelFormatter.class);
        int[] labelFeatureIndex = lingoContext.clusterLabelFeatureIndex;
        BitSet[] clusterDocuments = lingoContext.clusterDocuments;
        double[] labelScores = lingoContext.clusterLabelScore;

        for (int i = 0; i < labelFeatureIndex.length; i++) {
            int featureIndex = labelFeatureIndex[i];
            if (featureIndex < 0) {
                // Slots with a negative label feature index produce no cluster.
                continue;
            }

            Cluster<T> cluster = new Cluster<>();
            cluster.addLabel(context.format(labelFormatter, featureIndex));
            cluster.setScore(Double.valueOf(labelScores[i]));

            // Each set bit is the index of a member document in the input list.
            BitSet members = clusterDocuments[i];
            for (int doc = members.nextSetBit(0); doc >= 0; doc = members.nextSetBit(doc + 1)) {
                cluster.addDocument(documents.get(doc));
            }
            target.add(cluster);
        }
    }

    /**
     * Computes the cluster count passed to the matrix reducer:
     * {@code (desiredClusterCount / 10) * sqrt(documentCount)}, truncated to an {@code int} and
     * capped at {@code documentCount}.
     *
     * @param desiredClusterCount value of the {@link #desiredClusterCount} attribute
     * @param documentCount number of input documents
     * @return the scaled cluster count, never larger than {@code documentCount}
     */
    static int computeClusterCount(int desiredClusterCount, int documentCount) {
        int scaled = (int) ((desiredClusterCount / 10.0d) * Math.sqrt(documentCount));
        return Math.min(scaled, documentCount);
    }
}
