package org.carrot2.clustering.kmeans;

import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntOpenHashMap;
import com.carrotsearch.hppc.cursors.IntCursor;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.ObjectUtils;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.matrix.DoubleFactory1D;
import org.apache.mahout.math.matrix.DoubleFactory2D;
import org.apache.mahout.math.matrix.DoubleMatrix1D;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.CommonAttributes;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline;
import org.carrot2.text.vsm.ReducedVectorSpaceModelContext;
import org.carrot2.text.vsm.TermDocumentMatrixBuilder;
import org.carrot2.text.vsm.TermDocumentMatrixReducer;
import org.carrot2.text.vsm.VectorSpaceModelContext;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Clustering component that partitions {@link #documents} by repeatedly applying a small
 * k-means split to the largest remaining group of documents.
 *
 * <p>Processing steps, as implemented by {@link #process()}:
 * <ol>
 *   <li>the documents are preprocessed and a term-document matrix is built,</li>
 *   <li>optionally the matrix is replaced by the reducer's (transposed) coefficient matrix,</li>
 *   <li>all documents are split into {@link #partitionCount} groups, and the largest group is
 *       split again until {@link #clusterCount} groups exist or nothing can be split,</li>
 *   <li>every group with more than one document becomes a {@link Cluster} labelled with its
 *       highest-weighted terms.</li>
 * </ol>
 */
@Bindable(prefix = "BisectingKMeansClusteringAlgorithm", inherit = {CommonAttributes.class})
public class BisectingKMeansClusteringAlgorithm extends ProcessingComponentBase implements IClusteringAlgorithm {

    /** Documents to cluster (input). */
    @Processing
    @Required
    @Input
    @Internal
    @Attribute(key = "documents", inherit = true)
    public List<Document> documents;

    /** Clusters produced by {@link #process()} (output). */
    @Processing
    @Output
    @Internal
    @Attribute(key = "clusters", inherit = true)
    public List<Cluster> clusters = null;

    /**
     * Target number of raw clusters; bisecting stops once this many exist. Also passed to the
     * matrix reducer when {@link #useDimensionalityReduction} is enabled.
     */
    // NOTE(review): the lower bound is expressed through ITokenizer.TT_NUMERIC, which looks like a
    // constant-inlining artifact rather than an intentional dependency - confirm the intended literal.
    @Processing
    @Input
    @Attribute
    @IntRange(min = ITokenizer.TT_NUMERIC)
    public int clusterCount = 25;

    /** Upper bound on the number of k-means iterations performed by a single split. */
    @Processing
    @Input
    @Attribute
    @IntRange(min = 1)
    public int maxIterations = 15;

    /**
     * When {@code true}, clustering runs on the reducer's coefficient matrix instead of the raw
     * term-document matrix. Labels are always computed from the raw term-document matrix.
     */
    @Input
    @Attribute
    @Processing
    public boolean useDimensionalityReduction = true;

    /** Number of partitions a group of documents is divided into by a single split. */
    // NOTE(review): same ITokenizer.TT_NUMERIC lower-bound artifact as on clusterCount - confirm.
    @Processing
    @Input
    @Attribute
    @IntRange(min = ITokenizer.TT_NUMERIC, max = 10)
    public int partitionCount = 2;

    /**
     * Number of top-weighted terms used to label a cluster. Terms tied with the lowest selected
     * weight are included as well, so a cluster may receive more labels than this value.
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min = 1, max = 10)
    public int labelCount = 3;

    public final BasicPreprocessingPipeline preprocessingPipeline = new BasicPreprocessingPipeline();
    public final TermDocumentMatrixBuilder matrixBuilder = new TermDocumentMatrixBuilder();
    public final TermDocumentMatrixReducer matrixReducer = new TermDocumentMatrixReducer();
    public final LabelFormatter labelFormatter = new LabelFormatter();

    /**
     * Clusters {@link #documents} and stores the result in {@link #clusters}.
     *
     * <p>The resulting list is sorted with {@code Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR}
     * and then passed to {@code Cluster.appendOtherTopics}.
     *
     * @throws ProcessingException declared by the component contract
     */
    @Override // org.carrot2.core.ProcessingComponentBase, org.carrot2.core.IProcessingComponent
    public void process() throws ProcessingException {
        final PreprocessingContext context =
                this.preprocessingPipeline.preprocess(this.documents, null, LanguageCode.ENGLISH);

        // Features are the most frequent original words of each stem, minus words whose type has
        // any bit of the mask below set.
        // NOTE(review): 12290 (0x3002) is an inlined bit mask over word type flags; presumably a
        // combination of ITokenizer constants - confirm and replace with the named constants.
        final int[] mostFrequentOriginalWordIndex = context.allStems.mostFrequentOriginalWordIndex;
        final short[] wordType = context.allWords.type;
        final IntArrayList featureIndices = new IntArrayList(mostFrequentOriginalWordIndex.length);
        for (int wordIndex : mostFrequentOriginalWordIndex) {
            if ((wordType[wordIndex] & 12290) == 0) {
                featureIndices.add(wordIndex);
            }
        }
        context.allLabels.featureIndex = featureIndices.toArray();
        context.allLabels.firstPhraseIndex = -1;

        this.clusters = Lists.newArrayList();
        if (context.hasLabels()) {
            final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(context);
            final ReducedVectorSpaceModelContext reducedVsmContext =
                    new ReducedVectorSpaceModelContext(vsmContext);
            this.matrixBuilder.buildTermDocumentMatrix(vsmContext);
            this.matrixBuilder.buildTermPhraseMatrix(vsmContext);

            // Invert stem -> row so that matrix rows can be turned back into words for labelling.
            final IntIntOpenHashMap rowToStemIndex = new IntIntOpenHashMap();
            for (IntIntCursor entry : vsmContext.stemToRowIndex) {
                rowToStemIndex.put(entry.value, entry.key);
            }

            final DoubleMatrix2D clusteringMatrix;
            if (this.useDimensionalityReduction) {
                this.matrixReducer.reduce(reducedVsmContext, this.clusterCount);
                clusteringMatrix = reducedVsmContext.coefficientMatrix.viewDice();
            } else {
                clusteringMatrix = vsmContext.termDocumentMatrix;
            }

            for (IntArrayList rawCluster : bisect(clusteringMatrix)) {
                // Single-document groups are not reported as clusters.
                if (rawCluster.size() <= 1) {
                    continue;
                }
                final Cluster cluster = new Cluster();
                cluster.addPhrases(getLabels(rawCluster, vsmContext.termDocumentMatrix, rowToStemIndex,
                        context.allStems.mostFrequentOriginalWordIndex, context.allWords.image));
                for (int j = 0; j < rawCluster.size(); j++) {
                    cluster.addDocuments(this.documents.get(rawCluster.get(j)));
                }
                this.clusters.add(cluster);
            }
        }
        Collections.sort(this.clusters, Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR);
        Cluster.appendOtherTopics(this.documents, this.clusters);
    }

    /**
     * Splits the columns of {@code matrix} into groups by repeatedly splitting the largest group.
     *
     * <p>Stops when {@link #clusterCount} groups exist, when the largest remaining candidate has at
     * most {@code partitionCount * 2} columns, or when no candidate can be split any further.
     *
     * @param matrix matrix whose columns are the items to group
     * @return groups of column indices of {@code matrix}
     */
    private List<IntArrayList> bisect(DoubleMatrix2D matrix) {
        final IntArrayList allColumns = new IntArrayList(matrix.columns());
        for (int column = 0; column < matrix.columns(); column++) {
            allColumns.add(column);
        }

        final List<IntArrayList> rawClusters = Lists.newArrayList();
        rawClusters.addAll(split(this.partitionCount, matrix, allColumns, this.maxIterations));

        // Groups for which split() produced a single partition. split() is deterministic, so
        // retrying them cannot succeed; they are skipped and the next largest group is tried.
        final List<IntArrayList> unsplittable = Lists.newArrayList();
        while (rawClusters.size() < this.clusterCount) {
            final int largestIndex = indexOfLargestCandidate(rawClusters, unsplittable);
            if (largestIndex < 0) {
                break;
            }
            final IntArrayList largest = rawClusters.get(largestIndex);
            if (largest.size() <= this.partitionCount * 2) {
                // Candidates only get smaller from here, so nothing else is worth splitting.
                break;
            }

            final List<IntArrayList> parts = split(this.partitionCount, matrix, largest, this.maxIterations);
            if (parts.size() > 1) {
                rawClusters.remove(largestIndex);
                rawClusters.addAll(parts);
            } else {
                unsplittable.add(largest);
            }
        }
        return rawClusters;
    }

    /**
     * Returns the index of the first largest list in {@code clusters} that is not (by identity)
     * contained in {@code excluded}, or {@code -1} when there is no such list.
     */
    private static int indexOfLargestCandidate(List<IntArrayList> clusters, List<IntArrayList> excluded) {
        int bestIndex = -1;
        for (int i = 0; i < clusters.size(); i++) {
            final IntArrayList candidate = clusters.get(i);
            if (containsInstance(excluded, candidate)) {
                continue;
            }
            if (bestIndex < 0 || candidate.size() > clusters.get(bestIndex).size()) {
                bestIndex = i;
            }
        }
        return bestIndex;
    }

    /** Returns {@code true} when {@code lists} holds exactly the {@code wanted} instance. */
    private static boolean containsInstance(List<IntArrayList> lists, IntArrayList wanted) {
        for (IntArrayList list : lists) {
            // Identity, not equals(): two distinct groups must never be confused with each other.
            if (list == wanted) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the labels of one cluster from the summed term weights of its documents.
     *
     * <p>The columns of {@code doubleMatrix2D} listed in {@code intArrayList} are added up; every
     * row whose sum is at least the {@link #labelCount}-th largest sum yields one label. Labels are
     * returned in row order, and ties may yield more than {@link #labelCount} labels.
     *
     * @param intArrayList column (document) indices belonging to the cluster
     * @param doubleMatrix2D term-document matrix the weights are read from
     * @param intIntOpenHashMap maps a matrix row index to a stem index
     * @param iArr maps a stem index to the index of its most frequent original word
     * @param cArr word images indexed by word index
     * @return formatted labels for the cluster
     */
    private List<String> getLabels(IntArrayList intArrayList, DoubleMatrix2D doubleMatrix2D, IntIntOpenHashMap intIntOpenHashMap, int[] iArr, char[][] cArr) {
        final DoubleMatrix1D termWeights = DoubleFactory1D.dense.make(doubleMatrix2D.rows());
        for (IntCursor document : intArrayList) {
            termWeights.assign(doubleMatrix2D.viewColumn(document.value), Functions.plus);
        }

        final List<String> labels = Lists.newArrayListWithCapacity(this.labelCount);
        if (termWeights.size() == 0) {
            // No terms at all: there is no threshold element to read below.
            return labels;
        }

        // viewSorted() is ascending, so this is the labelCount-th largest weight.
        final int thresholdPosition = termWeights.size() - Math.min(this.labelCount, termWeights.size());
        final double minWeightForLabel = termWeights.viewSorted().get(thresholdPosition);
        for (int row = 0; row < termWeights.size(); row++) {
            if (termWeights.getQuick(row) >= minWeightForLabel) {
                final char[] wordImage = cArr[iArr[intIntOpenHashMap.get(row)]];
                // A one-word phrase: the image array must be char[][], one entry per word.
                labels.add(LabelFormatter.format(new char[][]{wordImage}, new boolean[]{false}, false));
            }
        }
        return labels;
    }

    /**
     * Partitions the given columns of {@code input} with k-means.
     *
     * <p>Centroids start as the first selected columns. In each iteration a column is assigned to
     * the centroid with the largest dot product (first one wins on ties), then centroids are
     * recomputed as the mean of their members. Iteration stops early when the assignment equals
     * the previous one.
     *
     * @param partitions requested number of partitions; clamped to the number of selected columns
     * @param input matrix whose columns are the items to partition
     * @param columns indices of the columns of {@code input} to partition
     * @param iterations maximum number of iterations
     * @return the non-empty partitions, holding column indices of {@code input}; empty when
     *     {@code columns} is empty
     */
    private List<IntArrayList> split(int partitions, DoubleMatrix2D input, IntArrayList columns, int iterations) {
        // Position in the selected matrix -> column index in the input matrix.
        final int[] selectedToInput = columns.toArray();
        if (selectedToInput.length == 0) {
            return Lists.newArrayList();
        }

        final DoubleMatrix2D selected = input.viewSelection((int[]) null, selectedToInput).copy();
        final int columnCount = selected.columns();
        final int rowCount = selected.rows();
        // Seeding takes one column per centroid, so there cannot be more centroids than columns.
        final int centroidCount = Math.min(partitions, columnCount);

        final DoubleMatrix2D centroids = DoubleFactory2D.dense.make(rowCount, centroidCount)
                .assign(selected.viewPart(0, 0, rowCount, centroidCount));
        final DoubleMatrix2D similarities = DoubleFactory2D.dense.make(centroidCount, columnCount);

        List<IntArrayList> result = newPartitions(centroidCount, columnCount);
        List<IntArrayList> previousResult = null;
        for (int iteration = 0; iteration < iterations; iteration++) {
            // similarities = centroids^T * selected, i.e. centroid/column dot products.
            centroids.zMult(selected, similarities, 1.0d, 0.0d, true, false);

            for (int column = 0; column < similarities.columns(); column++) {
                int nearest = 0;
                double best = similarities.get(0, column);
                for (int centroid = 1; centroid < similarities.rows(); centroid++) {
                    final double similarity = similarities.get(centroid, column);
                    if (best < similarity) {
                        best = similarity;
                        nearest = centroid;
                    }
                }
                result.get(nearest).add(column);
            }

            if (ObjectUtils.equals(previousResult, result)) {
                break;
            }

            for (int centroid = 0; centroid < result.size(); centroid++) {
                final IntArrayList members = result.get(centroid);
                if (members.isEmpty()) {
                    // Keep the previous centroid: averaging over zero members would store NaN, and
                    // a NaN similarity in row 0 makes every column stick to partition 0.
                    continue;
                }
                for (int row = 0; row < rowCount; row++) {
                    double sum = 0.0d;
                    for (int m = 0; m < members.size(); m++) {
                        sum += selected.get(row, members.get(m));
                    }
                    centroids.setQuick(row, centroid, sum / members.size());
                }
            }

            // The assignment of the final iteration is the result, so it is not reset.
            if (iteration < iterations - 1) {
                previousResult = result;
                result = newPartitions(centroidCount, columnCount);
            }
        }

        // Drop empty partitions and translate positions back to column indices of the input.
        final List<IntArrayList> nonEmpty = Lists.newArrayList();
        for (IntArrayList partition : result) {
            if (partition.isEmpty()) {
                continue;
            }
            for (int j = 0; j < partition.size(); j++) {
                partition.set(j, selectedToInput[partition.get(j)]);
            }
            nonEmpty.add(partition);
        }
        return nonEmpty;
    }

    /** Creates {@code count} empty partitions, each pre-sized for {@code capacity} elements. */
    private static List<IntArrayList> newPartitions(int count, int capacity) {
        final List<IntArrayList> partitions = Lists.newArrayList();
        for (int i = 0; i < count; i++) {
            partitions.add(new IntArrayList(capacity));
        }
        return partitions;
    }
}
