package org.carrot2.clustering.lingo;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntOpenHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.vsm.ITermWeighting;
import org.carrot2.text.vsm.VectorSpaceModelContext;
import org.carrot2.util.GraphUtils;
import org.carrot2.util.LinearApproximation;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.DoubleRange;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Builds cluster label candidates, assigns documents to the selected cluster labels and merges
 * clusters whose document sets overlap strongly.
 *
 * <p>The three entry points ({@link #buildLabels}, {@link #assignDocuments} and {@link #merge})
 * read their input from, and write their results to, the supplied {@link LingoProcessingContext}.
 *
 * <p>NOTE(review): this source was recovered by a decompiler; the entry points were reported as
 * originally package-private and are kept {@code public} here so existing callers keep working.
 */
@Bindable(prefix = "LingoClusteringAlgorithm")
public class ClusterBuilder {

    /** Multiplier applied to every phrase label row, on top of its individual penalty. */
    @Processing
    @Input
    @Attribute
    @DoubleRange(min = 0.0d, max = 10.0d)
    public double phraseLabelBoost = 1.5d;

    /**
     * Phrase length (in words) at which the length penalty starts to be applied.
     *
     * <p>NOTE(review): the range bounds reference {@code ITokenizer} constants, which looks like a
     * decompiler constant substitution for plain integer bounds -- confirm against the original.
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min = ITokenizer.TT_NUMERIC, max = ITokenizer.TT_FILE)
    public int phraseLengthPenaltyStart = 8;

    /**
     * Phrase length (in words) at and above which a phrase label receives a zero weight. Raised
     * to {@link #phraseLengthPenaltyStart} by {@link #buildLabels} if it is smaller.
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min = ITokenizer.TT_NUMERIC, max = ITokenizer.TT_FILE)
    public int phraseLengthPenaltyStop = 8;

    /**
     * Minimum overlap ratio (shared documents divided by the size of the larger cluster) at which
     * two clusters are considered connected, and hence merged, by {@link #merge}.
     */
    @Processing
    @Input
    @Attribute
    @DoubleRange(min = 0.0d, max = 1.0d)
    public double clusterMergingThreshold = 0.7d;

    /** Optional source of additional per-label score multipliers; ignored when {@code null}. */
    public IFeatureScorer featureScorer = null;

    /** Strategy that picks the final cluster labels from the weighted candidate matrices. */
    @ImplementingClasses(classes = {UniqueLabelAssigner.class, SimpleLabelAssigner.class})
    @Processing
    @Required
    @Input
    @Attribute
    public ILabelAssigner labelAssigner = new UniqueLabelAssigner();

    /**
     * Maps the fraction of documents containing a label to a weight multiplier.
     * Presumably interpolates linearly between the listed control values over the range
     * [0, 1] -- confirm against {@code LinearApproximation}.
     */
    private final LinearApproximation documentSizeCoefficients = new LinearApproximation(new double[]{1.0d, 1.5d, 1.3d, 0.9d, 0.7d, 0.6d, 0.3d, 0.05d, 0.05d, 0.05d, 0.05d}, 0.0d, 1.0d);

    /**
     * Prepares the weighted single-word and phrase candidate matrices and hands them to
     * {@link #labelAssigner}.
     *
     * <p>Each single-word row is scaled by its document-count penalty and, when a
     * {@link #featureScorer} is set, by that label's feature score. Each phrase row is scaled the
     * same way, additionally by the phrase-length penalty and by {@link #phraseLabelBoost}.
     *
     * <p>Side effect: {@link #phraseLengthPenaltyStop} is raised to
     * {@link #phraseLengthPenaltyStart} when it is smaller.
     *
     * @param lingoProcessingContext processing context providing the preprocessing data, the
     *     vector space model and the reduced base matrix
     * @param iTermWeighting not used by this method
     */
    public void buildLabels(LingoProcessingContext lingoProcessingContext, ITermWeighting iTermWeighting) {
        final PreprocessingContext preprocessingContext = lingoProcessingContext.preprocessingContext;
        final VectorSpaceModelContext vsmContext = lingoProcessingContext.vsmContext;
        final DoubleMatrix2D reducedTdMatrix = lingoProcessingContext.reducedVsmContext.baseMatrix;
        final int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
        final int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
        final int[] mostFrequentOriginalWordIndex = preprocessingContext.allStems.mostFrequentOriginalWordIndex;
        final int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
        final BitSet[] labelsDocumentIndices = preprocessingContext.allLabels.documentIndices;
        final int wordCount = preprocessingContext.allWords.image.length;
        final int documentCount = preprocessingContext.documents.size();

        // Collect the stems of the single-word label candidates. Feature indices at or above
        // wordCount denote phrases (see the phrase lookup below); scanning stops at the first one.
        final BitSet oneWordCandidateStems = new BitSet();
        for (final int featureIndex : labelsFeatureIndex) {
            if (featureIndex >= wordCount) {
                break;
            }
            oneWordCandidateStems.set(wordsStemIndex[featureIndex]);
        }

        // Keep only those matrix rows whose stem is a single-word label candidate and remember
        // which stem each retained row corresponds to.
        final IntIntOpenHashMap filteredRowToStemIndex = new IntIntOpenHashMap();
        final IntArrayList filteredRows = new IntArrayList();
        for (IntIntCursor stemToRow : vsmContext.stemToRowIndex) {
            if (oneWordCandidateStems.get(stemToRow.key)) {
                filteredRowToStemIndex.put(filteredRows.size(), stemToRow.key);
                filteredRows.add(stemToRow.value);
            }
        }

        // Optional additional per-label scores.
        final double[] featureScores = this.featureScorer != null ? this.featureScorer.getFeatureScores(lingoProcessingContext) : null;

        // Word index -> label index mapping. It is needed for the document-count penalty of every
        // single-word row, so it must be built regardless of whether feature scores are present.
        final int[] wordLabelIndex = new int[wordCount];
        Arrays.fill(wordLabelIndex, -1);
        for (int labelIndex = 0; labelIndex < labelsFeatureIndex.length; labelIndex++) {
            final int featureIndex = labelsFeatureIndex[labelIndex];
            if (featureIndex < wordCount) {
                wordLabelIndex[featureIndex] = labelIndex;
            }
        }

        // Single-word candidates: copy the selected rows and scale each by its penalty.
        final DoubleMatrix2D stemCos = reducedTdMatrix.viewSelection(filteredRows.toArray(), (int[]) null).copy();
        for (int row = 0; row < stemCos.rows(); row++) {
            final int stemIndex = filteredRowToStemIndex.get(row);
            final int labelIndex = wordLabelIndex[mostFrequentOriginalWordIndex[stemIndex]];
            double penalty = getDocumentCountPenalty(labelIndex, documentCount, labelsDocumentIndices);
            if (featureScores != null) {
                penalty *= featureScores[labelIndex];
            }
            stemCos.viewRow(row).assign(Functions.mult(penalty));
        }

        // Phrase candidates, if the vector space model provides a term-phrase matrix.
        final DoubleMatrix2D termPhraseMatrix = vsmContext.termPhraseMatrix;
        final int firstPhraseIndex = preprocessingContext.allLabels.firstPhraseIndex;
        DoubleMatrix2D phraseCos = null;
        if (termPhraseMatrix != null) {
            phraseCos = termPhraseMatrix.zMult(reducedTdMatrix, (DoubleMatrix2D) null, 1.0d, 0.0d, false, false);

            if (this.phraseLengthPenaltyStop < this.phraseLengthPenaltyStart) {
                this.phraseLengthPenaltyStop = this.phraseLengthPenaltyStart;
            }
            // Weight lost per extra word once the phrase length reaches phraseLengthPenaltyStart.
            final double penaltyStep = 1.0d / ((this.phraseLengthPenaltyStop - this.phraseLengthPenaltyStart) + 1);

            for (int row = 0; row < phraseCos.rows(); row++) {
                final int labelIndex = row + firstPhraseIndex;
                final int phraseLength = phrasesWordIndices[labelsFeatureIndex[labelIndex] - wordCount].length;

                double penalty;
                if (phraseLength >= this.phraseLengthPenaltyStop) {
                    // Too long: the phrase is effectively removed from the candidates.
                    penalty = 0.0d;
                } else {
                    penalty = getDocumentCountPenalty(labelIndex, documentCount, labelsDocumentIndices);
                    if (phraseLength >= this.phraseLengthPenaltyStart) {
                        penalty *= 1.0d - (penaltyStep * ((phraseLength - this.phraseLengthPenaltyStart) + 1));
                    }
                    if (featureScores != null) {
                        penalty *= featureScores[labelIndex];
                    }
                }
                phraseCos.viewRow(row).assign(Functions.mult(penalty * this.phraseLabelBoost));
            }
        }

        this.labelAssigner.assignLabels(lingoProcessingContext, stemCos, filteredRowToStemIndex, phraseCos);
    }

    /**
     * Returns the weight multiplier for a label based on the fraction of documents it occurs in.
     *
     * @param labelIndex index of the label in {@code labelsDocumentIndices}
     * @param documentCount total number of documents
     * @param labelsDocumentIndices per-label sets of document indices
     * @return the value of {@link #documentSizeCoefficients} at the label's document fraction
     */
    private double getDocumentCountPenalty(int labelIndex, int documentCount, BitSet[] labelsDocumentIndices) {
        // Divide in floating point: an integer division would truncate the fraction to 0 or 1.
        final double documentFraction = (double) labelsDocumentIndices[labelIndex].cardinality() / documentCount;
        return this.documentSizeCoefficients.getValue(documentFraction);
    }

    /**
     * Assigns to every cluster the document set of the label it was built from and stores the
     * result in {@code lingoProcessingContext.clusterDocuments}.
     *
     * <p>The stored sets are the same {@code BitSet} instances as held in
     * {@code allLabels.documentIndices}; they are not copied.
     *
     * @param lingoProcessingContext processing context with {@code clusterLabelFeatureIndex} set
     */
    public void assignDocuments(LingoProcessingContext lingoProcessingContext) {
        final int[] clusterLabelFeatureIndex = lingoProcessingContext.clusterLabelFeatureIndex;
        final int[] labelsFeatureIndex = lingoProcessingContext.preprocessingContext.allLabels.featureIndex;
        final BitSet[] labelsDocumentIndices = lingoProcessingContext.preprocessingContext.allLabels.documentIndices;

        // Reverse lookup: feature index -> position of that feature in the label arrays.
        final IntIntOpenHashMap featureToLabelIndex = new IntIntOpenHashMap();
        for (int labelIndex = 0; labelIndex < labelsFeatureIndex.length; labelIndex++) {
            featureToLabelIndex.put(labelsFeatureIndex[labelIndex], labelIndex);
        }

        final BitSet[] clusterDocuments = new BitSet[clusterLabelFeatureIndex.length];
        for (int cluster = 0; cluster < clusterDocuments.length; cluster++) {
            final int labelIndex = featureToLabelIndex.get(clusterLabelFeatureIndex[cluster]);
            clusterDocuments[cluster] = labelsDocumentIndices[labelIndex];
        }
        lingoProcessingContext.clusterDocuments = clusterDocuments;
    }

    /**
     * Merges clusters whose document sets overlap by at least {@link #clusterMergingThreshold}.
     *
     * <p>Clusters are grouped with {@code GraphUtils.findCoherentSubgraphs}, using the overlap
     * test as the arc predicate. Within each group the cluster with the highest label score
     * absorbs the documents of the others; every absorbed cluster gets its entry in
     * {@code clusterLabelFeatureIndex} set to {@code -1} and its entry in
     * {@code clusterDocuments} set to {@code null}.
     *
     * @param lingoProcessingContext processing context whose cluster arrays are updated in place
     */
    public void merge(LingoProcessingContext lingoProcessingContext) {
        final BitSet[] clusterDocuments = lingoProcessingContext.clusterDocuments;
        final int[] clusterLabelFeatureIndex = lingoProcessingContext.clusterLabelFeatureIndex;
        final double[] clusterLabelScore = lingoProcessingContext.clusterLabelScore;

        for (IntArrayList clustersToMerge : GraphUtils.findCoherentSubgraphs(clusterDocuments.length, new GraphUtils.IArcPredicate() {
            /** Scratch set reused between calls to hold the intersection. */
            private final BitSet temp = new BitSet();

            @Override
            public boolean isArcPresent(int clusterA, int clusterB) {
                final BitSet setA = clusterDocuments[clusterA];
                final BitSet setB = clusterDocuments[clusterB];

                final BitSet smaller;
                final BitSet larger;
                if (setA.cardinality() < setB.cardinality()) {
                    smaller = setA;
                    larger = setB;
                } else {
                    smaller = setB;
                    larger = setA;
                }

                // temp := smaller AND larger. The overlap is measured against the larger set,
                // so a small cluster contained in a much bigger one yields a low ratio.
                this.temp.clear();
                this.temp.or(smaller);
                this.temp.intersect(larger);
                final int largerSize = (int) larger.cardinality();

                return ((double) this.temp.cardinality()) / ((double) largerSize) >= ClusterBuilder.this.clusterMergingThreshold;
            }
        }, true)) {
            final int[] members = clustersToMerge.buffer;
            final int memberCount = clustersToMerge.size();

            // Pick the member with the highest label score as the merge target.
            int mergeBase = -1;
            double bestScore = -1.0d;
            for (int m = 0; m < memberCount; m++) {
                final int cluster = members[m];
                if (clusterLabelScore[cluster] > bestScore) {
                    mergeBase = cluster;
                    bestScore = clusterLabelScore[cluster];
                }
            }

            // Fold every other member into the merge target and mark it as removed.
            for (int m = 0; m < memberCount; m++) {
                final int cluster = members[m];
                if (cluster != mergeBase) {
                    clusterDocuments[mergeBase].or(clusterDocuments[cluster]);
                    clusterLabelFeatureIndex[cluster] = -1;
                    clusterDocuments[cluster] = null;
                }
            }
        }
    }
}
