package org.maochen.nlp.test;

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.math3.distribution.GammaDistribution;
import org.maochen.nlp.ml.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/maochen/nlp/test/ChiSquare.class */
public class ChiSquare {
    private static final Logger LOG = LoggerFactory.getLogger(ChiSquare.class);
    public static final double EMPIRICAL_P_VALUE = 0.05d;
    protected Table<String, String, Integer> dataTable = HashBasedTable.create();
    protected Table<String, String, Double> chiSquareTable = HashBasedTable.create();
    protected int df;
    protected int total;
    public double totalChiSquare;
    public double totalPVal;

    public void loadTrainingData(List<Tuple> list) {
        for (int i = 0; i < list.size(); i++) {
            if (i % 1000 == 0) {
                LOG.debug("Processed " + i + " of " + list.size());
            }
            Tuple tuple = list.get(i);
            for (String str : tuple.featureName) {
                Integer num = (Integer) this.dataTable.get(str, tuple.label);
                this.dataTable.put(str, tuple.label, Integer.valueOf(num == null ? 1 : num.intValue() + 1));
            }
        }
    }

    public void calculateChiSquare() {
        int i;
        int i2;
        this.df = this.dataTable.rowKeySet().size() - 1;
        if (this.df == 0) {
            i = 1;
            this.df = 1;
        } else {
            i = this.df;
        }
        this.df = i;
        this.df *= this.dataTable.columnKeySet().size() - 1;
        if (this.df == 0) {
            i2 = 1;
            this.df = 1;
        } else {
            i2 = this.df;
        }
        this.df = i2;
        this.total = this.dataTable.rowMap().values().stream().map((v0) -> {
            return v0.values();
        }).map(collection -> {
            return Integer.valueOf(collection.stream().mapToInt(num -> {
                return num.intValue();
            }).sum());
        }).mapToInt(num -> {
            return num.intValue();
        }).sum();
        this.dataTable.cellSet().forEach(cell -> {
            String str = (String) cell.getRowKey();
            String str2 = (String) cell.getColumnKey();
            double sum = (this.dataTable.row(str).values().stream().mapToInt(num2 -> {
                return num2.intValue();
            }).sum() * this.dataTable.column(str2).values().stream().mapToInt(num3 -> {
                return num3.intValue();
            }).sum()) / this.total;
            this.chiSquareTable.put(str, str2, Double.valueOf(Math.pow((cell.getValue() == null ? 0 : ((Integer) cell.getValue()).intValue()) - sum, 2.0d) / sum));
        });
        this.totalChiSquare = this.chiSquareTable.cellSet().parallelStream().mapToDouble(cell2 -> {
            if (cell2.getValue() == null) {
                return 0.0d;
            }
            return ((Double) cell2.getValue()).doubleValue();
        }).sum();
        this.totalPVal = getPValue(this.totalChiSquare, this.df);
    }

    protected static double getPValue(double d, double d2) {
        return 1.0d - new GammaDistribution(d2 / 2.0d, 2.0d).cumulativeProbability(d);
    }

    public void printPTable() {
        StringBuilder sb = new StringBuilder();
        sb.append("Greater than 0.05 might be independent.").append(System.lineSeparator());
        sb.append("Total P Value: ").append(String.format("%.5f", Double.valueOf(this.totalPVal))).append(System.lineSeparator());
        System.out.println(sb.toString());
    }
}
