package weka.distributed;

import com.clearspring.analytics.stream.quantile.TDigest;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import weka.core.Attribute;
import weka.core.Instances;
import weka.core.Utils;
import weka.core.stats.ArffSummaryNumericMetric;
import weka.core.stats.NominalStats;
import weka.core.stats.NumericAttributeBinData;
import weka.core.stats.NumericStats;
import weka.core.stats.QuantileCalculator;
import weka.core.stats.Stats;
import weka.core.stats.StringStats;
import weka.distributed.CSVToARFFHeaderMapTask;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

/* loaded from: input_file:weka/distributed/CSVToARFFHeaderReduceTask.class */
public class CSVToARFFHeaderReduceTask implements Serializable {
    private static final long serialVersionUID = -2626548935034818537L;

    /**
     * Aggregates the headers held by the supplied holders and, for every numeric
     * attribute, merges the holders' partial quantile estimators into the
     * aggregated summary attribute.
     *
     * <p>The returned header contains copies of the aggregated non-summary
     * attributes followed by the summary attributes. For a numeric attribute with
     * at least one partial estimator, the summary attribute is rebuilt from a
     * {@link NumericStats} that has been given the merged estimator; otherwise a
     * copy of the aggregated summary attribute is used as-is.
     *
     * <p>An attribute that has no corresponding summary attribute in the
     * aggregated header is skipped rather than causing a
     * {@code NullPointerException}.
     *
     * @param list the holders (header plus partial quantile estimators) to combine
     * @return a zero-instance header with the combined attributes
     * @throws DistributedWekaException if aggregating the headers fails
     */
    public Instances aggregateHeadersAndQuartiles(List<CSVToARFFHeaderMapTask.HeaderAndQuantileDataHolder> list) throws DistributedWekaException {
        List<Instances> headers = new ArrayList<Instances>();
        for (CSVToARFFHeaderMapTask.HeaderAndQuantileDataHolder holder : list) {
            headers.add(holder.getHeader());
        }
        Instances aggregated = aggregate(headers);
        Instances withoutSummary = stripSummaryAtts(aggregated);

        // Instances' constructor is handed this list directly, hence the concrete type.
        ArrayList<Attribute> newAtts = new ArrayList<Attribute>();
        for (int i = 0; i < withoutSummary.numAttributes(); i++) {
            newAtts.add((Attribute) withoutSummary.attribute(i).copy());
        }

        for (int i = 0; i < withoutSummary.numAttributes(); i++) {
            Attribute original = withoutSummary.attribute(i);
            String name = original.name();
            // Look the summary attribute up BEFORE copying it: the lookup may return null.
            Attribute summary = aggregated.attribute(CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX + name);

            if (!original.isNumeric()) {
                if (summary != null) {
                    newAtts.add((Attribute) summary.copy());
                }
                continue;
            }

            List<TDigest> partials = collectPartialQuantileEstimators(list, name);
            if (summary == null) {
                System.err.println("[CSVReducer] No summary attribute for numeric attribute '" + name + "'");
                continue;
            }
            Attribute summaryCopy = (Attribute) summary.copy();
            if (partials.isEmpty()) {
                newAtts.add(summaryCopy);
                continue;
            }
            // The first partial estimator's compression is used for the merged estimator.
            TDigest merged = TDigest.merge(partials.get(0).compression(), partials);
            NumericStats stats = NumericStats.attributeToStats(summaryCopy);
            stats.setQuantileEstimator(merged);
            stats.computeQuartilesAndHistogram();
            newAtts.add(stats.makeAttribute());
        }
        return new Instances(aggregated.relationName(), newAtts, 0);
    }

    /**
     * Gathers the non-null partial quantile estimators for one attribute. Holders
     * that return null or throw for the attribute are reported on stderr and
     * otherwise ignored.
     */
    private static List<TDigest> collectPartialQuantileEstimators(List<CSVToARFFHeaderMapTask.HeaderAndQuantileDataHolder> holders, String name) {
        List<TDigest> partials = new ArrayList<TDigest>();
        for (CSVToARFFHeaderMapTask.HeaderAndQuantileDataHolder holder : holders) {
            try {
                TDigest estimator = holder.getQuantileEstimator(name);
                if (estimator != null) {
                    partials.add(estimator);
                } else {
                    System.err.println("[CSVReducer] Partial quantile estimator for attribute '" + name + "' is null!");
                }
            } catch (DistributedWekaException e) {
                System.err.println("[CSVReducer] No partial quantile estimator for attribute '" + name + "'");
            }
        }
        return partials;
    }

    /**
     * Aggregates a list of headers into a single zero-instance header.
     *
     * <p>The first header acts as the master. Every other header (with its summary
     * attributes stripped) must have the same number of attributes, with the same
     * names in the same positions. A type mismatch is tolerated only between a
     * numeric attribute on one side and a nominal or string attribute on the
     * other; when the master is the numeric side, the attribute is re-typed to
     * nominal or string for the aggregated header. Nominal labels are the sorted
     * union of the labels seen in all headers. Aggregated summary attributes
     * produced by {@code aggregateSummaryStats} are appended at the end.
     *
     * @param list the headers to aggregate; the first element is the master
     * @return the aggregated header, named after the master's relation
     * @throws DistributedWekaException if the list is null or empty, or the
     *         headers are structurally incompatible
     */
    public Instances aggregate(List<Instances> list) throws DistributedWekaException {
        if (list == null || list.isEmpty()) {
            throw new DistributedWekaException("Nothing to aggregate!");
        }
        Instances master = list.get(0);
        Instances masterCheck = stripSummaryAtts(master);

        // Index -> attribute (from another header) whose type the master's numeric attribute must adopt.
        Map<Integer, Attribute> forcedTypes = new HashMap<Integer, Attribute>();
        for (int i = 1; i < list.size(); i++) {
            Instances toAgg = stripSummaryAtts(list.get(i));
            if (toAgg.numAttributes() != masterCheck.numAttributes()) {
                System.err.println("Master:\n\n" + masterCheck);
                System.err.println("\nTo aggregate # " + (i + 1) + ":\n\n" + toAgg);
                throw new DistributedWekaException("Number of attributes differ between headers to aggregate!");
            }
            for (int j = 0; j < masterCheck.numAttributes(); j++) {
                Attribute masterAtt = masterCheck.attribute(j);
                Attribute otherAtt = toAgg.attribute(j);
                if (!otherAtt.name().equals(masterAtt.name())) {
                    System.err.println("Master:\n\n" + masterCheck);
                    System.err.println("\nTo aggregate # " + (i + 1) + ":\n\n" + toAgg);
                    throw new DistributedWekaException("Attribute names differ in headers to aggregate: att (master) '" + masterAtt.name() + "' != att (to aggregate) '" + otherAtt.name() + "' at pos " + (j + 1));
                }
                if (otherAtt.type() == masterAtt.type()) {
                    continue;
                }
                // Both headers are dumped for every type mismatch, including the tolerated ones.
                System.err.println("Master:\n\n" + master);
                System.err.println("\nTo aggregate # " + (i + 1) + ":\n\n" + toAgg);
                boolean incompatible = false;
                if (masterAtt.isNumeric()) {
                    if (otherAtt.isNominal() || otherAtt.isString()) {
                        forcedTypes.put(j, otherAtt);
                    } else {
                        incompatible = true;
                    }
                } else if (!otherAtt.isNumeric()) {
                    incompatible = true;
                } else if (!masterAtt.isNominal() && !masterAtt.isString()) {
                    incompatible = true;
                }
                if (incompatible) {
                    throw new DistributedWekaException("Types differ in headers to aggregate: att (master) '" + masterAtt.name() + "' (" + Attribute.typeToString(masterAtt) + ") != att (to aggregate) '" + otherAtt.name() + "' (" + Attribute.typeToString(otherAtt) + ") at pos " + (j + 1));
                }
            }
        }

        if (!forcedTypes.isEmpty()) {
            // Rebuild the master structure with the re-typed attributes.
            ArrayList<Attribute> rebuilt = new ArrayList<Attribute>();
            for (int j = 0; j < masterCheck.numAttributes(); j++) {
                Attribute att = (Attribute) masterCheck.attribute(j).copy();
                Attribute forced = forcedTypes.get(j);
                if (forced != null) {
                    if (forced.isNominal()) {
                        // Placeholder label only; the real labels are collected below.
                        List<String> placeholder = new ArrayList<String>();
                        placeholder.add("Dummy");
                        att = new Attribute(att.name(), placeholder);
                    } else {
                        att = new Attribute(att.name(), (List<String>) null);
                    }
                }
                rebuilt.add(att);
            }
            masterCheck = new Instances(master.relationName(), rebuilt, 0);
        }

        ArrayList<Attribute> aggregatedAtts = new ArrayList<Attribute>();
        for (int j = 0; j < masterCheck.numAttributes(); j++) {
            Attribute masterAtt = masterCheck.attribute(j);
            if (masterAtt.isNominal()) {
                // Sorted, duplicate-free union of the labels from every header.
                Set<String> labels = new TreeSet<String>();
                for (Instances header : list) {
                    Attribute candidate = header.attribute(j);
                    if (!candidate.isNumeric()) {
                        for (int v = 0; v < candidate.numValues(); v++) {
                            labels.add(candidate.value(v));
                        }
                    }
                }
                aggregatedAtts.add(new Attribute(masterAtt.name(), new ArrayList<String>(labels)));
            } else if (masterAtt.isString()) {
                aggregatedAtts.add(new Attribute(masterAtt.name(), (List<String>) null));
            } else if (masterAtt.isDate()) {
                aggregatedAtts.add(new Attribute(masterAtt.name(), masterAtt.getDateFormat()));
            } else if (masterAtt.isRelationValued()) {
                Instances relation = masterAtt.relation();
                for (int k = 1; k < list.size(); k++) {
                    if (relation.equalHeadersMsg(list.get(k).attribute(j).relation()) != null) {
                        throw new DistributedWekaException("Relational attribute '" + master.attribute(j).name() + "' differs in structure amongs the headers to be aggregated!");
                    }
                }
                aggregatedAtts.add(new Attribute(masterAtt.name(), new Instances(relation, 0)));
            } else {
                aggregatedAtts.add(new Attribute(masterAtt.name()));
            }
        }

        aggregatedAtts.addAll(aggregateSummaryStats(list, masterCheck));
        return new Instances(master.relationName(), aggregatedAtts, 0);
    }

    /**
     * Returns the supplied header without its summary attributes.
     *
     * <p>The first attribute whose name starts with
     * {@code CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX} and every
     * attribute after it are removed with a {@link Remove} filter. When no such
     * attribute exists the argument itself is returned.
     *
     * @param instances the header to strip
     * @return the stripped header, or {@code instances} if there was nothing to strip
     * @throws DistributedWekaException wrapping any exception raised by the filter
     */
    public static Instances stripSummaryAtts(Instances instances) throws DistributedWekaException {
        int firstSummaryIndex = -1;
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (instances.attribute(idx).name().startsWith(CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX)) {
                firstSummaryIndex = idx;
                break;
            }
        }
        if (firstSummaryIndex < 0) {
            return instances;
        }

        // The range handed to Remove is 1-based, hence the +1.
        Remove remover = new Remove();
        remover.setAttributeIndices("" + (firstSummaryIndex + 1) + "-last");
        try {
            remover.setInputFormat(instances);
            return Filter.useFilter(instances, remover);
        } catch (Exception ex) {
            throw new DistributedWekaException(ex);
        }
    }

    /**
     * Builds a new header in which the summary attributes of numeric attributes
     * carry quartiles and histogram data.
     *
     * <p>The result holds copies of the non-summary attributes followed by one
     * summary attribute per original attribute. For numeric attributes the first
     * quartile, median and third quartile are taken from elements 0, 1 and 2 of
     * {@code quantileCalculator.getQuantiles(name)}, and the histogram from the
     * bin data stored in {@code map} under the attribute's index. If anything
     * goes wrong while updating one numeric attribute (including missing or
     * mismatched bin data), the problem is printed to stderr and the existing
     * summary attribute is used unchanged. Non-numeric summary attributes are
     * copied when present.
     *
     * @param instances the header containing summary attributes
     * @param quantileCalculator source of the quantiles, queried by attribute name
     * @param map histogram bin data keyed by the attribute's index in the stripped header
     * @return a zero-instance header named "Updated with quartiles"
     * @throws DistributedWekaException if stripping the summary attributes fails
     */
    public static Instances updateSummaryAttsWithQuartilesAndHistograms(Instances instances, QuantileCalculator quantileCalculator, Map<Integer, NumericAttributeBinData> map) throws DistributedWekaException {
        Instances withoutSummary = stripSummaryAtts(instances);
        ArrayList<Attribute> updatedAtts = new ArrayList<Attribute>();
        for (int i = 0; i < withoutSummary.numAttributes(); i++) {
            updatedAtts.add((Attribute) withoutSummary.attribute(i).copy());
        }

        for (int i = 0; i < withoutSummary.numAttributes(); i++) {
            String name = withoutSummary.attribute(i).name();
            Attribute summary = instances.attribute(CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX + name);
            if (withoutSummary.attribute(i).isNumeric()) {
                NumericStats stats = NumericStats.attributeToStats(summary);
                try {
                    double[] quantiles = quantileCalculator.getQuantiles(name);
                    double[] values = stats.getStats();
                    values[ArffSummaryNumericMetric.FIRSTQUARTILE.ordinal()] = quantiles[0];
                    values[ArffSummaryNumericMetric.MEDIAN.ordinal()] = quantiles[1];
                    values[ArffSummaryNumericMetric.THIRDQUARTILE.ordinal()] = quantiles[2];

                    NumericAttributeBinData binData = map.get(Integer.valueOf(i));
                    if (binData == null) {
                        throw new DistributedWekaException("No histogram data at index " + i + " for attribute '" + name + "'!");
                    }
                    if (!binData.getAttributeName().equals(name)) {
                        throw new DistributedWekaException("Histogram data at index " + i + "(" + binData.getAttributeName() + ") does not match quantile data (" + name + ")!");
                    }
                    stats.setHistogramData(binData.getBinLabels(), binData.getBinFreqs());
                    updatedAtts.add(stats.makeAttribute());
                } catch (Exception e) {
                    // Best effort: report the problem and keep the summary attribute as it was.
                    System.err.println(e);
                    updatedAtts.add(summary);
                }
            } else if (summary != null) {
                updatedAtts.add((Attribute) summary.copy());
            }
        }
        return new Instances("Updated with quartiles", updatedAtts, 0);
    }

    /**
     * Tells whether the header, once its summary attributes are stripped, has at
     * least one numeric attribute.
     *
     * @param instances the header to inspect
     * @return {@code true} if any non-summary attribute is numeric
     * @throws DistributedWekaException if stripping the summary attributes fails
     */
    public static boolean headerContainsNumericAttributes(Instances instances) throws DistributedWekaException {
        Instances base = stripSummaryAtts(instances);
        for (int idx = 0; idx < base.numAttributes(); idx++) {
            if (base.attribute(idx).isNumeric()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether any numeric attribute's summary attribute holds a
     * non-missing first-quartile value.
     *
     * @param instances the header (with summary attributes) to inspect
     * @return {@code true} as soon as one numeric attribute has a first quartile
     *         that is not a missing value; {@code false} otherwise
     * @throws DistributedWekaException if stripping the summary attributes fails
     */
    public static boolean headerContainsQuartiles(Instances instances) throws DistributedWekaException {
        Instances base = stripSummaryAtts(instances);
        for (int idx = 0; idx < base.numAttributes(); idx++) {
            Attribute candidate = base.attribute(idx);
            if (!candidate.isNumeric()) {
                continue;
            }
            Attribute summary = instances.attribute(CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX + candidate.name());
            double firstQuartile = ArffSummaryNumericMetric.FIRSTQUARTILE.valueFromAttribute(summary);
            if (!Utils.isMissingValue(firstQuartile)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Converts a summary attribute into its array of statistics by way of
     * {@link NumericStats#attributeToStats(Attribute)}.
     *
     * <p>Note: the decompiler reports that this method's access modifier was
     * changed from {@code protected}.
     *
     * @param attribute the summary attribute to convert
     * @return the statistics array of the parsed {@link NumericStats}
     * @throws IllegalArgumentException declared by the original signature;
     *         presumably raised when the attribute cannot be parsed
     */
    public static double[] attributeToStatsArray(Attribute attribute) throws IllegalArgumentException {
        NumericStats parsed = NumericStats.attributeToStats(attribute);
        return parsed.getStats();
    }

    /**
     * Merges the summary attributes of all headers into one summary attribute per
     * original attribute.
     *
     * <p>The position of the first summary attribute is located by name prefix
     * (the position found in the last header that has one is used for all
     * headers). Each summary attribute from that position onwards is matched to
     * its original attribute in {@code instances} by removing the prefix from its
     * name, and merged according to the original attribute's type: numeric and
     * string statistics are combined metric by metric, nominal label counts are
     * accumulated. Derived statistics are recomputed for numeric and string
     * attributes before the result attributes are created.
     *
     * @param list the headers whose summary attributes are to be merged
     * @param instances the aggregated header without summary attributes
     * @return the merged summary attributes in first-seen order; empty when no
     *         header contains summary attributes
     * @throws DistributedWekaException if a summary attribute has no matching
     *         original attribute or a nominal summary value is malformed
     */
    protected static List<Attribute> aggregateSummaryStats(List<Instances> list, Instances instances) throws DistributedWekaException {
        List<Attribute> result = new ArrayList<Attribute>();

        int summaryStart = -1;
        for (Instances header : list) {
            for (int i = 0; i < header.numAttributes(); i++) {
                if (header.attribute(i).name().startsWith(CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX)) {
                    summaryStart = i;
                    break;
                }
            }
        }
        if (summaryStart < 0) {
            return result;
        }

        // Insertion-ordered so the output follows the order in which attributes were first seen.
        Map<String, Stats> mergedByName = new LinkedHashMap<String, Stats>();
        for (Instances header : list) {
            for (int i = summaryStart; i < header.numAttributes(); i++) {
                Attribute summaryAtt = header.attribute(i);
                Attribute original = instances.attribute(summaryAtt.name().replace(CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX, ""));
                if (original == null) {
                    throw new DistributedWekaException("Can't find corresponding original attribute for summary stats attribute '" + summaryAtt.name() + "'");
                }
                if (original.isNumeric()) {
                    mergeNumericSummary(mergedByName, original.name(), summaryAtt);
                } else if (original.isNominal()) {
                    mergeNominalSummary(mergedByName, original.name(), summaryAtt);
                } else if (original.isString()) {
                    mergeStringSummary(mergedByName, original.name(), summaryAtt);
                }
            }
        }

        for (Stats stats : mergedByName.values()) {
            if (stats instanceof NumericStats) {
                ((NumericStats) stats).computeDerived();
            } else if (stats instanceof StringStats) {
                ((StringStats) stats).computeDerived();
            }
            result.add(stats.makeAttribute());
        }
        return result;
    }

    /** Folds one header's numeric summary attribute into the running numeric stats for {@code name}. */
    private static void mergeNumericSummary(Map<String, Stats> mergedByName, String name, Attribute summaryAtt) {
        double[] incoming = attributeToStatsArray(summaryAtt);
        NumericStats current = (NumericStats) mergedByName.get(name);
        if (current == null) {
            NumericStats first = new NumericStats(name);
            first.setStats(incoming);
            mergedByName.put(name, first);
        } else {
            mergeSummaryMetrics(current.getStats(), incoming);
        }
    }

    /** Folds one header's nominal summary attribute (label counts) into the running nominal stats for {@code name}. */
    private static void mergeNominalSummary(Map<String, Stats> mergedByName, String name, Attribute summaryAtt) throws DistributedWekaException {
        NominalStats current = (NominalStats) mergedByName.get(name);
        if (current == null) {
            current = new NominalStats(name);
            mergedByName.put(name, current);
        }

        // A summary attribute laid out like a numeric summary contributes only its missing count.
        boolean numericLayout = summaryAtt.numValues() == ArffSummaryNumericMetric.values().length
            && summaryAtt.value(ArffSummaryNumericMetric.COUNT.ordinal()).startsWith(ArffSummaryNumericMetric.COUNT.toString())
            && summaryAtt.value(ArffSummaryNumericMetric.STDDEV.ordinal()).startsWith(ArffSummaryNumericMetric.STDDEV.toString());
        if (numericLayout) {
            current.add(null, attributeToStatsArray(summaryAtt)[ArffSummaryNumericMetric.MISSING.ordinal()]);
            return;
        }

        for (int v = 0; v < summaryAtt.numValues(); v++) {
            // Each value is "<label>_<count>"; the label itself may contain underscores.
            String encoded = summaryAtt.value(v);
            int separator = encoded.lastIndexOf("_");
            if (separator < 0) {
                throw new DistributedWekaException("Malformed nominal summary value '" + encoded + "' in summary stats attribute '" + summaryAtt.name() + "'");
            }
            String label = encoded.substring(0, separator);
            double count;
            try {
                count = Double.parseDouble(encoded.substring(separator + 1));
            } catch (NumberFormatException e) {
                throw new DistributedWekaException(e);
            }
            if (label.equals(NominalStats.MISSING_LABEL)) {
                current.add(null, count);
            } else {
                current.add(label, count);
            }
        }
    }

    /** Folds one header's string summary attribute into the running string stats for {@code name}. */
    private static void mergeStringSummary(Map<String, Stats> mergedByName, String name, Attribute summaryAtt) {
        StringStats incoming = StringStats.attributeToStats(summaryAtt);
        StringStats current = (StringStats) mergedByName.get(name);
        if (current == null) {
            mergedByName.put(name, incoming);
            return;
        }
        // String length and word count are separate statistics: each gets its own min/max comparison.
        mergeSummaryMetrics(current.getStringLengthStats().getStats(), incoming.getStringLengthStats().getStats());
        mergeSummaryMetrics(current.getWordCountStats().getStats(), incoming.getWordCountStats().getStats());
    }

    /**
     * Merges {@code source} into {@code target}: COUNT, SUM, SUMSQ and MISSING are
     * added, MIN keeps the smaller value and MAX the larger. Other metrics are
     * left untouched.
     */
    private static void mergeSummaryMetrics(double[] target, double[] source) {
        for (ArffSummaryNumericMetric metric : ArffSummaryNumericMetric.values()) {
            int idx = metric.ordinal();
            if (metric == ArffSummaryNumericMetric.COUNT || metric == ArffSummaryNumericMetric.SUM
                || metric == ArffSummaryNumericMetric.SUMSQ || metric == ArffSummaryNumericMetric.MISSING) {
                target[idx] = target[idx] + source[idx];
            } else if (metric == ArffSummaryNumericMetric.MIN) {
                if (source[idx] < target[idx]) {
                    target[idx] = source[idx];
                }
            } else if (metric == ArffSummaryNumericMetric.MAX && source[idx] > target[idx]) {
                target[idx] = source[idx];
            }
        }
    }

    /**
     * Command-line entry point: reads each non-empty argument as the path of a
     * file from which an {@link Instances} object is loaded, keeps only the
     * structure (zero instances) of each, aggregates them and prints the
     * aggregated header to stdout. Any exception is printed as a stack trace.
     *
     * @param strArr paths of the files to aggregate; null or empty entries are skipped
     */
    public static void main(String[] strArr) {
        try {
            List<Instances> headers = new ArrayList<Instances>();
            for (String path : strArr) {
                if (path != null && path.length() > 0) {
                    BufferedReader reader = new BufferedReader(new FileReader(path));
                    try {
                        headers.add(new Instances(new Instances(reader), 0));
                    } finally {
                        // Release the file handle whether or not loading succeeded.
                        reader.close();
                    }
                }
            }
            if (!headers.isEmpty()) {
                System.out.println("Aggregated header\n\n" + new CSVToARFFHeaderReduceTask().aggregate(headers).toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
