package weka.distributed;

import distributed.core.DistributedJobConfig;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import weka.classifiers.rules.DecisionTableHashKey;
import weka.clusterers.ClusterUtils;
import weka.clusterers.PreconstructedKMeans;
import weka.core.Attribute;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.NormalizableDistance;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.StreamableFilterHelper;
import weka.core.Utils;
import weka.core.stats.Stats;
import weka.filters.Filter;
import weka.filters.PreconstructedFilter;
import weka.filters.StreamableFilter;
import weka.filters.unsupervised.attribute.PreconstructedMissingValuesReplacer;

/* loaded from: input_file:weka/distributed/KMeansMapTask.class */
/**
 * Map-side task for distributed k-means.
 *
 * <p>After {@link #init(Instances)} and {@link #setCentroids(Instances)} have been called,
 * each instance passed to {@link #processInstance(Instance)} is run through the configured
 * preprocessing filters, assigned to a cluster by the wrapped {@link PreconstructedKMeans},
 * and folded into the summary statistics kept for that cluster's centroid.
 * {@link #getCentroidStats()} exposes those per-centroid statistics as headers.
 *
 * <p>Supported options: {@code -dont-replace-missing} and zero or more {@code -filter <spec>}.
 */
public class KMeansMapTask implements OptionHandler, Serializable {
    private static final long serialVersionUID = -2423639970668815722L;

    /** Header handed to {@link #init(Instances)}; still contains the summary attributes. */
    protected Instances m_headerWithSummary;

    /** Header with summary attributes stripped and, once init() completes, passed through the filters. */
    protected Instances m_transformedHeaderNoSummary;

    /** Missing-values replacer built from the summary header; only created when replacement is enabled. */
    protected PreconstructedFilter m_missingValuesReplacer;

    /** When true, no missing-values replacer is created or applied. */
    protected boolean m_dontReplaceMissing;

    /** All preprocessing (missing-value replacement plus user filters) wrapped as one filter; null when there is none. */
    protected PreconstructedFilter m_finalFullPreprocess;

    /** Current centroids, as supplied to {@link #setCentroids(Instances)}. */
    protected Instances m_centroids;

    /** One attribute-name to stats map per centroid, indexed like {@link #m_centroids}. */
    protected List<Map<String, Stats>> m_centroidSummaryStats;

    /** The k-means implementation used for cluster assignment; null until init() has run. */
    protected PreconstructedKMeans m_kMeans;

    /** Set by configureFilters() when more than one filter is wrapped into the preprocess chain. */
    protected boolean m_dataIsBeingTransformed;

    /** Passed to the k-means implementation on every call to processInstance(). */
    protected boolean m_updateDistanceFunction;

    /** Convergence flag; only stored and returned by this class. */
    protected boolean m_converged;

    /** User-specified filters to apply before clustering. */
    protected List<Filter> m_filtersToUse = new ArrayList<Filter>();

    /** Distance function handed to the k-means implementation. */
    protected NormalizableDistance m_distanceFunction = new EuclideanDistance();

    /**
     * Initializes the task from a header that includes summary attributes: strips the summary
     * attributes, builds the missing-values replacer (unless disabled), configures the filter
     * chain, primes the distance function and creates the k-means implementation.
     *
     * @param instances the header, including summary attributes
     * @return the header without summary attributes after it has passed through the filters
     * @throws DistributedWekaException if any step fails
     */
    public Instances init(Instances instances) throws DistributedWekaException {
        this.m_headerWithSummary = instances;
        this.m_transformedHeaderNoSummary =
            CSVToARFFHeaderReduceTask.stripSummaryAtts(this.m_headerWithSummary);
        Instances primingData =
            ClusterUtils.getPrimingDataForDistanceFunction(this.m_headerWithSummary);

        if (!this.m_dontReplaceMissing) {
            try {
                this.m_missingValuesReplacer =
                    new PreconstructedMissingValuesReplacer(this.m_headerWithSummary);
            } catch (Exception e) {
                throw new DistributedWekaException(e);
            }
        }

        configureFilters(this.m_transformedHeaderNoSummary);

        if (this.m_dataIsBeingTransformed) {
            // The filters change the data, so prime the distance function with the
            // filtered format and ask k-means to keep updating it as instances arrive.
            this.m_distanceFunction.setInstances(preprocessAsFilter().getOutputFormat());
            this.m_updateDistanceFunction = true;
        } else {
            this.m_distanceFunction.setInstances(primingData);
        }

        this.m_kMeans = new PreconstructedKMeans();
        // A single try block: the original nested two, which wrapped a failure from
        // applyFilters() in DistributedWekaException twice.
        try {
            this.m_kMeans.setDistanceFunction(this.m_distanceFunction);
            this.m_transformedHeaderNoSummary = applyFilters(this.m_transformedHeaderNoSummary);
        } catch (Exception e) {
            throw new DistributedWekaException(e);
        }
        return this.m_transformedHeaderNoSummary;
    }

    /**
     * Replaces the distance function with a fresh {@link EuclideanDistance} primed with the
     * supplied data, and stops requesting distance-function updates during clustering.
     *
     * @param instances the data used to prime the new distance function
     * @throws DistributedWekaException if init() has not been called or k-means rejects the function
     */
    public void setDummyDistancePrimingData(Instances instances) throws DistributedWekaException {
        if (this.m_kMeans == null) {
            throw new DistributedWekaException("Must call init() first");
        }
        this.m_distanceFunction = new EuclideanDistance();
        this.m_distanceFunction.setInstances(instances);
        try {
            this.m_kMeans.setDistanceFunction(this.m_distanceFunction);
        } catch (Exception e) {
            throw new DistributedWekaException(e);
        }
        this.m_updateDistanceFunction = false;
    }

    /**
     * Filters the instance, assigns it to a cluster and updates that cluster's summary
     * statistics for every nominal or numeric attribute of the transformed header.
     *
     * @param instance the raw (unfiltered) instance
     * @throws DistributedWekaException if no centroids are set or processing fails
     */
    public void processInstance(Instance instance) throws DistributedWekaException {
        if (this.m_centroids == null) {
            throw new DistributedWekaException("No centroids set!");
        }
        try {
            Instance filtered = applyFilters(instance);
            int cluster = this.m_kMeans.clusterProcessedInstance(null, filtered, this.m_updateDistanceFunction, null);
            Map<String, Stats> clusterStats = this.m_centroidSummaryStats.get(cluster);

            Instances header = this.m_transformedHeaderNoSummary;
            for (int a = 0; a < header.numAttributes(); a++) {
                Attribute headerAtt = header.attribute(a);
                if (!isSummarizable(headerAtt)) {
                    continue;
                }
                boolean nominal = filtered.attribute(a).isNominal();
                // A present nominal value is recorded by label with a value of 1;
                // numeric and missing values are passed through as the raw value with no label.
                boolean presentNominal = nominal && !filtered.isMissing(a);
                double value = presentNominal ? 1.0d : filtered.value(a);
                String label = presentNominal ? filtered.stringValue(a) : null;
                CSVToARFFHeaderMapTask.updateSummaryStats(clusterStats, headerAtt.name(), value, label,
                    nominal, false, false, false, 50.0d);
            }
        } catch (Exception e) {
            throw new DistributedWekaException(e);
        }
    }

    /**
     * Computes the distance between two instances using the k-means distance function.
     *
     * @param instance the first instance
     * @param instance2 the second instance
     * @return the distance between the two instances
     * @throws DistributedWekaException if init() has not been called
     */
    public double distance(Instance instance, Instance instance2) throws DistributedWekaException {
        if (this.m_kMeans == null) {
            throw new DistributedWekaException("[KMeansMapTask] We haven't been initialized yet");
        }
        return this.m_kMeans.getDistanceFunction().distance(instance, instance2);
    }

    /**
     * Returns the partial summary statistics gathered for each centroid. Requires init() and
     * setCentroids() to have been called.
     *
     * @return one entry per centroid, in centroid order; each entry is an empty header whose
     *         attributes are the transformed header's attributes followed by one summary attribute
     *         per nominal/numeric attribute, or {@code null} when no statistics were collected
     *         for that centroid
     */
    public List<Instances> getCentroidStats() {
        double[] clusterErrors = this.m_kMeans.getErrorsForClusters();
        List<Instances> result = new ArrayList<Instances>();
        for (int c = 0; c < this.m_centroids.numInstances(); c++) {
            result.add(buildCentroidStats(c, clusterErrors));
        }
        return result;
    }

    /** Builds the stats header for one centroid, or returns null if a required stat is absent. */
    private Instances buildCentroidStats(int centroidIndex, double[] clusterErrors) {
        Map<String, Stats> summary = this.m_centroidSummaryStats.get(centroidIndex);
        Instances header = this.m_transformedHeaderNoSummary;

        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        for (int a = 0; a < header.numAttributes(); a++) {
            atts.add((Attribute) header.attribute(a).copy());
        }
        for (int a = 0; a < header.numAttributes(); a++) {
            Attribute att = header.attribute(a);
            if (!isSummarizable(att)) {
                continue;
            }
            Stats stats = summary.get(att.name());
            if (stats == null) {
                System.err.println("No instances for centroid: " + centroidIndex + " " + this.m_centroids.instance(centroidIndex));
                return null;
            }
            atts.add(stats.makeAttribute());
        }
        // The error array is indexed only on the success path, as in the original code.
        return new Instances("Partial stats for centroid " + centroidIndex + " : " + clusterErrors[centroidIndex], atts, 0);
    }

    /** Returns true for attributes that summary statistics are kept for (nominal or numeric). */
    private static boolean isSummarizable(Attribute attribute) {
        return attribute.isNominal() || attribute.isNumeric();
    }

    /** Views the wrapped preprocess chain as a Filter; the field is declared as PreconstructedFilter. */
    private Filter preprocessAsFilter() {
        return (Filter) this.m_finalFullPreprocess;
    }

    /**
     * Assembles the preprocessing chain (missing-values replacer first, if enabled, followed by
     * the user filters), wraps it into a single filter and sets its input format.
     *
     * @param instances the header to use as the input format of the wrapped filter
     * @throws DistributedWekaException if a user filter is not a StreamableFilter or wrapping fails
     */
    protected void configureFilters(Instances instances) throws DistributedWekaException {
        List<StreamableFilter> chain = new ArrayList<StreamableFilter>();
        if (!getDontReplaceMissingValues()) {
            chain.add((StreamableFilter) this.m_missingValuesReplacer);
        }
        if (this.m_filtersToUse != null) {
            for (Filter filter : this.m_filtersToUse) {
                if (!(filter instanceof StreamableFilter)) {
                    throw new DistributedWekaException("Filter " + filter.getClass().getName() + " is not a StreamableFilter!");
                }
                chain.add((StreamableFilter) filter);
            }
        }

        if (!chain.isEmpty()) {
            try {
                this.m_finalFullPreprocess = StreamableFilterHelper.wrapStreamableFilters(chain);
            } catch (Exception e) {
                throw new DistributedWekaException(e);
            }
            // NOTE(review): the flag is only raised for more than one filter, so a single user
            // filter with missing-value replacement disabled leaves it false - confirm intended.
            if (chain.size() > 1) {
                this.m_dataIsBeingTransformed = true;
            }
        }

        if (this.m_finalFullPreprocess != null) {
            try {
                preprocessAsFilter().setInputFormat(instances);
            } catch (Exception e) {
                throw new DistributedWekaException(e);
            }
        }
    }

    /**
     * Tip text for the dontReplaceMissingValues property.
     *
     * @return the tip text
     */
    public String dontReplaceMissingValuesTipText() {
        return "Don't replace missing values globally with mean/mode.";
    }

    /**
     * Sets whether missing-value replacement is skipped.
     *
     * @param z true to skip replacing missing values
     */
    public void setDontReplaceMissingValues(boolean z) {
        this.m_dontReplaceMissing = z;
    }

    /**
     * Gets whether missing-value replacement is skipped.
     *
     * @return true if missing values are not replaced
     */
    public boolean getDontReplaceMissingValues() {
        return this.m_dontReplaceMissing;
    }

    /**
     * Gets the distance function currently held by this task.
     *
     * @return the distance function
     */
    public NormalizableDistance getDistanceFunction() {
        return this.m_distanceFunction;
    }

    /**
     * Installs a new set of centroids in the k-means implementation and resets the per-centroid
     * summary statistics to one empty map per centroid. init() must have been called first.
     *
     * @param instances the centroids, one per instance
     */
    public void setCentroids(Instances instances) {
        this.m_centroids = instances;
        this.m_kMeans.setClusterCentroids(instances);
        List<Map<String, Stats>> freshStats = new ArrayList<Map<String, Stats>>();
        for (int c = 0; c < instances.numInstances(); c++) {
            freshStats.add(new HashMap<String, Stats>());
        }
        this.m_centroidSummaryStats = freshStats;
    }

    /**
     * Passes every instance of a dataset through the preprocessing chain.
     *
     * @param instances the data to filter
     * @return a new dataset in the filter's output format, or the input itself when no
     *         preprocessing is configured
     * @throws Exception if filtering fails
     */
    public Instances applyFilters(Instances instances) throws Exception {
        if (this.m_finalFullPreprocess == null) {
            return instances;
        }
        Filter preprocess = preprocessAsFilter();
        Instances filtered = new Instances(preprocess.getOutputFormat(), 0);
        for (int n = 0; n < instances.numInstances(); n++) {
            preprocess.input(instances.instance(n));
            filtered.add(preprocess.output());
        }
        return filtered;
    }

    /**
     * Passes a single instance through the preprocessing chain.
     *
     * @param instance the instance to filter
     * @return the filtered instance, or the input itself when no preprocessing is configured
     * @throws Exception if filtering fails
     */
    public Instance applyFilters(Instance instance) throws Exception {
        if (this.m_finalFullPreprocess == null) {
            return instance;
        }
        Filter preprocess = preprocessAsFilter();
        preprocess.input(instance);
        return preprocess.output();
    }

    /**
     * Gets the wrapped preprocessing chain.
     *
     * @return the combined preprocessing filter, or {@code null} if none has been configured
     */
    public Filter getPreprocessingFilters() {
        return preprocessAsFilter();
    }

    /**
     * Sets the convergence flag.
     *
     * @param z the new value of the flag
     */
    public void setConverged(boolean z) {
        this.m_converged = z;
    }

    /**
     * Gets the convergence flag.
     *
     * @return the value last passed to {@link #setConverged(boolean)}
     */
    public boolean getConverged() {
        return this.m_converged;
    }

    /**
     * Gets the header without summary attributes (filtered once init() has completed).
     *
     * @return the transformed header
     */
    public Instances getTransformedHeader() {
        return this.m_transformedHeaderNoSummary;
    }

    /**
     * Gets the user-specified filters, excluding any that are PreconstructedFilters.
     *
     * @return the filters, in the order they were set; never {@code null}
     */
    public Filter[] getFiltersToUse() {
        List<Filter> userFilters = new ArrayList<Filter>();
        if (this.m_filtersToUse != null) {
            for (Filter filter : this.m_filtersToUse) {
                if (!(filter instanceof PreconstructedFilter)) {
                    userFilters.add(filter);
                }
            }
        }
        return userFilters.toArray(new Filter[userFilters.size()]);
    }

    /**
     * Replaces the user-specified filters. Filters that are PreconstructedFilters or that are
     * not StreamableFilters are silently dropped.
     *
     * @param filterArr the filters to use; may be {@code null} or empty to clear the list
     */
    public void setFiltersToUse(Filter[] filterArr) {
        if (this.m_filtersToUse == null) {
            this.m_filtersToUse = new ArrayList<Filter>();
        } else {
            this.m_filtersToUse.clear();
        }
        if (filterArr == null) {
            return;
        }
        for (Filter filter : filterArr) {
            if (filter instanceof StreamableFilter && !(filter instanceof PreconstructedFilter)) {
                this.m_filtersToUse.add(filter);
            }
        }
    }

    /**
     * Tip text for the filtersToUse property.
     *
     * @return the tip text
     */
    public String filtersToUseTipText() {
        return "Filters to pre-process the data with before passing it to k-means. Note that only StreamableFilters can be used.";
    }

    /**
     * Lists this task's own option followed by the options reported by StreamableFilterHelper.
     *
     * @return an enumeration of the available options
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<Option>();
        options.addElement(new Option("\tDon't replace missing values with mean/mode when running in batch mode.", "dont-replace-missing", 0, "-dont-replace-missing"));
        Enumeration<Option> helperOptions = StreamableFilterHelper.listOptions();
        while (helperOptions.hasMoreElements()) {
            options.add(helperOptions.nextElement());
        }
        return options.elements();
    }

    /**
     * Parses {@code -dont-replace-missing} and every {@code -filter <spec>} option. Each filter
     * spec is a class name followed by that filter's own options.
     *
     * @param strArr the option array
     * @throws Exception if a filter spec is invalid or a filter cannot be instantiated or configured
     */
    @Override
    public void setOptions(String[] strArr) throws Exception {
        setDontReplaceMissingValues(Utils.getFlag("dont-replace-missing", strArr));
        this.m_filtersToUse = new ArrayList<Filter>();
        // Consume "-filter" options until none remain.
        while (true) {
            String filterSpec = Utils.getOption("filter", strArr);
            if (DistributedJobConfig.isEmpty(filterSpec)) {
                return;
            }
            String[] specParts = Utils.splitOptions(filterSpec);
            if (specParts.length == 0) {
                throw new IllegalArgumentException("Invalid filter specification string");
            }
            Filter filter = (Filter) Class.forName(specParts[0]).getDeclaredConstructor().newInstance();
            // Blank out the class name so only the filter's own options remain.
            specParts[0] = "";
            if (filter instanceof OptionHandler) {
                ((OptionHandler) filter).setOptions(specParts);
            }
            this.m_filtersToUse.add(filter);
        }
    }

    /**
     * Gets the current option settings.
     *
     * @return the options as an array, with one {@code -filter <spec>} pair per configured filter
     */
    @Override
    public String[] getOptions() {
        List<String> options = new ArrayList<String>();
        if (getDontReplaceMissingValues()) {
            options.add("-dont-replace-missing");
        }
        if (this.m_filtersToUse != null) {
            for (Filter filter : this.m_filtersToUse) {
                options.add("-filter");
                options.add(StreamableFilterHelper.getFilterSpec(filter));
            }
        }
        return options.toArray(new String[options.size()]);
    }

    /**
     * Draws initial centers for a number of runs from a shared candidate list, taking candidates
     * from the end of the list. Within a run, a candidate whose hash key has already been chosen
     * is skipped and stays in the list; chosen candidates are removed from the list, so the list
     * is modified by this call.
     *
     * @param i the number of runs to assign start points for
     * @param i2 the maximum number of centers per run
     * @param list the candidate instances; chosen candidates are removed
     * @param instances the header used for each returned set of centers
     * @return one set of centers per run; a set may hold fewer than {@code i2} centers
     * @throws DistributedWekaException if a run ends up with no centers or a hash key cannot be built
     */
    public static List<Instances> assignStartPointsFromList(int i, int i2, List<Instance> list, Instances instances) throws DistributedWekaException {
        List<Instances> startPoints = new ArrayList<Instances>();
        for (int run = 0; run < i; run++) {
            HashSet<DecisionTableHashKey> chosenKeys = new HashSet<DecisionTableHashKey>();
            Instances centers = new Instances(instances, 0);
            for (int idx = list.size() - 1; centers.numInstances() < i2 && idx >= 0; idx--) {
                Instance candidate = list.get(idx);
                DecisionTableHashKey key;
                try {
                    key = new DecisionTableHashKey(candidate, instances.numAttributes(), true);
                } catch (Exception e) {
                    throw new DistributedWekaException(e);
                }
                // Compare by hash key: the set holds keys, so testing the raw instance
                // (as the original did) could never detect a duplicate.
                if (!chosenKeys.add(key)) {
                    continue;
                }
                centers.add(candidate);
                // Remove the chosen candidate by moving the last element into its slot.
                int last = list.size() - 1;
                if (idx != last) {
                    list.set(idx, list.get(last));
                }
                list.remove(last);
            }
            if (centers.numInstances() == 0) {
                throw new DistributedWekaException("Unable to find distinct initial centers!");
            }
            startPoints.add(centers);
        }
        return startPoints;
    }
}
