package weka.distributed.hadoop;

import distributed.core.DistributedJob;
import distributed.core.DistributedJobConfig;
import distributed.hadoop.HDFSUtils;
import distributed.hadoop.MapReduceJobConfig;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Vector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.CommandlineRunnable;
import weka.core.Environment;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Utils;
import weka.distributed.CSVToARFFHeaderMapTask;
import weka.distributed.CSVToARFFHeaderReduceTask;
import weka.distributed.DistributedWekaException;
import weka.distributed.WekaClassifierMapTask;
import weka.gui.beans.ClassifierProducer;

/* loaded from: input_file:weka/distributed/hadoop/WekaClassifierHadoopJob.class */
/**
 * Hadoop job that builds an aggregated Weka classifier.
 *
 * <p>The job optionally runs an ARFF header job first, optionally runs a job
 * that writes randomly shuffled data chunks, and then performs one or more
 * map-reduce iterations whose reducer writes a serialized classifier to
 * {@code <output path>/model/<model file name>}. After a successful run the
 * classifier is read back and exposed through {@link #getClassifier()}.
 */
public class WekaClassifierHadoopJob extends HadoopJob implements ClassifierProducer, CommandlineRunnable {
    private static final long serialVersionUID = 5266821649358242686L;

    /** Sub-directory, appended to the configured output path, that receives the model. */
    protected static final String OUTPUT_SUBDIR = "/model";

    /** Number of iterations to run in the model building phase. */
    protected int m_numIterations;

    /** File name (not a full path) that the model is saved under. */
    protected String m_modelName;

    /** Path to an optional pre-constructed filter (empty when unused). */
    protected String m_pathToPreconstructedFilter;

    /** Name or index of the class attribute as supplied by the user. */
    protected String m_classIndex;

    /** Joined option string handed to the classifier map task. */
    protected String m_wekaClassifierMapTaskOpts;

    /** Joined option string handed to the CSV-to-ARFF map task. */
    protected String m_wekaCsvToArffMapTaskOpts;

    /** Job used to create the ARFF training header. */
    protected ArffHeaderHadoopJob m_arffHeaderJob;

    /** Path that the reducer is told to write the aggregated classifier to. */
    protected String m_hdfsPathToAggregatedClassifier;

    /** User supplied value of the -aggregation-fraction option (unparsed). */
    protected String m_minTrainingFraction;

    /** Whether to run the extra job that creates randomly shuffled data chunks. */
    protected boolean m_createRandomizedDataChunks;

    /** User supplied number of randomized data chunks (unparsed). */
    protected String m_numDataChunks;

    /** User supplied number of instances per randomized data chunk (unparsed). */
    protected String m_numInstancesPerDataChunk;

    /** Map-reduce configuration for the randomized data chunk job. */
    protected MapReduceJobConfig m_randomizeConfig;

    /** The classifier read back from the file system after a successful run. */
    protected Classifier m_finalClassifier;

    /** Whether {@link #initializeAndRunArffJob()} should execute the ARFF header job. */
    protected boolean m_runArffJob;

    /**
     * Creates the job with its default settings and registers the classifier
     * mapper and reducer classes on the map-reduce configuration.
     */
    public WekaClassifierHadoopJob() {
        super("Weka classifier builder job", "Build an aggregated Weka classifier");
        this.m_numIterations = 1;
        this.m_modelName = "outputModel.model";
        this.m_pathToPreconstructedFilter = "";
        this.m_classIndex = "";
        this.m_wekaClassifierMapTaskOpts = "";
        this.m_wekaCsvToArffMapTaskOpts = "";
        this.m_arffHeaderJob = new ArffHeaderHadoopJob();
        this.m_hdfsPathToAggregatedClassifier = "";
        this.m_minTrainingFraction = "";
        this.m_numDataChunks = "";
        this.m_numInstancesPerDataChunk = "";
        this.m_randomizeConfig = new MapReduceJobConfig();
        this.m_runArffJob = true;
        this.m_mrConfig.setMapperClass(WekaClassifierHadoopMapper.class.getName());
        this.m_mrConfig.setReducerClass(WekaClassifierHadoopReducer.class.getName());
    }

    /**
     * @return a short description of what this job does
     */
    public String globalInfo() {
        return "Trains a classifier - produces a single model of the same type for Aggregateable classifiers or a voted ensemble of the base classifiers if they are not directly aggregateable";
    }

    /**
     * @return the tip text for the model file name property
     */
    public String modelFileNameTipText() {
        return "The name only (not full path) that the model should be saved to";
    }

    /**
     * @param str the file name (not full path) to save the model under
     */
    public void setModelFileName(String str) {
        this.m_modelName = str;
    }

    /**
     * @return the file name (not full path) the model is saved under
     */
    public String getModelFileName() {
        return this.m_modelName;
    }

    /**
     * @return the tip text for the number of iterations property
     */
    public String numIterationsTipText() {
        return "The number of iterations to run in the model building phase - >1 only makes sense for incremental classifiers such as SGD.";
    }

    /**
     * @param i the number of model building iterations to run
     */
    public void setNumIterations(int i) {
        this.m_numIterations = i;
    }

    /**
     * @return the number of model building iterations to run
     */
    public int getNumIterations() {
        return this.m_numIterations;
    }

    /**
     * @return the tip text for the class attribute property
     */
    public String classAttributeTipText() {
        return "The name or index of the class attribute. 'first' and 'last' may also be used.";
    }

    /**
     * @param str the name or index of the class attribute
     */
    public void setClassAttribute(String str) {
        this.m_classIndex = str;
    }

    /**
     * @return the name or index of the class attribute
     */
    public String getClassAttribute() {
        return this.m_classIndex;
    }

    /**
     * @param str the joined options for the CSV-to-ARFF map task
     */
    public void setCSVMapTaskOptions(String str) {
        this.m_wekaCsvToArffMapTaskOpts = str;
    }

    /**
     * @return the joined options for the CSV-to-ARFF map task
     */
    public String getCSVMapTaskOptions() {
        return this.m_wekaCsvToArffMapTaskOpts;
    }

    /**
     * @param str the joined options for the classifier map task
     */
    public void setClassifierMapTaskOptions(String str) {
        this.m_wekaClassifierMapTaskOpts = str;
    }

    /**
     * @return the joined options for the classifier map task
     */
    public String getClassifierMapTaskOptions() {
        return this.m_wekaClassifierMapTaskOpts;
    }

    /**
     * Tip text for the minimum training fraction property. Kept under its
     * original name for backward compatibility; see
     * {@link #minTrainingFractionTipText()} for the consistently named variant.
     *
     * @return the tip text for the minimum training fraction property
     */
    public String minTrainingFraction() {
        return "The fraction of training instances below which a model learned by a map task will be discarded from the aggregation. This is a percentage of the total number of instances seen by the map task that has seen the mpst data. This option is useful when not using randomly shuffled data chunks, as there may be one input slit that contains significantly less data than all the others, and we might want to discard the model learned on this one.";
    }

    /**
     * Same text as {@link #minTrainingFraction()}, named like the other
     * {@code *TipText} methods of this class.
     *
     * @return the tip text for the minimum training fraction property
     */
    public String minTrainingFractionTipText() {
        return minTrainingFraction();
    }

    /**
     * @param str the minimum training fraction (unparsed; validated when the job is initialized)
     */
    public void setMinTrainingFraction(String str) {
        this.m_minTrainingFraction = str;
    }

    /**
     * @return the minimum training fraction as supplied by the user
     */
    public String getMinTrainingFraction() {
        return this.m_minTrainingFraction;
    }

    /**
     * @return the tip text for the pre-constructed filter path property
     */
    public String pathToPreconstructedFilterTipText() {
        return "The path to a (optional) pre-constructed filter to use. The filter may reside in HDFS or on the local file system.";
    }

    /**
     * @param str the path to an optional pre-constructed filter
     */
    public void setPathToPreconstructedFilter(String str) {
        this.m_pathToPreconstructedFilter = str;
    }

    /**
     * @return the path to the optional pre-constructed filter
     */
    public String getPathToPreconstructedFilter() {
        return this.m_pathToPreconstructedFilter;
    }

    /**
     * @return the tip text for the create randomized data chunks property
     */
    public String createRandomizedDataChunksTipText() {
        return "Create randomly shuffled (and stratified if class is nominal) data chunks. This involves an extra pass (job) over the data. One of numRandomizedDataChunks or numInstancesPerRandomizedDataChunk must be set in conjunction with this option.";
    }

    /**
     * @param z true if randomly shuffled data chunks should be created
     */
    public void setCreateRandomizedDataChunks(boolean z) {
        this.m_createRandomizedDataChunks = z;
    }

    /**
     * @return true if randomly shuffled data chunks should be created
     */
    public boolean getCreateRandomizedDataChunks() {
        return this.m_createRandomizedDataChunks;
    }

    /**
     * @return the tip text for the number of randomized data chunks property
     */
    public String numRandomizedDataChunksTipText() {
        return "The number of randomly shuffled data chunks to create. Use in conjunction with createRandomizedDataChunks";
    }

    /**
     * @param str the number of randomly shuffled data chunks to create (unparsed)
     */
    public void setNumRandomizedDataChunks(String str) {
        this.m_numDataChunks = str;
    }

    /**
     * @return the number of randomly shuffled data chunks to create (unparsed)
     */
    public String getNumRandomizedDataChunks() {
        return this.m_numDataChunks;
    }

    /**
     * @return the tip text for the instances per randomized data chunk property
     */
    public String numInstancesPerRandomizedDataChunkTipText() {
        return "The number of instances that each randomly shuffled data chunk should contain. Use in conjunction with createRandomizedDataChunks.";
    }

    /**
     * @param str the number of instances per randomly shuffled data chunk (unparsed)
     */
    public void setNumInstancesPerRandomizedDataChunk(String str) {
        this.m_numInstancesPerDataChunk = str;
    }

    /**
     * @return the number of instances per randomly shuffled data chunk (unparsed)
     */
    public String getNumInstancesPerRandomizedDataChunk() {
        return this.m_numInstancesPerDataChunk;
    }

    /**
     * Lists the options of this job, followed by those of the classifier map
     * task and those of the ARFF header job.
     *
     * @return an enumeration of all available options
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<Option>();
        options.add(new Option("\tCreate data chunks with the order of the input instances\n\tshuffled randomly. Also stratifies the data if the class\n\tis nominal. Use in conjuction with -num-chunks or -num-instances-per-chunk", "randomized-chunks", 0, "-randomized-chunks"));
        options.add(new Option("\tNumber of randomized data chunks. Use in conjunction with\n\t-randomized-chunks", "num-chunks", 1, "-num-chunks <integer>"));
        options.add(new Option("\tNumber of instances per randomized data chunk.\n\tUse in conjunction with -randomized-chunks", "num-instances-per-chunk", 1, "-num-instances-per-chunk <integer>"));
        options.add(new Option("\tName of output model file. Model will be\n\twritten to output-path/model/<model name>", "model-file-name", 1, "-model-file-name <model-name>"));
        options.add(new Option("\tNumber of iterations over the data (default = 1).\n\tMore than 1 iteration only makes sense for classifiers such\n\tas SGD and SGDText", "num-iterations", 1, "-num-iterations <num>"));
        options.add(new Option("\tClass index (1-based) or class attribute name (default = last attribute).", "class", 1, "-class <index or name>"));
        options.add(new Option("\tMinimum allowable fraction of the largest number of training instances\n\tprocessed by any classifier on its split of the data. Any classifier\n\tthat sees less than this percentage of instances on its data set\n\twill be discarded and not form part of the final aggregation\n\t(default = 0.5)", "aggregation-fraction", 1, "-aggregation-fraction <fraction>"));
        options.add(new Option("\tPath to a serialized pre-constructed filter to use on the data.\n\tThe filter may reside locally or in HDFS.", "preconstructed-filter", 1, "-preconstructed-filter <path to filter>"));

        Enumeration<?> mapTaskOptions = new WekaClassifierMapTask().listOptions();
        while (mapTaskOptions.hasMoreElements()) {
            options.add((Option) mapTaskOptions.nextElement());
        }

        options.add(new Option("", "", 0, "\nOptions specific to ARFF training header creation:"));
        Enumeration<Option> arffJobOptions = new ArffHeaderHadoopJob().listOptions();
        while (arffJobOptions.hasMoreElements()) {
            options.add(arffJobOptions.nextElement());
        }
        return options.elements();
    }

    /**
     * Parses the supplied options. Options specific to this job are read
     * first; the remainder is handed to the superclass, the randomize job
     * configuration, the ARFF header job and a classifier map task.
     *
     * @param strArr the options to parse
     * @throws Exception if an option cannot be parsed (for example a non-integer -num-iterations value)
     */
    @Override
    public void setOptions(String[] strArr) throws Exception {
        String iterations = Utils.getOption("num-iterations", strArr);
        if (DistributedJobConfig.isEmpty(iterations)) {
            setNumIterations(1);
        } else {
            setNumIterations(Integer.parseInt(iterations));
        }

        String modelFileName = Utils.getOption("model-file-name", strArr);
        if (!DistributedJobConfig.isEmpty(modelFileName)) {
            setModelFileName(modelFileName);
        }

        setClassAttribute(Utils.getOption("class", strArr));
        setMinTrainingFraction(Utils.getOption("aggregation-fraction", strArr));
        setPathToPreconstructedFilter(Utils.getOption("preconstructed-filter", strArr));
        setCreateRandomizedDataChunks(Utils.getFlag("randomized-chunks", strArr));
        setNumRandomizedDataChunks(Utils.getOption("num-chunks", strArr));
        setNumInstancesPerRandomizedDataChunk(Utils.getOption("num-instances-per-chunk", strArr));

        // Snapshot the remaining options before the superclass parses strArr,
        // so that the randomize configuration and the ARFF header job each
        // receive the options as they were at this point.
        String[] remaining = (String[]) strArr.clone();
        super.setOptions(strArr);
        this.m_randomizeConfig.setOptions((String[]) remaining.clone());
        this.m_arffHeaderJob.setOptions(remaining);

        String csvTaskOptions = Utils.joinOptions(this.m_arffHeaderJob.getOptions());
        if (!DistributedJobConfig.isEmpty(csvTaskOptions)) {
            setCSVMapTaskOptions(csvTaskOptions);
        }

        WekaClassifierMapTask mapTask = new WekaClassifierMapTask();
        mapTask.setOptions(strArr);
        String classifierTaskOptions = Utils.joinOptions(mapTask.getOptions());
        if (!DistributedJobConfig.isEmpty(classifierTaskOptions)) {
            setClassifierMapTaskOptions(classifierTaskOptions);
        }
    }

    /**
     * Returns the options of this job followed by the CSV map task options
     * and the classifier map task options.
     *
     * @return the current option settings
     */
    @Override
    public String[] getOptions() {
        ArrayList<String> options = collectJobOptions();
        appendSplitOptions(options, getCSVMapTaskOptions());
        appendSplitOptions(options, getClassifierMapTaskOptions());
        return options.toArray(new String[options.size()]);
    }

    /**
     * Returns only the options that belong to this job itself (no CSV map
     * task or classifier map task options).
     *
     * @return the job-specific option settings
     */
    public String[] getJobOptionsOnly() {
        ArrayList<String> options = collectJobOptions();
        return options.toArray(new String[options.size()]);
    }

    /** Builds the list of options that are specific to this job. */
    private ArrayList<String> collectJobOptions() {
        ArrayList<String> options = new ArrayList<String>();
        options.add("-model-file-name");
        options.add(getModelFileName());
        options.add("-num-iterations");
        options.add("" + getNumIterations());
        if (!DistributedJobConfig.isEmpty(getClassAttribute())) {
            options.add("-class");
            options.add(getClassAttribute());
        }
        if (!DistributedJobConfig.isEmpty(getPathToPreconstructedFilter())) {
            options.add("-preconstructed-filter");
            options.add(getPathToPreconstructedFilter());
        }
        if (!DistributedJobConfig.isEmpty(getMinTrainingFraction())) {
            options.add("-aggregation-fraction");
            options.add(getMinTrainingFraction());
        }
        if (getCreateRandomizedDataChunks()) {
            options.add("-randomized-chunks");
            if (!DistributedJobConfig.isEmpty(getNumRandomizedDataChunks())) {
                options.add("-num-chunks");
                options.add(getNumRandomizedDataChunks());
            }
            if (!DistributedJobConfig.isEmpty(getNumInstancesPerRandomizedDataChunk())) {
                options.add("-num-instances-per-chunk");
                options.add(getNumInstancesPerRandomizedDataChunk());
            }
        }
        return options;
    }

    /** Splits a joined option string and appends the parts; a split failure is reported but not fatal. */
    private static void appendSplitOptions(ArrayList<String> target, String joinedOptions) {
        if (DistributedJobConfig.isEmpty(joinedOptions)) {
            return;
        }
        try {
            for (String part : Utils.splitOptions(joinedOptions)) {
                target.add(part);
            }
        } catch (Exception ex) {
            // Best effort: getOptions() cannot throw, so report and carry on.
            ex.printStackTrace();
        }
    }

    /**
     * Moves the aggregated classifier written by the previous iteration into
     * the temporary distributed cache location and adds it to the distributed
     * cache of the supplied configuration.
     *
     * @param configuration the configuration of the next iteration
     * @throws IOException if a file system operation fails
     */
    protected void stageIntermediateClassifier(Configuration configuration) throws IOException {
        String source = this.m_hdfsPathToAggregatedClassifier;
        String staged = HDFSUtils.WEKA_TEMP_DISTRIBUTED_CACHE_FILES + getModelFileName();
        HDFSUtils.moveInHDFS(source, staged, this.m_mrConfig.getHDFSConfig(), this.m_env);
        HDFSUtils.addFileToDistributedCache(this.m_mrConfig.getHDFSConfig(), configuration, staged, this.m_env);
    }

    /**
     * Adds the pre-constructed filter to the distributed cache.
     *
     * @param configuration the configuration to add the filter to
     * @return the value returned by {@code HDFSUtils.addFileToDistributedCache}, which is
     *         passed on to the map task as the -preconstructed-filter argument
     * @throws IOException if a file system operation fails
     */
    protected String handlePreconstructedFilter(Configuration configuration) throws IOException {
        return HDFSUtils.addFileToDistributedCache(this.m_mrConfig.getHDFSConfig(), configuration, environmentSubstitute(getPathToPreconstructedFilter()), this.m_env);
    }

    /**
     * Configures and runs the job that creates randomly shuffled data chunks.
     * Does nothing (and returns true) when randomized chunks are not requested.
     *
     * @param instances the ARFF header including summary attributes; the class
     *          summary attribute is used to determine the number of instances
     * @return true if the job succeeded or was not needed
     * @throws DistributedWekaException if the settings are invalid or the job cannot be set up
     * @throws IOException if a file system operation fails
     */
    protected boolean initializeAndRunRandomizeDataJob(Instances instances) throws DistributedWekaException, IOException {
        if (!getCreateRandomizedDataChunks()) {
            return true;
        }
        Instances headerNoSummary = CSVToARFFHeaderReduceTask.stripSummaryAtts(instances);
        try {
            WekaClassifierHadoopMapper.setClassIndex(getClassAttribute(), headerNoSummary, true);
            String className = headerNoSummary.classAttribute().name();
            Attribute classSummary = instances.attribute("arff_summary_" + className);
            if (classSummary == null) {
                throw new DistributedWekaException("Was unable to find the summary attribute for the class: " + className);
            }

            // Total number of instances, taken from the class summary statistics.
            int totalInstances = 0;
            if (headerNoSummary.classAttribute().isNominal()) {
                CSVToARFFHeaderMapTask.NominalStats nominalStats = CSVToARFFHeaderMapTask.NominalStats.attributeToStats(classSummary);
                Iterator<?> labels = nominalStats.getLabels().iterator();
                while (labels.hasNext()) {
                    totalInstances = (int) (totalInstances + nominalStats.getCount((String) labels.next()));
                }
            } else {
                totalInstances = (int) CSVToARFFHeaderMapTask.NumericStats.attributeToStats(classSummary).getStats()[CSVToARFFHeaderMapTask.ArffSummaryNumericMetric.COUNT.ordinal()];
            }

            this.m_randomizeConfig.setMapperClass(RandomizedDataChunkHadoopMapper.class.getName());
            this.m_randomizeConfig.setReducerClass(RandomizedDataChunkHadoopReducer.class.getName());
            this.m_randomizeConfig.setMapOutputValueClass(Text.class.getName());

            Configuration conf = new Configuration();
            String headerPath = environmentSubstitute(this.m_arffHeaderJob.getAggregatedHeaderPath());
            HDFSUtils.addFileToDistributedCache(this.m_randomizeConfig.getHDFSConfig(), conf, headerPath, this.m_env);
            // Only the file name of the header is passed to the map task.
            String headerFileName = headerPath.substring(headerPath.lastIndexOf("/") + 1, headerPath.length());

            ArrayList<String> mapTaskOptions = new ArrayList<String>();
            mapTaskOptions.add("-arff-header");
            mapTaskOptions.add(headerFileName);
            if (!DistributedJobConfig.isEmpty(getClassAttribute())) {
                mapTaskOptions.add("-class");
                mapTaskOptions.add(environmentSubstitute(getClassAttribute()));
            }
            try {
                // Reuse the seed given to the classifier map task, if any.
                String seed = Utils.getOption("seed", Utils.splitOptions(getClassifierMapTaskOptions()));
                if (!DistributedJobConfig.isEmpty(seed)) {
                    String resolvedSeed = environmentSubstitute(seed);
                    mapTaskOptions.add("-seed");
                    mapTaskOptions.add(resolvedSeed);
                }
            } catch (Exception ex) {
                // Best effort: a missing or unparsable seed does not stop the job.
                ex.printStackTrace();
            }
            this.m_randomizeConfig.setUserSuppliedProperty(RandomizedDataChunkHadoopMapper.RANDOMIZED_DATA_CHUNK_MAP_TASK_OPTIONS, environmentSubstitute(Utils.joinOptions(mapTaskOptions.toArray(new String[mapTaskOptions.size()]))));
            this.m_randomizeConfig.setUserSuppliedProperty(CSVToArffHeaderHadoopMapper.CSV_TO_ARFF_HEADER_MAP_TASK_OPTIONS, environmentSubstitute(getCSVMapTaskOptions()));

            int numChunks = determineNumChunks(totalInstances);
            this.m_randomizeConfig.setUserSuppliedProperty(RandomizedDataChunkHadoopReducer.NUM_DATA_CHUNKS, "" + numChunks);

            String randomizedOutputPath = environmentSubstitute(this.m_mrConfig.getOutputPath() + "/randomized");
            this.m_randomizeConfig.setOutputPath(randomizedOutputPath);
            this.m_randomizeConfig.setNumberOfReducers("1");
            installWekaLibrariesInHDFS(conf);

            Job job;
            try {
                job = this.m_randomizeConfig.configureForHadoop("Create randomly shuffled input data chunk job - num chunks: " + numChunks, conf, this.m_env);
            } catch (ClassNotFoundException ex) {
                throw new DistributedWekaException(ex);
            }
            for (int chunk = 0; chunk < numChunks; chunk++) {
                MultipleOutputs.addNamedOutput(job, "chunk" + chunk, TextOutputFormat.class, Text.class, Text.class);
            }
            this.m_randomizeConfig.deleteOutputDirectory(job, this.m_env);
            statusMessage("Submitting randomized data chunk job ");
            logMessage("Submitting randomized data chunk job ");

            boolean success = runJob(job);
            if (success) {
                // Remove the reducer's default output file; the chunks are the named outputs.
                HDFSUtils.deleteFile(this.m_randomizeConfig.getHDFSConfig(), conf, randomizedOutputPath + "/part-r-00000", this.m_env);
            } else {
                statusMessage("Create randomly shuffled input data chunk job failed - check logs on Hadoop");
                logMessage("Create randomly shuffled input data chunk job job failed - check logs on Hadoop");
            }
            return success;
        } catch (Exception ex) {
            // Pass declared exception types through unchanged so their messages
            // are not buried under an extra wrapper; wrap everything else.
            if (ex instanceof DistributedWekaException) {
                throw (DistributedWekaException) ex;
            }
            if (ex instanceof IOException) {
                throw (IOException) ex;
            }
            throw new DistributedWekaException(ex);
        }
    }

    /**
     * Works out how many randomized data chunks to create from the user
     * settings: the explicit chunk count if given, otherwise the total number
     * of instances divided by the instances per chunk, rounded up.
     */
    private int determineNumChunks(int totalInstances) throws DistributedWekaException {
        boolean chunkCountMissing = DistributedJobConfig.isEmpty(getNumRandomizedDataChunks());
        if (chunkCountMissing && DistributedJobConfig.isEmpty(getNumInstancesPerRandomizedDataChunk())) {
            throw new DistributedWekaException("Must specify either the number of chunks to create or the number of instances per chunk");
        }

        int numChunks;
        try {
            if (chunkCountMissing) {
                int instancesPerChunk = Integer.parseInt(environmentSubstitute(getNumInstancesPerRandomizedDataChunk()));
                if (instancesPerChunk <= 0) {
                    throw new DistributedWekaException("Number of instances per chunk must be > 0");
                }
                if (instancesPerChunk > totalInstances) {
                    throw new DistributedWekaException("Can't have more instances per chunk than there are instances in the dataset!");
                }
                // Divide as double: integer division would truncate before the
                // ceiling is applied and silently drop the final partial chunk.
                numChunks = (int) Math.ceil((double) totalInstances / instancesPerChunk);
            } else {
                numChunks = Integer.parseInt(environmentSubstitute(getNumRandomizedDataChunks()));
            }
        } catch (NumberFormatException ex) {
            throw new DistributedWekaException(ex);
        }

        if (numChunks <= 1) {
            throw new DistributedWekaException("Can't randomize because number of data chunks <= 1");
        }
        return numChunks;
    }

    /**
     * Runs the ARFF header job (if enabled) and the randomized data chunk job
     * (if requested), then configures the output path, the classifier write
     * path and the minimum training fraction for the classifier job.
     *
     * @return false if the ARFF header job or the randomize job failed, true otherwise
     * @throws DistributedWekaException if a problem occurs
     * @throws IOException if a file system operation fails
     */
    public boolean initializeAndRunArffJob() throws DistributedWekaException, IOException {
        if (this.m_env == null) {
            this.m_env = Environment.getSystemWide();
        }

        if (this.m_runArffJob) {
            logMessage("Executing ARFF Job....");
            statusMessage("Excecuting ARFF Job...");
            this.m_arffHeaderJob.setEnvironment(this.m_env);
            this.m_arffHeaderJob.setLog(getLog());
            this.m_arffHeaderJob.setStatusMessagePrefix(this.m_statusMessagePrefix);
            if (!this.m_arffHeaderJob.runJob()) {
                statusMessage("Unable to continue - creating the ARFF header failed!");
                logMessage("Unable to continue - creating the ARFF header failed!");
                return false;
            }
        }

        if (getCreateRandomizedDataChunks()) {
            if (!initializeAndRunRandomizeDataJob(this.m_arffHeaderJob.getFinalHeader())) {
                statusMessage("Unable to continue - stratification of input data failed!");
                logMessage("Unable to continue - stratification of input data failed!");
                return false;
            }
            // The classifier job now reads the randomized chunks instead of the original input.
            this.m_mrConfig.setMapredMaxSplitSize("");
            this.m_mrConfig.setInputPaths(this.m_randomizeConfig.getOutputPath());
        }

        String modelDirectory = environmentSubstitute(this.m_mrConfig.getOutputPath() + OUTPUT_SUBDIR);
        this.m_mrConfig.setOutputPath(modelDirectory);
        String classifierPath = modelDirectory + "/" + environmentSubstitute(getModelFileName());
        this.m_mrConfig.setUserSuppliedProperty(WekaClassifierHadoopReducer.CLASSIFIER_WRITE_PATH, classifierPath);
        this.m_hdfsPathToAggregatedClassifier = classifierPath;

        if (!DistributedJobConfig.isEmpty(getMinTrainingFraction())) {
            String fraction = environmentSubstitute(getMinTrainingFraction());
            try {
                // Parsed only to validate; the reducer receives the string form.
                Double.parseDouble(fraction);
                this.m_mrConfig.setUserSuppliedProperty(WekaClassifierHadoopReducer.MIN_TRAINING_FRACTION, fraction);
            } catch (NumberFormatException ex) {
                // An unparsable value is ignored (the property is simply not set).
                System.err.println("Warning: unable to parse aggregation-fraction value: " + fraction);
            }
        }
        return true;
    }

    /**
     * Configures and runs one map-reduce iteration of classifier training.
     *
     * @param i the zero-based iteration number
     * @param z for iterations after the first: whether to stage the classifier
     *          produced by the previous iteration in the distributed cache
     * @param configuration the configuration to use, or null to create a new one
     * @return true if the iteration's job succeeded
     * @throws IOException if a file system operation fails
     * @throws DistributedWekaException if the job cannot be configured
     */
    public boolean performIteration(int i, boolean z, Configuration configuration) throws IOException, DistributedWekaException {
        // Strip the " - iteration: ..." suffix appended by a previous iteration.
        String baseJobName = environmentSubstitute(getJobName());
        int suffixStart = baseJobName.lastIndexOf(" - ");
        if (suffixStart > 0) {
            baseJobName = baseJobName.substring(0, suffixStart);
        }

        String headerPath = environmentSubstitute(this.m_arffHeaderJob.getAggregatedHeaderPath());
        if (configuration == null) {
            configuration = new Configuration();
        }
        HDFSUtils.addFileToDistributedCache(this.m_mrConfig.getHDFSConfig(), configuration, headerPath, this.m_env);
        String headerFileName = headerPath.substring(headerPath.lastIndexOf("/") + 1, headerPath.length());

        ArrayList<String> mapTaskOptions = new ArrayList<String>();
        mapTaskOptions.add("-arff-header");
        mapTaskOptions.add(headerFileName);
        if (!DistributedJobConfig.isEmpty(getClassAttribute())) {
            mapTaskOptions.add("-class");
            mapTaskOptions.add(environmentSubstitute(getClassAttribute()));
        }
        if (!DistributedJobConfig.isEmpty(getPathToPreconstructedFilter())) {
            String filterFile = handlePreconstructedFilter(configuration);
            mapTaskOptions.add("-preconstructed-filter");
            mapTaskOptions.add(filterFile);
        }
        if (i > 0) {
            mapTaskOptions.add("-continue-training-updateable");
            if (z) {
                stageIntermediateClassifier(configuration);
            }
            mapTaskOptions.add("-model-file-name");
            mapTaskOptions.add(environmentSubstitute(getModelFileName()));
        }
        if (!DistributedJobConfig.isEmpty(getClassifierMapTaskOptions())) {
            try {
                for (String part : Utils.splitOptions(environmentSubstitute(getClassifierMapTaskOptions()))) {
                    mapTaskOptions.add(part);
                }
            } catch (Exception ex) {
                throw new DistributedWekaException(ex);
            }
        }

        String joinedMapTaskOptions = Utils.joinOptions(mapTaskOptions.toArray(new String[mapTaskOptions.size()]));
        this.m_mrConfig.setUserSuppliedProperty(WekaClassifierHadoopMapper.CLASSIFIER_MAP_TASK_OPTIONS, environmentSubstitute(joinedMapTaskOptions));
        setJobName(baseJobName + " - iteration: " + (i + 1) + " " + joinedMapTaskOptions);
        this.m_mrConfig.setUserSuppliedProperty(CSVToArffHeaderHadoopMapper.CSV_TO_ARFF_HEADER_MAP_TASK_OPTIONS, environmentSubstitute(getCSVMapTaskOptions()));

        // The libraries are installed on the first iteration only; later
        // iterations just add them to the classpath.
        if (i == 0) {
            installWekaLibrariesInHDFS(configuration);
        } else {
            addWekaLibrariesToClasspath(configuration);
        }

        Job job;
        try {
            job = this.m_mrConfig.configureForHadoop(getJobName(), configuration, this.m_env);
        } catch (ClassNotFoundException ex) {
            throw new DistributedWekaException(ex);
        }
        cleanOutputDirectory(job);
        statusMessage("Submitting iteration " + (i + 1) + " of job: " + getJobName());
        logMessage("Submitting iteration " + (i + 1) + " of job: " + getJobName());

        boolean success = runJob(job);
        if (!success) {
            statusMessage("Weka classifier job failed - check logs on Hadoop");
            logMessage("Weka classifier job failed - check logs on Hadoop");
        }
        return success;
    }

    /**
     * Runs the complete job: the preparatory jobs, the configured number of
     * training iterations and, on success, loading of the aggregated
     * classifier. The thread's context class loader is switched to this
     * class's loader for the duration of the call and restored afterwards.
     *
     * @return true if all phases succeeded
     * @throws DistributedWekaException if a problem occurs; the job status is set to FAILED
     */
    public boolean runJob() throws DistributedWekaException {
        ClassLoader originalLoader = Thread.currentThread().getContextClassLoader();
        try {
            Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
            try {
                setJobStatus(DistributedJob.JobStatus.RUNNING);
                if (!initializeAndRunArffJob()) {
                    // Do not leave the job marked as RUNNING after a failed preparatory phase.
                    setJobStatus(DistributedJob.JobStatus.FAILED);
                    return false;
                }

                boolean success = true;
                for (int iteration = 0; iteration < this.m_numIterations; iteration++) {
                    success = performIteration(iteration, true, null);
                    if (!success) {
                        statusMessage("Weka classifier job failed - check logs on Hadoop");
                        logMessage("Weka classifier job failed - check logs on Hadoop");
                        break;
                    }
                }

                setJobStatus(success ? DistributedJob.JobStatus.FINISHED : DistributedJob.JobStatus.FAILED);
                if (success) {
                    Configuration conf = new Configuration();
                    this.m_mrConfig.getHDFSConfig().configureForHadoop(conf, this.m_env);
                    this.m_finalClassifier = loadAggregatedClassifier(conf);
                }
                return success;
            } catch (Exception ex) {
                setJobStatus(DistributedJob.JobStatus.FAILED);
                if (ex instanceof DistributedWekaException) {
                    throw (DistributedWekaException) ex;
                }
                throw new DistributedWekaException(ex);
            }
        } finally {
            Thread.currentThread().setContextClassLoader(originalLoader);
        }
    }

    /** Deserializes the aggregated classifier from its write path, always closing the stream. */
    private Classifier loadAggregatedClassifier(Configuration conf) throws IOException, ClassNotFoundException {
        BufferedInputStream buffered = new BufferedInputStream(FileSystem.get(conf).open(new Path(this.m_hdfsPathToAggregatedClassifier)));
        try {
            ObjectInputStream objectStream = new ObjectInputStream(buffered);
            return (Classifier) objectStream.readObject();
        } finally {
            // Closing the buffered stream also closes the underlying file stream.
            buffered.close();
        }
    }

    /**
     * @return the classifier loaded after the last successful run, or null if there is none
     */
    @Override
    public Classifier getClassifier() {
        return this.m_finalClassifier;
    }

    /**
     * @return the final header of the ARFF job with summary attributes removed, or null
     *         if no header is available or the summary attributes cannot be stripped
     */
    @Override
    public Instances getTrainingHeader() {
        if (this.m_arffHeaderJob == null) {
            return null;
        }
        Instances headerWithSummary = this.m_arffHeaderJob.getFinalHeader();
        if (headerWithSummary == null) {
            return null;
        }
        try {
            return CSVToARFFHeaderReduceTask.stripSummaryAtts(headerWithSummary);
        } catch (DistributedWekaException ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Command line entry point.
     *
     * @param strArr the command line options
     */
    public static void main(String[] strArr) {
        WekaClassifierHadoopJob job = new WekaClassifierHadoopJob();
        job.run(job, strArr);
    }

    /**
     * Configures the supplied job from the options, runs it and prints the
     * resulting classifier. With the -h flag the option summary is printed and
     * the JVM exits. Exceptions raised while running are printed, not rethrown.
     *
     * @param obj the job to run; must be a WekaClassifierHadoopJob
     * @param strArr the command line options
     * @throws IllegalArgumentException if obj is not a WekaClassifierHadoopJob
     */
    public void run(Object obj, String[] strArr) throws IllegalArgumentException {
        if (!(obj instanceof WekaClassifierHadoopJob)) {
            throw new IllegalArgumentException("Object to run is not a WekaClassifierHadoopJob!");
        }
        try {
            WekaClassifierHadoopJob job = (WekaClassifierHadoopJob) obj;
            if (Utils.getFlag('h', strArr)) {
                System.err.println(DistributedJob.makeOptionsStr(job));
                System.exit(1);
            }
            job.setOptions(strArr);
            job.runJob();
            System.out.println(job.getClassifier());
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}
