package weka.distributed.hadoop;

import distributed.core.DistributedJob;
import distributed.core.DistributedJobConfig;
import distributed.hadoop.HDFSUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Vector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import weka.core.CommandlineRunnable;
import weka.core.Environment;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Utils;
import weka.distributed.DistributedWekaException;
import weka.gui.beans.InstancesProducer;
import weka.gui.beans.TextProducer;

/* loaded from: input_file:weka/distributed/hadoop/WekaClassifierEvaluationHadoopJob.class */
/**
 * Hadoop job that evaluates a Weka classifier using either the training
 * data, a separate test set or a cross-validation.
 *
 * <p>The job wraps a {@link WekaClassifierHadoopJob} that is configured with
 * the fold-based mapper/reducer to build the model(s), and then runs a second
 * map-reduce pass (the "evaluation phase") whose textual output and
 * {@code evaluation.arff} file are read back from the file system and exposed
 * through {@link #getText()} and {@link #getInstances()}.
 */
public class WekaClassifierEvaluationHadoopJob extends HadoopJob implements TextProducer, InstancesProducer, CommandlineRunnable {
    private static final long serialVersionUID = -5625588954027886749L;

    /** Hadoop property consulted to find the per-node reducer capacity. */
    private static final String REDUCE_TASKS_MAX_KEY = "mapred.tasktracker.reduce.tasks.maximum";

    /** Reducers per node assumed when {@link #REDUCE_TASKS_MAX_KEY} is not set anywhere. */
    private static final int DEFAULT_REDUCERS_PER_NODE = 2;

    /** The wrapped job that builds the (fold-based) classifier(s). */
    protected WekaClassifierHadoopJob m_classifierJob = new WekaClassifierHadoopJob();

    /** Number of nodes available in the cluster (default = 1). */
    protected int m_nodesAvailable = 1;

    /** Textual evaluation results read back after a successful run. */
    protected String m_textEvalResults;

    /** Evaluation results (read from evaluation.arff) after a successful run. */
    protected Instances m_evalResults;

    /** Path to a separate test set; empty when none is to be used. */
    protected String m_separateTestSetPath = "";

    /** Fraction of predictions to retain for AUC/AUPRC; empty to disable. */
    protected String m_predFrac = "";

    /**
     * Creates the job and wires the fold-based mapper/reducer classes into
     * both the wrapped classifier job and this evaluation job.
     */
    public WekaClassifierEvaluationHadoopJob() {
        super("Weka classifier evaluation job", "Evaluates a Weka classifier");
        this.m_classifierJob.getMapReduceJobConfig().setMapperClass(WekaFoldBasedClassifierHadoopMapper.class.getName());
        this.m_classifierJob.getMapReduceJobConfig().setReducerClass(WekaFoldBasedClassifierHadoopReducer.class.getName());
        this.m_mrConfig.setMapperClass(WekaFoldBasedClassifierEvaluationHadoopMapper.class.getName());
        this.m_mrConfig.setReducerClass(WekaClassifierEvaluationHadoopReducer.class.getName());
    }

    /**
     * Returns a short description of this job.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Evaluates a classifier using either the training data, a separate test set or a cross-validation.";
    }

    /**
     * Lists the options of this job followed by those of a
     * {@link WekaClassifierHadoopJob}.
     *
     * @return an enumeration of all available options
     */
    @Override // weka.distributed.hadoop.HadoopJob
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<Option>();
        options.add(new Option("", "", 0, "\nNote: the -fold-number option is ignored by this job."));
        options.add(new Option("\tNumber of nodes available in cluster (default = 1).", "num-nodes", 1, "-num-nodes"));
        options.add(new Option("", "", 0, "\nOptions specific to model building and evaluation:"));
        options.add(new Option("\tPath to a separate test set. Set either this or\n\ttotal-folds for a cross-validation (note that settting total-folds\n\tto 1 will perform testing on training)", "test-set-path", 1, "-test-set-path <path>"));
        options.add(new Option("\tCompute AUC and AUPRC. Note that this requires individual\n\tpredictions to be retained - specify a fraction of\n\tpredictions to sample (e.g. 0.5) in order to save resources.", "auc", 1, "-auc <fraction of predictions to sample>"));

        Enumeration<Option> classifierJobOptions = new WekaClassifierHadoopJob().listOptions();
        while (classifierJobOptions.hasMoreElements()) {
            options.add(classifierJobOptions.nextElement());
        }
        return options.elements();
    }

    /**
     * Parses the options for this job, then hands the remaining options to
     * the superclass and a copy of them to the wrapped classifier job.
     *
     * @param strArr the option array to parse
     * @throws Exception if an option cannot be parsed
     */
    @Override // weka.distributed.hadoop.HadoopJob
    public void setOptions(String[] strArr) throws Exception {
        String numNodes = Utils.getOption("num-nodes", strArr);
        if (!DistributedJobConfig.isEmpty(numNodes)) {
            setNumNodesInCluster(Integer.parseInt(numNodes));
        }
        setSeparateTestSetPath(Utils.getOption("test-set-path", strArr));
        setSampleFractionForAUC(Utils.getOption("auc", strArr));

        // Copy before the superclass parses the array so that the wrapped
        // classifier job is given the same options the superclass is given.
        String[] classifierJobOptions = strArr.clone();
        super.setOptions(strArr);
        this.m_classifierJob.setOptions(classifierJobOptions);
    }

    /**
     * Returns the superclass options, followed by this job's own options,
     * followed by the wrapped classifier job's options.
     *
     * @return the full option array
     */
    @Override // weka.distributed.hadoop.HadoopJob
    public String[] getOptions() {
        ArrayList<String> options = new ArrayList<String>();
        for (String opt : super.getOptions()) {
            options.add(opt);
        }
        addJobSpecificOptions(options);
        for (String opt : this.m_classifierJob.getOptions()) {
            options.add(opt);
        }
        return options.toArray(new String[options.size()]);
    }

    /**
     * Returns only the options that are specific to this evaluation job
     * (test set path, AUC sampling fraction and number of nodes).
     *
     * @return the job-specific option array
     */
    public String[] getJobOptionsOnly() {
        ArrayList<String> options = new ArrayList<String>();
        addJobSpecificOptions(options);
        return options.toArray(new String[options.size()]);
    }

    /** Appends -test-set-path, -auc (each only when set) and -num-nodes to the list. */
    private void addJobSpecificOptions(ArrayList<String> options) {
        if (!DistributedJobConfig.isEmpty(getSeparateTestSetPath())) {
            options.add("-test-set-path");
            options.add(getSeparateTestSetPath());
        }
        if (!DistributedJobConfig.isEmpty(getSampleFractionForAUC())) {
            options.add("-auc");
            options.add(getSampleFractionForAUC());
        }
        options.add("-num-nodes");
        options.add("" + getNumNodesInCluster());
    }

    /**
     * Tip text for the separate test set path property.
     *
     * @return the tip text
     */
    public String separateTestSetPathTipText() {
        return "The path (in HDFS) to a separate test set to use";
    }

    /**
     * Sets the path to a separate test set.
     *
     * @param str the path to the test set
     */
    public void setSeparateTestSetPath(String str) {
        this.m_separateTestSetPath = str;
    }

    /**
     * Gets the path to a separate test set.
     *
     * @return the path to the test set
     */
    public String getSeparateTestSetPath() {
        return this.m_separateTestSetPath;
    }

    /**
     * Tip text for the AUC sampling fraction property.
     *
     * @return the tip text
     */
    public String sampleFractionForAUCTipText() {
        return "The percentage of all predictions (randomly sampled) to retain for computing AUC and AUPRC. If not specified, then these metrics are not computed and no predictions are kept. Use this option to keep the number of predictions retained under control when computing AUC/AUPRC.";
    }

    /**
     * Sets the fraction of predictions to retain for computing AUC/AUPRC.
     *
     * @param str the fraction, as a string
     */
    public void setSampleFractionForAUC(String str) {
        this.m_predFrac = str;
    }

    /**
     * Gets the fraction of predictions to retain for computing AUC/AUPRC.
     *
     * @return the fraction, as a string
     */
    public String getSampleFractionForAUC() {
        return this.m_predFrac;
    }

    /**
     * Moves the per-fold model files into the temporary distributed cache
     * staging location and registers each one with the distributed cache.
     * Model files are expected to be named {@code <fold>_<modelFileName>}
     * (fold numbers starting at 1) in the directory part of the supplied path.
     *
     * @param i the number of folds (i.e. the number of model files to stage)
     * @param str a path whose directory part (up to and including the last
     *          "/") holds the fold model files
     * @param configuration the Configuration to add the cache files to
     * @throws Exception if a problem occurs while staging
     */
    protected void stageClassifiersForFolds(int i, String str, Configuration configuration) throws Exception {
        String modelFileName = this.m_classifierJob.getModelFileName();
        String modelDirectory = str.substring(0, str.lastIndexOf("/") + 1);
        for (int fold = 1; fold <= i; fold++) {
            String foldModelName = "" + fold + "_" + modelFileName;
            String source = modelDirectory + foldModelName;
            String staged = HDFSUtils.WEKA_TEMP_DISTRIBUTED_CACHE_FILES + foldModelName;
            HDFSUtils.moveInHDFS(source, staged, this.m_classifierJob.m_mrConfig.getHDFSConfig(), this.m_env);
            HDFSUtils.addFileToDistributedCache(this.m_classifierJob.m_mrConfig.getHDFSConfig(), configuration, staged, this.m_env);
        }
    }

    /**
     * Tip text for the number of nodes property.
     *
     * @return the tip text
     */
    public String numNodesInClusterTipText() {
        return "The number of nodes in the cluster - used for setting the number of reducers to use when performing a cross-validation";
    }

    /**
     * Sets the number of nodes available in the cluster.
     *
     * @param i the number of nodes
     */
    public void setNumNodesInCluster(int i) {
        this.m_nodesAvailable = i;
    }

    /**
     * Gets the number of nodes available in the cluster.
     *
     * @return the number of nodes
     */
    public int getNumNodesInCluster() {
        return this.m_nodesAvailable;
    }

    /**
     * Extracts the raw value of the "total-folds" option from the wrapped
     * classifier job's (environment-substituted) map task options.
     */
    private String totalFoldsOption() throws Exception {
        String mapTaskOptions = environmentSubstitute(this.m_classifierJob.getClassifierMapTaskOptions());
        return Utils.getOption("total-folds", Utils.splitOptions(mapTaskOptions).clone());
    }

    /** Converts the raw "total-folds" value to a fold count, defaulting to 1 when it is not set. */
    private static int parseTotalFolds(String foldsOption) {
        return DistributedJobConfig.isEmpty(foldsOption) ? 1 : Integer.parseInt(foldsOption);
    }

    /**
     * Configures and runs the evaluation map-reduce job.
     *
     * @param str the path that the model was written to; its directory is
     *          where the fold models are staged from
     * @return true if the evaluation job ran successfully
     * @throws Exception if a problem occurs, including an unparseable AUC
     *           sampling fraction
     */
    protected boolean runEvaluationPhase(String str) throws Exception {
        String headerPath = this.m_classifierJob.m_arffHeaderJob.getAggregatedHeaderPath();
        Configuration conf = new Configuration();
        HDFSUtils.addFileToDistributedCache(this.m_mrConfig.getHDFSConfig(), conf, headerPath, this.m_env);
        String headerFileName = headerPath.substring(headerPath.lastIndexOf("/") + 1);

        boolean hasSeparateTestSet = !DistributedJobConfig.isEmpty(getSeparateTestSetPath());

        // Options handed to the evaluation map tasks.
        ArrayList<String> mapTaskOpts = new ArrayList<String>();
        mapTaskOpts.add("-arff-header");
        mapTaskOpts.add(headerFileName);
        if (!DistributedJobConfig.isEmpty(this.m_classifierJob.getClassAttribute())) {
            mapTaskOpts.add("-class");
            mapTaskOpts.add(environmentSubstitute(this.m_classifierJob.getClassAttribute()));
        }
        String classifierMapTaskOptions = this.m_classifierJob.getClassifierMapTaskOptions();
        mapTaskOpts.add("-model-file-name");
        mapTaskOpts.add(environmentSubstitute(this.m_classifierJob.getModelFileName()));
        if (hasSeparateTestSet) {
            mapTaskOpts.add("-test-set-path");
            mapTaskOpts.add(environmentSubstitute(getSeparateTestSetPath()));
        }
        if (!DistributedJobConfig.isEmpty(getSampleFractionForAUC())) {
            String rawFraction = environmentSubstitute(getSampleFractionForAUC());
            double fraction;
            try {
                fraction = Double.parseDouble(rawFraction);
            } catch (NumberFormatException e) {
                // Keep the cause so the offending input can be traced.
                throw new Exception("Unable to parse the sampling fraction for AUC: " + rawFraction, e);
            }
            // Values above 1 are treated as percentages.
            if (fraction > 1.0d) {
                fraction /= 100.0d;
            }
            if (fraction > 0.0d) {
                mapTaskOpts.add("-auc");
                mapTaskOpts.add("" + fraction);
            }
        }
        if (!DistributedJobConfig.isEmpty(classifierMapTaskOptions)) {
            for (String opt : Utils.splitOptions(environmentSubstitute(classifierMapTaskOptions))) {
                mapTaskOpts.add(opt);
            }
        }

        int totalFolds = parseTotalFolds(totalFoldsOption());
        stageClassifiersForFolds(totalFolds, str, conf);

        String joinedMapTaskOpts = Utils.joinOptions(mapTaskOpts.toArray(new String[mapTaskOpts.size()]));

        // Use the effective fold count in the name so that an unset
        // total-folds option does not produce "( folds )".
        // NOTE(review): the job name and the output path are both extended in
        // place here, so they grow if this phase runs again on the same
        // instance - confirm whether re-running an instance is supported.
        String jobName = environmentSubstitute(getJobName());
        if (hasSeparateTestSet) {
            jobName += " (separate test set) ";
        } else {
            jobName += " (" + totalFolds + " folds ) ";
        }
        setJobName(jobName + joinedMapTaskOpts);
        this.m_mrConfig.setOutputPath(this.m_mrConfig.getOutputPath() + "/eval");

        if (this.m_classifierJob.getCreateRandomizedDataChunks()) {
            this.m_mrConfig.setInputPaths(this.m_classifierJob.m_mrConfig.getInputPaths());
            if (!hasSeparateTestSet) {
                this.m_mrConfig.setMapredMaxSplitSize("");
            }
        }
        if (hasSeparateTestSet) {
            // A separate test set overrides whatever input paths were set above.
            this.m_mrConfig.setInputPaths(environmentSubstitute(getSeparateTestSetPath()));
        }

        this.m_mrConfig.setUserSuppliedProperty(WekaClassifierHadoopMapper.CLASSIFIER_MAP_TASK_OPTIONS, environmentSubstitute(joinedMapTaskOpts));
        this.m_mrConfig.setUserSuppliedProperty(CSVToArffHeaderHadoopMapper.CSV_TO_ARFF_HEADER_MAP_TASK_OPTIONS, environmentSubstitute(this.m_classifierJob.getCSVMapTaskOptions()));
        addWekaLibrariesToClasspath(conf);

        Job job = this.m_mrConfig.configureForHadoop(environmentSubstitute(getJobName()), conf, this.m_env);
        cleanOutputDirectory(job);
        String submitMessage = "Submitting fold-based evaluation job: " + totalFolds + " folds.";
        statusMessage(submitMessage);
        logMessage(submitMessage);
        return runJob(job);
    }

    /**
     * Runs the complete job: ARFF header creation, model building (one or
     * more iterations), the evaluation phase and retrieval of the results.
     * The thread's context class loader is switched to this class's loader
     * for the duration of the call and always restored afterwards.
     *
     * @return true if every phase succeeded
     * @throws DistributedWekaException if a problem occurs
     */
    public boolean runJob() throws DistributedWekaException {
        ClassLoader originalLoader = Thread.currentThread().getContextClassLoader();
        try {
            Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
            if (this.m_env == null) {
                this.m_env = Environment.getSystemWide();
            }
            try {
                return buildModelsAndEvaluate();
            } catch (DistributedWekaException e) {
                // Already the right type - rethrow without double wrapping.
                setJobStatus(DistributedJob.JobStatus.FAILED);
                throw e;
            } catch (Exception e) {
                setJobStatus(DistributedJob.JobStatus.FAILED);
                throw new DistributedWekaException(e);
            }
        } finally {
            Thread.currentThread().setContextClassLoader(originalLoader);
        }
    }

    /** Runs header creation, model building, evaluation and result retrieval; sets the job status. */
    private boolean buildModelsAndEvaluate() throws Exception {
        int numIterations = this.m_classifierJob.getNumIterations();
        int totalFolds = parseTotalFolds(totalFoldsOption());
        if (totalFolds > 1 && !DistributedJobConfig.isEmpty(getSeparateTestSetPath())) {
            throw new DistributedWekaException("Total folds is > 1 and a separate test set has been specified - can only perform one or the other out of a cross-validation or separate test set evaluation");
        }

        // A user-supplied value takes precedence over the Hadoop configuration.
        String reduceTasksMax = new Configuration().get(REDUCE_TASKS_MAX_KEY);
        if (!DistributedJobConfig.isEmpty(this.m_mrConfig.getUserSuppliedProperty(REDUCE_TASKS_MAX_KEY))) {
            reduceTasksMax = environmentSubstitute(this.m_mrConfig.getUserSuppliedProperty(REDUCE_TASKS_MAX_KEY));
        }
        int reducersPerNode = DEFAULT_REDUCERS_PER_NODE;
        if (!DistributedJobConfig.isEmpty(reduceTasksMax)) {
            reducersPerNode = Integer.parseInt(environmentSubstitute(reduceTasksMax));
        }

        // No point in using more reducers than there are folds.
        int numReducers = Math.min(totalFolds, reducersPerNode * getNumNodesInCluster());
        logMessage("Setting num reducers per node for fold-based model building to: " + numReducers);
        this.m_classifierJob.m_mrConfig.setNumberOfReducers("" + numReducers);
        this.m_classifierJob.setLog(getLog());
        this.m_classifierJob.setStatusMessagePrefix(this.m_statusMessagePrefix);
        this.m_classifierJob.setEnvironment(this.m_env);

        setJobStatus(DistributedJob.JobStatus.RUNNING);
        if (!this.m_classifierJob.initializeAndRunArffJob()) {
            setJobStatus(DistributedJob.JobStatus.FAILED);
            return false;
        }

        String modelPath = environmentSubstitute(this.m_classifierJob.m_mrConfig.getUserSuppliedProperty(WekaClassifierHadoopReducer.CLASSIFIER_WRITE_PATH));
        if (DistributedJobConfig.isEmpty(modelPath)) {
            throw new Exception("The output model path is not set!");
        }

        for (int iteration = 0; iteration < numIterations; iteration++) {
            Configuration conf = new Configuration();
            if (iteration > 0) {
                // Later iterations get the fold models staged into the
                // distributed cache before they run.
                stageClassifiersForFolds(totalFolds, modelPath, conf);
            }
            this.m_classifierJob.setEnvironment(this.m_env);
            if (!this.m_classifierJob.performIteration(iteration, false, conf)) {
                statusMessage("Unable to continue - fold-based classifier job failed. Check Hadoop logs");
                logMessage("Unable to continue - fold-based classifier job failed. Check Hadoop logs");
                setJobStatus(DistributedJob.JobStatus.FAILED);
                return false;
            }
        }

        boolean evaluationOk = runEvaluationPhase(modelPath);
        if (!evaluationOk) {
            statusMessage("Evaluation phase failed. Check hadoop logs");
            logMessage("Evaluation phase failed. Check hadoop logs");
        }
        setJobStatus(evaluationOk ? DistributedJob.JobStatus.FINISHED : DistributedJob.JobStatus.FAILED);
        if (evaluationOk) {
            retrieveResults();
        }
        return evaluationOk;
    }

    /**
     * Reads the evaluation results back from the job's output directory: the
     * textual results from {@code part-r-00000} and the results as instances
     * from {@code evaluation.arff}. Both readers are closed even when reading
     * fails.
     *
     * @throws DistributedWekaException if an I/O problem occurs
     */
    protected void retrieveResults() throws DistributedWekaException {
        try {
            Configuration conf = new Configuration();
            this.m_mrConfig.getHDFSConfig().configureForHadoop(conf, this.m_env);
            FileSystem fs = FileSystem.get(conf);

            FSDataInputStream textStream = fs.open(new Path(this.m_mrConfig.getOutputPath() + "/part-r-00000"));
            BufferedReader textReader = new BufferedReader(new InputStreamReader(textStream));
            try {
                StringBuilder text = new StringBuilder();
                String line;
                while ((line = textReader.readLine()) != null) {
                    text.append(line).append("\n");
                }
                this.m_textEvalResults = text.toString();
            } finally {
                textReader.close();
            }

            BufferedReader arffReader = new BufferedReader(new InputStreamReader(fs.open(new Path(this.m_mrConfig.getOutputPath() + "/evaluation.arff"))));
            try {
                this.m_evalResults = new Instances(arffReader);
            } finally {
                arffReader.close();
            }
        } catch (IOException e) {
            throw new DistributedWekaException(e);
        }
    }

    /**
     * Gets the textual evaluation results.
     *
     * @return the results text, or null if no results have been retrieved
     */
    public String getText() {
        return this.m_textEvalResults;
    }

    /**
     * Gets the evaluation results as a set of instances.
     *
     * @return the results, or null if no results have been retrieved
     */
    public Instances getInstances() {
        return this.m_evalResults;
    }

    /**
     * Command line entry point.
     *
     * @param strArr the command line arguments
     */
    public static void main(String[] strArr) {
        WekaClassifierEvaluationHadoopJob job = new WekaClassifierEvaluationHadoopJob();
        job.run(job, strArr);
    }

    /**
     * Configures the supplied job from the given options and runs it. With
     * the -h flag the option summary is printed to stderr and the JVM exits.
     * On success the textual results are printed to stdout; any exception
     * raised while configuring or running is printed as a stack trace.
     *
     * @param obj the job to run; must be a WekaClassifierEvaluationHadoopJob
     * @param strArr the options for the job
     * @throws IllegalArgumentException if obj is not a
     *           WekaClassifierEvaluationHadoopJob
     */
    public void run(Object obj, String[] strArr) throws IllegalArgumentException {
        if (!(obj instanceof WekaClassifierEvaluationHadoopJob)) {
            throw new IllegalArgumentException("Object to run is not a WekaClassifierEvaluationHadoopJob!");
        }
        try {
            WekaClassifierEvaluationHadoopJob job = (WekaClassifierEvaluationHadoopJob) obj;
            if (Utils.getFlag('h', strArr)) {
                System.err.println(DistributedJob.makeOptionsStr(job));
                System.exit(1);
            }
            job.setOptions(strArr);
            // Results only exist after a successful run; printing them
            // otherwise would emit the literal text "null".
            if (job.runJob()) {
                System.out.print(job.getText());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
