package distributed.hadoop;

import distributed.core.DistributedJobConfig;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Map;
import java.util.Vector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import weka.core.Environment;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;

/* loaded from: input_file:distributed/hadoop/MapReduceJobConfig.class */
/**
 * Holds the settings needed to set up a Hadoop MapReduce {@link Job}: job
 * tracker address, HDFS connection details (delegated to an embedded
 * {@link HDFSConfig}), mapper/reducer/combiner classes, input/output formats,
 * key/value classes and input/output paths.
 *
 * <p>All settings are kept as strings in the inherited property store (via
 * {@code setProperty}/{@code getProperty}) and are only interpreted when
 * {@link #configureForHadoop(String, Configuration, Environment)} is called.
 */
public class MapReduceJobConfig extends AbstractHadoopJobConfig implements OptionHandler {
    /** Property key: requested number of map tasks. */
    public static final String NUM_MAPPERS = "numMappers";
    /** Property key: number of reduce tasks. */
    public static final String NUM_REDUCERS = "numReducers";
    /** Property key: maximum number of concurrent map tasks per task tracker. */
    public static final String TASK_TRACKER_MAP_MAXIMUM = "taskTrackerMaxMappers";
    /** Property key: fully qualified mapper class name. */
    public static final String MAPPER_CLASS = "mapperClass";
    /** Property key: fully qualified reducer class name. */
    public static final String REDUCER_CLASS = "reducerClass";
    /** Property key: fully qualified combiner class name. */
    public static final String COMBINER_CLASS = "combinerClass";
    /** Property key: fully qualified input format class name. */
    public static final String INPUT_FORMAT_CLASS = "inputFormatClass";
    /** Property key: fully qualified output format class name. */
    public static final String OUTPUT_FORMAT_CLASS = "outputFormatClass";
    /** Property key: fully qualified map output key class name. */
    public static final String MAP_OUTPUT_KEY_CLASS = "mapOutputKeyClass";
    /** Property key: fully qualified map output value class name. */
    public static final String MAP_OUTPUT_VALUE_CLASS = "mapOutputValueClass";
    /** Property key: fully qualified job output key class name. */
    public static final String OUTPUT_KEY_CLASS = "outputKeyClass";
    /** Property key: fully qualified job output value class name. */
    public static final String OUTPUT_VALUE_CLASS = "outputValueClass";
    /** Property key: input path(s). */
    public static final String INPUT_PATHS = "inputPaths";
    /** Property key: output path. */
    public static final String OUTPUT_PATH = "outputPath";
    /** Property key: maximum split size. */
    public static final String MAPRED_MAX_SPLIT_SIZE = "mapredMaxSplitSize";
    /** Hadoop configuration key that receives the {@code host:port} job tracker address. */
    protected static final String HADOOP_JOB_TRACKER_HOST = "mapred.job.tracker";
    /** Hadoop configuration key that receives the maximum split size. */
    protected static final String HADOOP_MAPRED_MAX_SPLIT_SIZE = "mapred.max.split.size";
    private static final long serialVersionUID = -1721850598954532369L;

    /** HDFS connection settings; its options are merged into this object's options. */
    protected HDFSConfig m_hdfsConfig = new HDFSConfig();

    /**
     * Creates a configuration with default job tracker / HDFS addresses, text
     * input and output formats, {@link Text} keys, {@link BytesWritable} map
     * output values and {@link Text} job output values.
     */
    public MapReduceJobConfig() {
        // NOTE(review): the job tracker port comes from HDFSConfig.DEFAULT_PORT while
        // the HDFS port is the literal "8021", and the -jobtracker-port help text
        // advertises 9001. Confirm that these defaults are the intended ones.
        setJobTrackerHost(HDFSConfig.DEFAULT_HOST);
        setJobTrackerPort(HDFSConfig.DEFAULT_PORT);
        getHDFSConfig().setHDFSHost(HDFSConfig.DEFAULT_HOST);
        getHDFSConfig().setHDFSPort("8021");
        setProperty(INPUT_FORMAT_CLASS, TextInputFormat.class.getName());
        setProperty(OUTPUT_FORMAT_CLASS, TextOutputFormat.class.getName());
        setProperty(MAP_OUTPUT_KEY_CLASS, Text.class.getName());
        setProperty(MAP_OUTPUT_VALUE_CLASS, BytesWritable.class.getName());
        setProperty(OUTPUT_KEY_CLASS, Text.class.getName());
        setProperty(OUTPUT_VALUE_CLASS, Text.class.getName());
    }

    /**
     * Lists the command-line options understood by this configuration: first
     * those of the embedded HDFS config, then the MapReduce-specific ones, then
     * those of the superclass.
     *
     * @return an enumeration of all available options
     */
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<Option>();
        Enumeration<Option> hdfsOptions = this.m_hdfsConfig.listOptions();
        while (hdfsOptions.hasMoreElements()) {
            options.add(hdfsOptions.nextElement());
        }
        options.addElement(new Option("\tJob tracker hostname. (default: localhost)", "jobtracker-host", 1, "-jobtracker-host <hostname>"));
        options.addElement(new Option("\tJob tracker port. (default 9001)", "jobtracker-port", 1, "-jobtracker-port <port number>"));
        options.addElement(new Option("\tThe number of maps (hint to MR).", "num-maps", 1, "-num-maps <integer>"));
        options.addElement(new Option("\tMaximum number of map tasks to run concurrently per node.", "map-maximum", 1, "-map-maximum"));
        options.addElement(new Option("\tNumber of reducers to use.", "num-reducers", 1, "-num-reducers"));
        options.addElement(new Option("\tInput path(s) in HDFS (comma-separated)", "input-paths", 1, "-input-paths"));
        options.addElement(new Option("\tOutput path for the job in HDFS.", "output-path", 1, "-output-path"));
        options.addElement(new Option("\tMaximum split size (in bytes) for each mapper to process.", "max-split-size", 1, "-max-split-size"));
        Enumeration<Option> superOptions = super.listOptions();
        while (superOptions.hasMoreElements()) {
            options.add(superOptions.nextElement());
        }
        return options.elements();
    }

    /**
     * Applies the given command-line options to the superclass, to this
     * object's MapReduce settings and finally to the embedded HDFS config.
     *
     * <p>NOTE(review): every MapReduce setting is overwritten with whatever
     * {@code Utils.getOption} returns, so a flag that is absent presumably
     * replaces the constructor default with an empty value - confirm this is
     * the desired behaviour.
     *
     * @param strArr the option array to parse
     * @throws Exception if option parsing fails
     */
    public void setOptions(String[] strArr) throws Exception {
        super.setOptions(strArr);
        setJobTrackerHost(Utils.getOption("jobtracker-host", strArr));
        setJobTrackerPort(Utils.getOption("jobtracker-port", strArr));
        setNumberOfMaps(Utils.getOption("num-maps", strArr));
        setTaskTrackerMapTasksMaximum(Utils.getOption("map-maximum", strArr));
        setNumberOfReducers(Utils.getOption("num-reducers", strArr));
        setInputPaths(Utils.getOption("input-paths", strArr));
        setOutputPath(Utils.getOption("output-path", strArr));
        setMapredMaxSplitSize(Utils.getOption("max-split-size", strArr));
        this.m_hdfsConfig.setOptions(strArr);
    }

    /**
     * Returns the current settings as a command-line option array: HDFS
     * options first, then the MapReduce options that have a non-empty value,
     * then the superclass options.
     *
     * @return the current option settings
     */
    public String[] getOptions() {
        ArrayList<String> options = new ArrayList<String>();
        for (String hdfsOption : this.m_hdfsConfig.getOptions()) {
            options.add(hdfsOption);
        }
        // Host and port are guarded like every other option so that a flag is
        // never emitted with an empty or null value.
        addOptionIfSet(options, "-jobtracker-host", getJobTrackerHost());
        addOptionIfSet(options, "-jobtracker-port", getJobTrackerPort());
        addOptionIfSet(options, "-num-maps", getNumberOfMaps());
        addOptionIfSet(options, "-map-maximum", getTaskTrackerMapTasksMaximum());
        addOptionIfSet(options, "-num-reducers", getNumberOfReducers());
        addOptionIfSet(options, "-input-paths", getInputPaths());
        addOptionIfSet(options, "-output-path", getOutputPath());
        addOptionIfSet(options, "-max-split-size", getMapredMaxSplitSize());
        for (String superOption : super.getOptions()) {
            options.add(superOption);
        }
        return options.toArray(new String[options.size()]);
    }

    /** Appends {@code flag} and {@code value} to {@code options} unless the value is empty. */
    private static void addOptionIfSet(ArrayList<String> options, String flag, String value) {
        if (!DistributedJobConfig.isEmpty(value)) {
            options.add(flag);
            options.add(value);
        }
    }

    /**
     * Replaces the embedded HDFS configuration.
     *
     * @param hDFSConfig the HDFS configuration to use
     */
    public void setHDFSConfig(HDFSConfig hDFSConfig) {
        this.m_hdfsConfig = hDFSConfig;
    }

    /**
     * @return the embedded HDFS configuration
     */
    public HDFSConfig getHDFSConfig() {
        return this.m_hdfsConfig;
    }

    /**
     * @return descriptive text for the HDFS host property
     */
    public String HDFSHostTipText() {
        return "The HDFS (name node) host to use";
    }

    /**
     * Sets the HDFS host on the embedded HDFS configuration.
     *
     * @param str the HDFS host
     */
    public void setHDFSHost(String str) {
        this.m_hdfsConfig.setHDFSHost(str);
    }

    /**
     * @return the HDFS host held by the embedded HDFS configuration
     */
    public String getHDFSHost() {
        return this.m_hdfsConfig.getHDFSHost();
    }

    /**
     * @return descriptive text for the HDFS port property
     */
    public String HDFSPortTipText() {
        return "The HDFS (name node) port";
    }

    /**
     * Sets the HDFS port on the embedded HDFS configuration.
     *
     * @param str the HDFS port
     */
    public void setHDFSPort(String str) {
        this.m_hdfsConfig.setHDFSPort(str);
    }

    /**
     * @return the HDFS port held by the embedded HDFS configuration
     */
    public String getHDFSPort() {
        return this.m_hdfsConfig.getHDFSPort();
    }

    /**
     * @return descriptive text for the number-of-maps property
     */
    public String numberOfMapsTipText() {
        return "The number of maps to use. This is just a hint to the underlying Hadoop framework for how many maps to use. Using setMapredMaxSplitSize(), which sets the Hadoop property mapred.max.split.size, gives greater control over how many maps will be run (and thus how much data each map processes).";
    }

    /**
     * @param str the number of map tasks to request
     */
    public void setNumberOfMaps(String str) {
        setProperty(NUM_MAPPERS, str);
    }

    /**
     * @return the requested number of map tasks
     */
    public String getNumberOfMaps() {
        return getProperty(NUM_MAPPERS);
    }

    /**
     * @return descriptive text for the task tracker map maximum property
     */
    public String taskTrackerMapTasksMaximumTipText() {
        return "The maximum number of map tasks to run concurrently by a task tracker (node)";
    }

    /**
     * @param str the maximum number of concurrent map tasks per task tracker
     */
    public void setTaskTrackerMapTasksMaximum(String str) {
        setProperty(TASK_TRACKER_MAP_MAXIMUM, str);
    }

    /**
     * @return the maximum number of concurrent map tasks per task tracker
     */
    public String getTaskTrackerMapTasksMaximum() {
        return getProperty(TASK_TRACKER_MAP_MAXIMUM);
    }

    /**
     * @return descriptive text for the number-of-reducers property
     */
    public String numberOfReducersTipText() {
        return "The number of reducers to use. Weka jobs set this property automatically.";
    }

    /**
     * @param str the number of reduce tasks
     */
    public void setNumberOfReducers(String str) {
        setProperty(NUM_REDUCERS, str);
    }

    /**
     * @return the number of reduce tasks
     */
    public String getNumberOfReducers() {
        return getProperty(NUM_REDUCERS);
    }

    /**
     * @param str fully qualified name of the mapper class
     */
    public void setMapperClass(String str) {
        setProperty(MAPPER_CLASS, str);
    }

    /**
     * @return fully qualified name of the mapper class
     */
    public String getMapperClass() {
        return getProperty(MAPPER_CLASS);
    }

    /**
     * @param str fully qualified name of the reducer class
     */
    public void setReducerClass(String str) {
        setProperty(REDUCER_CLASS, str);
    }

    /**
     * @return fully qualified name of the reducer class
     */
    public String getReducerClass() {
        return getProperty(REDUCER_CLASS);
    }

    /**
     * @param str fully qualified name of the combiner class
     */
    public void setCombinerClass(String str) {
        setProperty(COMBINER_CLASS, str);
    }

    /**
     * @return fully qualified name of the combiner class
     */
    public String getCombinerClass() {
        return getProperty(COMBINER_CLASS);
    }

    /**
     * @param str fully qualified name of the input format class
     */
    public void setInputFormatClass(String str) {
        setProperty(INPUT_FORMAT_CLASS, str);
    }

    /**
     * @return fully qualified name of the input format class
     */
    public String getInputFormatClass() {
        return getProperty(INPUT_FORMAT_CLASS);
    }

    /**
     * @param str fully qualified name of the output format class
     */
    public void setOutputFormatClass(String str) {
        setProperty(OUTPUT_FORMAT_CLASS, str);
    }

    /**
     * @return fully qualified name of the output format class
     */
    public String getOutputFormatClass() {
        return getProperty(OUTPUT_FORMAT_CLASS);
    }

    /**
     * @param str fully qualified name of the map output key class
     */
    public void setMapOutputKeyClass(String str) {
        setProperty(MAP_OUTPUT_KEY_CLASS, str);
    }

    /**
     * @return fully qualified name of the map output key class
     */
    public String getMapOutputKeyClass() {
        return getProperty(MAP_OUTPUT_KEY_CLASS);
    }

    /**
     * @param str fully qualified name of the map output value class
     */
    public void setMapOutputValueClass(String str) {
        setProperty(MAP_OUTPUT_VALUE_CLASS, str);
    }

    /**
     * @return fully qualified name of the map output value class
     */
    public String getMapOutputValueClass() {
        return getProperty(MAP_OUTPUT_VALUE_CLASS);
    }

    /**
     * @param str fully qualified name of the job output key class
     */
    public void setOutputKeyClass(String str) {
        setProperty(OUTPUT_KEY_CLASS, str);
    }

    /**
     * @return fully qualified name of the job output key class
     */
    public String getOutputKeyClass() {
        return getProperty(OUTPUT_KEY_CLASS);
    }

    /**
     * @param str fully qualified name of the job output value class
     */
    public void setOutputValueClass(String str) {
        setProperty(OUTPUT_VALUE_CLASS, str);
    }

    /**
     * @return fully qualified name of the job output value class
     */
    public String getOutputValueClass() {
        return getProperty(OUTPUT_VALUE_CLASS);
    }

    /**
     * @return descriptive text for the input paths property
     */
    public String inputPathsTipText() {
        return "The path to the directory in HDFS that contains the input files";
    }

    /**
     * @param str the input path(s)
     */
    public void setInputPaths(String str) {
        setProperty(INPUT_PATHS, str);
    }

    /**
     * @return the input path(s)
     */
    public String getInputPaths() {
        return getProperty(INPUT_PATHS);
    }

    /**
     * @return descriptive text for the output path property
     */
    public String outputPathTipText() {
        return "The path in HDFS to the output directory";
    }

    /**
     * @param str the output path
     */
    public void setOutputPath(String str) {
        setProperty(OUTPUT_PATH, str);
    }

    /**
     * @return the output path
     */
    public String getOutputPath() {
        return getProperty(OUTPUT_PATH);
    }

    /**
     * @param str the maximum split size
     */
    public void setMapredMaxSplitSize(String str) {
        setProperty(MAPRED_MAX_SPLIT_SIZE, str);
    }

    /**
     * @return the maximum split size
     */
    public String getMapredMaxSplitSize() {
        return getProperty(MAPRED_MAX_SPLIT_SIZE);
    }

    /**
     * Substitutes environment variables in the given string on a best-effort
     * basis.
     *
     * @param str the string to process
     * @param environment the environment to use, or {@code null} to skip substitution
     * @return the substituted string, or {@code str} unchanged if there is no
     *         environment or substitution fails
     */
    protected static String environmentSubstitute(String str, Environment environment) {
        if (environment == null) {
            return str;
        }
        try {
            return environment.substitute(str);
        } catch (Exception ignored) {
            // Deliberately best-effort: when substitution fails the caller gets
            // the original string back.
            return str;
        }
    }

    /**
     * Applies these settings to the supplied Hadoop configuration and builds a
     * {@link Job} from it.
     *
     * <p>Order of operations: job tracker address, maximum split size,
     * user-supplied properties and HDFS settings are written to
     * {@code configuration}; the job is then created and given its map/reduce
     * task counts, classes, formats, key/value classes and paths.
     *
     * @param str the job name
     * @param configuration the Hadoop configuration to populate
     * @param environment environment used for variable substitution; may be {@code null}
     * @return the configured job
     * @throws IOException if a required setting (mapper, reducer when reduce
     *         tasks are requested, formats, key/value classes, output path) is missing
     * @throws ClassNotFoundException if a named class cannot be loaded
     */
    // Raw Class values are handed to the job's class setters on purpose; see
    // loadClass for the reason.
    @SuppressWarnings("unchecked")
    public Job configureForHadoop(String str, Configuration configuration, Environment environment) throws IOException, ClassNotFoundException {
        // Decide on the host alone: "host:port" always contains the colon and
        // so can never signal a missing job tracker.
        String trackerHost = getJobTrackerHost();
        if (DistributedJobConfig.isEmpty(trackerHost)) {
            System.err.println("No JobTracker set - running locally...");
        } else {
            String tracker = trackerHost + ":" + getJobTrackerPort();
            System.err.println("Using jobtracker: " + tracker);
            configuration.set(HADOOP_JOB_TRACKER_HOST, environmentSubstitute(tracker, environment));
        }

        String maxSplitSize = getMapredMaxSplitSize();
        if (!DistributedJobConfig.isEmpty(maxSplitSize)) {
            configuration.set(HADOOP_MAPRED_MAX_SPLIT_SIZE, environmentSubstitute(maxSplitSize, environment));
        }
        for (Map.Entry<?, ?> userProperty : this.m_additionalUserSuppliedProperties.entrySet()) {
            configuration.set((String) userProperty.getKey(), (String) userProperty.getValue());
        }
        this.m_hdfsConfig.configureForHadoop(configuration, environment);

        Job job = new Job(configuration, str);

        String numberOfMaps = getNumberOfMaps();
        if (!DistributedJobConfig.isEmpty(numberOfMaps)) {
            job.getConfiguration().setNumMapTasks(Integer.parseInt(environmentSubstitute(numberOfMaps, environment)));
        }
        String mapTasksMaximum = getTaskTrackerMapTasksMaximum();
        if (!DistributedJobConfig.isEmpty(mapTasksMaximum)) {
            job.getConfiguration().set("mapred.tasktracker.map.tasks.maximum", environmentSubstitute(mapTasksMaximum, environment));
        }

        // A single reducer is used when no count has been configured.
        int reduceTasks = 1;
        String numberOfReducers = getNumberOfReducers();
        if (DistributedJobConfig.isEmpty(numberOfReducers)) {
            job.setNumReduceTasks(reduceTasks);
        } else {
            reduceTasks = Integer.parseInt(environmentSubstitute(numberOfReducers, environment));
            job.setNumReduceTasks(reduceTasks);
            if (reduceTasks == 0) {
                System.err.println("Warning - no reducer class set. Configuring for a map only job");
            }
        }

        job.setMapperClass(requireClass(getMapperClass(), "No mapper class specified!", environment));
        // The reducer is only required (and only loaded) when reduce tasks will
        // run. Using the effective count also covers the defaulted case.
        if (reduceTasks > 0) {
            job.setReducerClass(requireClass(getReducerClass(), "No reducer class specified!", environment));
        }
        String combinerClass = getCombinerClass();
        if (!DistributedJobConfig.isEmpty(combinerClass)) {
            job.setCombinerClass(loadClass(combinerClass, environment));
        }
        job.setInputFormatClass(requireClass(getInputFormatClass(), "No input format class specified", environment));
        job.setOutputFormatClass(requireClass(getOutputFormatClass(), "No output format class specified", environment));
        job.setMapOutputKeyClass(requireClass(getMapOutputKeyClass(), "No map output key class defined", environment));
        job.setMapOutputValueClass(requireClass(getMapOutputValueClass(), "No map output value class defined", environment));
        job.setOutputKeyClass(requireClass(getOutputKeyClass(), "No output key class defined", environment));
        job.setOutputValueClass(requireClass(getOutputValueClass(), "No output value class defined", environment));

        String inputPaths = getInputPaths();
        if (!DistributedJobConfig.isEmpty(inputPaths)) {
            FileInputFormat.setInputPaths(job, environmentSubstitute(inputPaths, environment));
        }
        String outputPath = getOutputPath();
        if (DistributedJobConfig.isEmpty(outputPath)) {
            throw new IOException("No output path specified");
        }
        FileOutputFormat.setOutputPath(job, new Path(environmentSubstitute(outputPath, environment)));
        return job;
    }

    /**
     * Loads the named class after environment substitution.
     *
     * <p>The result is a raw {@code Class} so that it can be passed to the
     * job's class setters, whose parameters are presumably bounded
     * (for example {@code Class<? extends Mapper>}) and would reject a
     * {@code Class<?>}.
     */
    @SuppressWarnings("rawtypes")
    private static Class loadClass(String className, Environment environment) throws ClassNotFoundException {
        return Class.forName(environmentSubstitute(className, environment));
    }

    /** Like {@link #loadClass} but throws an {@link IOException} with {@code missingMessage} when no class name is set. */
    @SuppressWarnings("rawtypes")
    private static Class requireClass(String className, String missingMessage, Environment environment) throws IOException, ClassNotFoundException {
        if (DistributedJobConfig.isEmpty(className)) {
            throw new IOException(missingMessage);
        }
        return loadClass(className, environment);
    }

    /**
     * Deletes the configured output directory through {@code HDFSUtils}, using
     * the given job's configuration.
     *
     * @param job the job whose configuration is used for the deletion
     * @param environment environment passed on to {@code HDFSUtils}
     * @throws IOException if no output path is configured or the deletion fails
     */
    public void deleteOutputDirectory(Job job, Environment environment) throws IOException {
        String outputPath = getOutputPath();
        if (DistributedJobConfig.isEmpty(outputPath)) {
            throw new IOException("Can't delete output path - no path defined!");
        }
        Configuration configuration = job.getConfiguration();
        HDFSUtils.deleteDirectory(getHDFSConfig(), configuration, outputPath, environment);
    }
}
