package weka.distributed.hadoop;

import distributed.core.DistributedJobConfig;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import weka.core.Instances;
import weka.core.Utils;
import weka.distributed.CSVToARFFHeaderReduceTask;

/* loaded from: input_file:weka/distributed/hadoop/RandomizedDataChunkHadoopReducer.class */
/**
 * Reducer that spreads incoming rows over a fixed number of named outputs
 * ({@code chunk0}, {@code chunk1}, ...) in round-robin fashion.
 *
 * <p>When the class attribute of the training header is numeric, rows are
 * dealt out using a single counter. Otherwise each incoming value is expected
 * to have the form {@code row@:@classValueIndex} and rows are dealt out using
 * one counter per class value; in {@link #cleanup} any class value that was
 * seen fewer times than there are chunks is topped up with randomly chosen
 * copies of its buffered rows.
 *
 * <p>The explicit {@code reduce(Object, Iterable, Reducer.Context)} bridge
 * method emitted by the decompiler is intentionally not declared here: the
 * compiler generates that bridge itself for the typed {@code reduce} override.
 */
public class RandomizedDataChunkHadoopReducer extends Reducer<Text, Text, Text, Text> {
    /** Configuration key holding the number of output chunks to produce. */
    public static String NUM_DATA_CHUNKS = "*weka.distributed.num_randomized_data_chunks";

    /** Separator between the row text and the class value index in stratified input values. */
    private static final String CLASS_INDEX_SEPARATOR = "@:@";

    /** Prefix of the named outputs written to; the chunk number is appended. */
    private static final String CHUNK_PREFIX = "chunk";

    /** Writer for the per-chunk named outputs. */
    protected MultipleOutputs<Text, Text> m_mos;

    /** Training header (summary attributes stripped, class index set). */
    protected Instances m_trainingHeader;

    /**
     * Per class value, the first rows seen (at most one per chunk). Only
     * allocated when the class attribute is nominal; {@code null} otherwise.
     */
    protected List<List<String>> m_classInstancesBuffer;

    /** Rows written so far, per class value (a single slot when the class is not nominal). */
    protected int[] m_countsPerClass;

    /** Number of chunks to distribute rows over; validated to be positive in {@link #setup}. */
    protected int m_numberOfDataChunks = 0;

    /** Fixed-seed generator used to pick the top-up rows in {@link #cleanup}. */
    protected Random m_random = new Random(42);

    /** Reusable output value holder. */
    protected Text m_outVal = new Text();

    /**
     * Reads the chunk count and the map task options from the job
     * configuration, loads the ARFF header named by the {@code arff-header}
     * option and allocates the per-class counters (and buffers, for a nominal
     * class).
     *
     * @param context the reducer context supplying the job configuration
     * @throws IOException if the chunk count is missing, not an integer or not
     *         positive, if the ARFF header option is missing, or if loading
     *         the header fails
     */
    @Override
    public void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException {
        this.m_mos = new MultipleOutputs<>(context);
        Configuration configuration = context.getConfiguration();
        String numChunks = configuration.get(NUM_DATA_CHUNKS);
        String taskOptions = configuration.get(RandomizedDataChunkHadoopMapper.RANDOMIZED_DATA_CHUNK_MAP_TASK_OPTIONS);

        if (numChunks == null || DistributedJobConfig.isEmpty(numChunks)) {
            throw new IOException("Number of output files/data chunks not available!!");
        }
        if (DistributedJobConfig.isEmpty(taskOptions)) {
            throw new IOException("Can't continue without the name of the ARFF header file!");
        }

        try {
            String[] splitOptions = Utils.splitOptions(taskOptions);
            String headerName = Utils.getOption("arff-header", splitOptions);
            if (DistributedJobConfig.isEmpty(headerName)) {
                throw new IOException("Can't continue without the name of the ARFF header file!");
            }
            this.m_trainingHeader = CSVToARFFHeaderReduceTask.stripSummaryAtts(WekaClassifierHadoopMapper.loadTrainingHeader(headerName));
            WekaClassifierHadoopMapper.setClassIndex(splitOptions, this.m_trainingHeader, true);

            try {
                this.m_numberOfDataChunks = Integer.parseInt(numChunks);
            } catch (NumberFormatException e) {
                throw new IOException("Number of data chunks is not a valid integer: " + numChunks, e);
            }
            // The chunk count is used as a modulus, so zero or negative values cannot work.
            if (this.m_numberOfDataChunks <= 0) {
                throw new IOException("Number of data chunks must be positive, but was: " + numChunks);
            }

            int numClassSlots = 1;
            if (this.m_trainingHeader.classAttribute().isNominal()) {
                numClassSlots = this.m_trainingHeader.classAttribute().numValues();
                this.m_classInstancesBuffer = new ArrayList<>(numClassSlots);
                for (int i = 0; i < numClassSlots; i++) {
                    this.m_classInstancesBuffer.add(new ArrayList<>());
                }
            }
            this.m_countsPerClass = new int[numClassSlots];
        } catch (IOException e) {
            // Already the right type; do not wrap a second time.
            throw e;
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    /**
     * Writes each {@code row@:@classValueIndex} value to the chunk selected by
     * that class value's running count, and buffers the first rows of each
     * class value (one per chunk at most) for the top-up done in
     * {@link #cleanup}.
     *
     * @param iterable the values to distribute
     * @throws IOException if a value is malformed or writing fails
     * @throws InterruptedException if writing is interrupted
     */
    protected void randomizeAndStratify(Iterable<Text> iterable) throws InterruptedException, IOException {
        for (Text value : iterable) {
            String raw = value.toString();
            String[] parts = raw.split(CLASS_INDEX_SEPARATOR);
            if (parts.length < 2) {
                throw new IOException("Expected a value of the form <row>" + CLASS_INDEX_SEPARATOR
                    + "<class index> but got: " + raw);
            }
            String row = parts[0];
            int classIndex;
            try {
                classIndex = Integer.parseInt(parts[1]);
            } catch (NumberFormatException e) {
                throw new IOException("Class index is not a valid integer in value: " + raw, e);
            }
            if (classIndex < 0 || classIndex >= this.m_countsPerClass.length) {
                throw new IOException("Class index " + classIndex + " is out of range in value: " + raw);
            }

            int seenSoFar = this.m_countsPerClass[classIndex];
            this.m_outVal.set(row);
            this.m_mos.write(CHUNK_PREFIX + (seenSoFar % this.m_numberOfDataChunks), null, this.m_outVal);

            // Remember the first rows of each class value so cleanup() can
            // top up chunks that did not receive one.
            if (seenSoFar < this.m_numberOfDataChunks) {
                this.m_classInstancesBuffer.get(classIndex).add(row);
            }
            this.m_countsPerClass[classIndex] = seenSoFar + 1;
        }
    }

    /**
     * Writes each value unchanged to the chunk selected by the single running
     * row count.
     *
     * @param iterable the values to distribute
     * @throws IOException if writing fails
     * @throws InterruptedException if writing is interrupted
     */
    protected void randomize(Iterable<Text> iterable) throws InterruptedException, IOException {
        for (Text value : iterable) {
            int chunk = this.m_countsPerClass[0] % this.m_numberOfDataChunks;
            this.m_mos.write(CHUNK_PREFIX + chunk, null, value);
            this.m_countsPerClass[0]++;
        }
    }

    /**
     * Distributes the values for one key: plain round-robin when the class
     * attribute is numeric, per-class round-robin otherwise.
     *
     * @param text the key (not used)
     * @param iterable the values to distribute
     * @param context the reducer context (not used; output goes through the named outputs)
     * @throws IOException if a value is malformed or writing fails
     * @throws InterruptedException if writing is interrupted
     */
    @Override
    public void reduce(Text text, Iterable<Text> iterable, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        if (this.m_trainingHeader.classAttribute().isNumeric()) {
            randomize(iterable);
        } else {
            randomizeAndStratify(iterable);
        }
    }

    /**
     * Tops up the chunks for every class value that was seen fewer times than
     * there are chunks, using randomly chosen buffered rows of that class
     * value, then closes the named outputs.
     *
     * <p>Nothing is topped up when no per-class buffers exist (class attribute
     * not nominal) or when a class value was never seen, since there are no
     * rows to copy from in either case.
     *
     * @param context the reducer context (not used)
     * @throws IOException if writing or closing fails
     * @throws InterruptedException if writing or closing is interrupted
     */
    @Override
    public void cleanup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        try {
            if (this.m_classInstancesBuffer != null) {
                for (int i = 0; i < this.m_countsPerClass.length; i++) {
                    List<String> buffered = this.m_classInstancesBuffer.get(i);
                    if (buffered.isEmpty()) {
                        // Class value never seen by this reducer: nothing to sample from.
                        continue;
                    }
                    while (this.m_countsPerClass[i] < this.m_numberOfDataChunks) {
                        this.m_outVal.set(buffered.get(this.m_random.nextInt(buffered.size())));
                        // The count is below the chunk total here, so it is itself the next chunk number.
                        this.m_mos.write(CHUNK_PREFIX + this.m_countsPerClass[i], null, this.m_outVal);
                        this.m_countsPerClass[i]++;
                    }
                }
            }
        } finally {
            // Always release the named outputs, even if the top-up failed.
            this.m_mos.close();
        }
    }
}
