/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hadoop.mapred.lib;

import com.facebook.presto.hadoop.shaded.org.apache.commons.logging.Log;
import com.facebook.presto.hadoop.shaded.org.apache.commons.logging.LogFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.TotalOrderPartitioner;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class InputSampler<K, V>
implements Tool {
    private static final Log LOG = LogFactory.getLog(InputSampler.class);
    private JobConf conf;

    /**
     * Writes the command-line synopsis, the default sampler settings and the
     * generic Hadoop tool options to standard output.
     *
     * @return always {@code -1}, so callers can use the result directly as a
     *         failure exit code
     */
    static int printUsage() {
        // Adjacent literals are folded at compile time; the emitted text is one string.
        final String synopsis = "sampler -r <reduces>\n"
            + "      [-inFormat <input format class>]\n"
            + "      [-keyClass <map input & output key class>]\n"
            + "      [-splitRandom <double pcnt> <numSamples> <maxsplits> | // Sample from random splits at random (general)\n"
            + "       -splitSample <numSamples> <maxsplits> |              // Sample from first records in splits (random data)\n"
            + "       -splitInterval <double pcnt> <maxsplits>]             // Sample from splits at intervals (sorted data)";
        System.out.println(synopsis);
        System.out.println("Default sampler: -splitRandom 0.1 10000 10");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    /**
     * Creates a sampler tool that operates on the given job configuration.
     *
     * @param conf job configuration stored as this tool's configuration
     */
    public InputSampler(JobConf conf) {
        this.conf = conf;
    }

    /**
     * Returns the job configuration currently held by this tool.
     *
     * @return the stored {@link JobConf}, exposed as a {@link Configuration}
     */
    @Override
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Replaces the configuration held by this tool.
     *
     * @param conf the new configuration; used as-is when it already is a
     *             {@link JobConf}, otherwise wrapped in a new {@link JobConf}
     */
    @Override
    public void setConf(Configuration conf) {
        if (conf instanceof JobConf) {
            this.conf = (JobConf)conf;
        } else {
            this.conf = new JobConf(conf);
        }
    }

    /**
     * Samples the job's input, sorts the sampled keys with the job's output
     * key comparator and writes {@code numReduceTasks - 1} split points to the
     * partition file named by {@link TotalOrderPartitioner#getPartitionFile}.
     * An existing file at that path is deleted first.
     *
     * <p>Split points are taken at evenly spaced positions in the sorted
     * sample. A position is advanced past keys that compare equal to the
     * previously written split point. The advance has no upper bound, so a
     * sample that is empty or dominated by duplicate keys makes the lookup run
     * past the end of the array.
     *
     * @param job     job whose input format, comparator, reducer count and
     *                partition file location are used
     * @param sampler strategy used to draw keys from the input
     * @throws IOException if sampling or writing the partition file fails
     */
    @SuppressWarnings("unchecked") // job.getInputFormat()/getOutputKeyComparator() are untyped
    public static <K, V> void writePartitionFile(JobConf job, Sampler<K, V> sampler) throws IOException {
        InputFormat<K, V> inf = (InputFormat<K, V>)job.getInputFormat();
        int numPartitions = job.getNumReduceTasks();
        K[] samples = sampler.getSample(inf, job);
        LOG.info("Using " + samples.length + " samples");
        RawComparator<K> comparator = (RawComparator<K>)job.getOutputKeyComparator();
        Arrays.sort(samples, comparator);
        Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job));
        FileSystem fs = dst.getFileSystem(job);
        if (fs.exists(dst)) {
            fs.delete(dst, false);
        }
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, dst, job.getMapOutputKeyClass(), NullWritable.class);
        try {
            NullWritable nullValue = NullWritable.get();
            float stepSize = (float)samples.length / (float)numPartitions;
            int last = -1;
            for (int i = 1; i < numPartitions; ++i) {
                int k = Math.round(stepSize * (float)i);
                // Skip keys equal to the previous split point so consecutive
                // split points are distinct.
                while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
                    ++k;
                }
                writer.append(samples[k], (Object)nullValue);
                last = k;
            }
        } finally {
            // Release the output stream even when append/compare fails part-way.
            writer.close();
        }
    }

    /**
     * Command-line driver: parses the sampler options, registers the input
     * paths, and writes the partition file.
     *
     * <p>Recognised options are {@code -r}, {@code -inFormat},
     * {@code -keyClass}, {@code -splitSample}, {@code -splitRandom} and
     * {@code -splitInterval}. Every other argument is positional: the last
     * positional argument is the partition file, all earlier ones are input
     * paths. A non-positive {@code maxsplits} value means "no limit".
     *
     * @param args command-line arguments
     * @return {@code 0} on success, or the value of {@link #printUsage()} when
     *         the arguments are invalid
     * @throws Exception if a named class cannot be loaded or sampling fails
     */
    @Override
    public int run(String[] args) throws Exception {
        JobConf job = (JobConf)this.getConf();
        ArrayList<String> positional = new ArrayList<String>();
        Sampler<K, V> sampler = null;
        for (int i = 0; i < args.length; ++i) {
            final String flag = args[i];
            try {
                if ("-r".equals(flag)) {
                    job.setNumReduceTasks(Integer.parseInt(args[++i]));
                } else if ("-inFormat".equals(flag)) {
                    job.setInputFormat(Class.forName(args[++i]).asSubclass(InputFormat.class));
                } else if ("-keyClass".equals(flag)) {
                    job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
                } else if ("-splitSample".equals(flag)) {
                    int numSamples = Integer.parseInt(args[++i]);
                    int maxSplits = Integer.parseInt(args[++i]);
                    if (maxSplits <= 0) {
                        maxSplits = Integer.MAX_VALUE;
                    }
                    sampler = new SplitSampler<K, V>(numSamples, maxSplits);
                } else if ("-splitRandom".equals(flag)) {
                    double pcnt = Double.parseDouble(args[++i]);
                    int numSamples = Integer.parseInt(args[++i]);
                    int maxSplits = Integer.parseInt(args[++i]);
                    if (maxSplits <= 0) {
                        maxSplits = Integer.MAX_VALUE;
                    }
                    sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
                } else if ("-splitInterval".equals(flag)) {
                    double pcnt = Double.parseDouble(args[++i]);
                    int maxSplits = Integer.parseInt(args[++i]);
                    if (maxSplits <= 0) {
                        maxSplits = Integer.MAX_VALUE;
                    }
                    sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
                } else {
                    positional.add(flag);
                }
            }
            catch (NumberFormatException except) {
                // i already points at the value that failed to parse.
                System.out.println("ERROR: Integer expected instead of " + args[i]);
                return InputSampler.printUsage();
            }
            catch (ArrayIndexOutOfBoundsException except) {
                // i was advanced past the end; the element before it is the last one present.
                System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
                return InputSampler.printUsage();
            }
        }

        if (job.getNumReduceTasks() <= 1) {
            System.err.println("Sampler requires more than one reducer");
            return InputSampler.printUsage();
        }
        if (positional.size() < 2) {
            System.out.println("ERROR: Wrong number of parameters: ");
            return InputSampler.printUsage();
        }
        if (sampler == null) {
            sampler = new RandomSampler<K, V>(0.1, 10000, 10);
        }

        // The final positional argument names the partition file; the rest are inputs.
        Path partitionFile = new Path(positional.remove(positional.size() - 1));
        TotalOrderPartitioner.setPartitionFile(job, partitionFile);
        for (String input : positional) {
            FileInputFormat.addInputPath(job, new Path(input));
        }
        InputSampler.writePartitionFile(job, sampler);
        return 0;
    }

    /**
     * Program entry point: runs this tool through {@link ToolRunner} and
     * terminates the JVM with the tool's return code.
     *
     * @param args command-line arguments forwarded to {@link #run(String[])}
     * @throws Exception if the tool fails
     */
    public static void main(String[] args) throws Exception {
        InputSampler<?, ?> tool = new InputSampler<Object, Object>(new JobConf(InputSampler.class));
        System.exit(ToolRunner.run(tool, args));
    }

    /**
     * Sampler that walks the records of evenly spaced splits and keeps a
     * record whenever the running ratio of kept to seen records is below the
     * configured frequency.
     */
    public static class IntervalSampler<K, V>
    implements Sampler<K, V> {
        private final double freq;
        private final int maxSplitsSampled;

        /**
         * Creates a sampler with no limit on the number of splits read.
         *
         * @param freq target fraction of records to keep
         */
        public IntervalSampler(double freq) {
            this(freq, Integer.MAX_VALUE);
        }

        /**
         * Creates a sampler.
         *
         * @param freq             target fraction of records to keep
         * @param maxSplitsSampled upper bound on the number of splits read
         */
        public IntervalSampler(double freq, int maxSplitsSampled) {
            this.freq = freq;
            this.maxSplitsSampled = maxSplitsSampled;
        }

        /**
         * Reads up to {@code maxSplitsSampled} splits, spaced evenly across
         * all splits, and collects keys at the configured frequency.
         *
         * @param inf input format used to compute splits and open readers
         * @param job job configuration passed to the input format
         * @return the sampled keys; empty when there is nothing to sample
         * @throws IOException if computing splits or reading records fails
         */
        @Override
        public K[] getSample(InputFormat<K, V> inf, JobConf job) throws IOException {
            InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
            ArrayList<K> samples = new ArrayList<K>();
            int splitsToSample = Math.min(this.maxSplitsSampled, splits.length);
            if (splitsToSample > 0) {
                // Guarded above: dividing by zero splits would throw ArithmeticException.
                int splitStep = splits.length / splitsToSample;
                long records = 0L;
                long kept = 0L;
                for (int i = 0; i < splitsToSample; ++i) {
                    RecordReader<K, V> reader = inf.getRecordReader(splits[i * splitStep], job, Reporter.NULL);
                    try {
                        K key = reader.createKey();
                        V value = reader.createValue();
                        while (reader.next(key, value)) {
                            ++records;
                            if ((double)kept / (double)records < this.freq) {
                                ++kept;
                                samples.add(key);
                                // The kept key must not be handed back to next(); use a new one.
                                key = reader.createKey();
                            }
                        }
                    } finally {
                        reader.close();
                    }
                }
            }
            // toArray() yields Object[]; K is erased, so the cast is unchecked but harmless here.
            @SuppressWarnings("unchecked")
            K[] result = (K[])samples.toArray();
            return result;
        }
    }

    /**
     * Sampler that visits splits in a random order and accepts each record
     * with a given probability. Once {@code numSamples} keys are held, each
     * newly accepted key overwrites a random existing one and the acceptance
     * probability is scaled down.
     */
    public static class RandomSampler<K, V>
    implements Sampler<K, V> {
        private final double freq;
        private final int numSamples;
        private final int maxSplitsSampled;

        /**
         * Creates a sampler with no limit on the number of splits read.
         *
         * @param freq       initial probability of accepting a record
         * @param numSamples maximum number of keys returned
         */
        public RandomSampler(double freq, int numSamples) {
            this(freq, numSamples, Integer.MAX_VALUE);
        }

        /**
         * Creates a sampler.
         *
         * @param freq             initial probability of accepting a record
         * @param numSamples       maximum number of keys returned
         * @param maxSplitsSampled number of splits that are always read; further
         *                         splits are read only while fewer than
         *                         {@code numSamples} keys have been collected
         */
        public RandomSampler(double freq, int numSamples, int maxSplitsSampled) {
            this.freq = freq;
            this.numSamples = numSamples;
            this.maxSplitsSampled = maxSplitsSampled;
        }

        /**
         * Shuffles the splits and randomly samples keys from them.
         *
         * @param inf input format used to compute splits and open readers
         * @param job job configuration passed to the input format
         * @return at most {@code numSamples} sampled keys
         * @throws IOException if computing splits or reading records fails
         */
        @Override
        public K[] getSample(InputFormat<K, V> inf, JobConf job) throws IOException {
            InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
            ArrayList<K> samples = new ArrayList<K>(this.numSamples);
            int splitsToSample = Math.min(this.maxSplitsSampled, splits.length);
            Random r = new Random();
            long seed = r.nextLong();
            r.setSeed(seed);
            // Logged so a run can be correlated with the seed that produced it.
            LOG.debug("seed: " + seed);
            // Randomise the order in which splits are visited.
            for (int i = 0; i < splits.length; ++i) {
                InputSplit tmp = splits[i];
                int j = r.nextInt(splits.length);
                splits[i] = splits[j];
                splits[j] = tmp;
            }
            // Work on a copy so the decay applied below does not carry over
            // into later calls on the same sampler instance.
            double acceptance = this.freq;
            for (int i = 0; i < splitsToSample || i < splits.length && samples.size() < this.numSamples; ++i) {
                RecordReader<K, V> reader = inf.getRecordReader(splits[i], job, Reporter.NULL);
                try {
                    K key = reader.createKey();
                    V value = reader.createValue();
                    while (reader.next(key, value)) {
                        if (r.nextDouble() <= acceptance) {
                            if (samples.size() < this.numSamples) {
                                samples.add(key);
                            } else {
                                // Sample is full: overwrite a random slot, then lower the
                                // acceptance probability for subsequent records.
                                samples.set(r.nextInt(this.numSamples), key);
                                acceptance *= (double)(this.numSamples - 1) / (double)this.numSamples;
                            }
                            // The accepted key must not be handed back to next(); use a new one.
                            key = reader.createKey();
                        }
                    }
                } finally {
                    reader.close();
                }
            }
            // toArray() yields Object[]; K is erased, so the cast is unchecked but harmless here.
            @SuppressWarnings("unchecked")
            K[] result = (K[])samples.toArray();
            return result;
        }
    }

    /**
     * Sampler that takes the leading records of evenly spaced splits, dividing
     * the requested number of samples equally among the splits it reads.
     */
    public static class SplitSampler<K, V>
    implements Sampler<K, V> {
        private final int numSamples;
        private final int maxSplitsSampled;

        /**
         * Creates a sampler with no limit on the number of splits read.
         *
         * @param numSamples total number of keys to collect
         */
        public SplitSampler(int numSamples) {
            this(numSamples, Integer.MAX_VALUE);
        }

        /**
         * Creates a sampler.
         *
         * @param numSamples       total number of keys to collect
         * @param maxSplitsSampled upper bound on the number of splits read
         */
        public SplitSampler(int numSamples, int maxSplitsSampled) {
            this.numSamples = numSamples;
            this.maxSplitsSampled = maxSplitsSampled;
        }

        /**
         * Collects keys from the start of up to {@code maxSplitsSampled}
         * splits. Reading of split {@code i} stops once the running total of
         * collected records reaches {@code (i + 1) * (numSamples / splitsRead)};
         * at least one record is taken from each non-empty split.
         *
         * @param inf input format used to compute splits and open readers
         * @param job job configuration passed to the input format
         * @return the sampled keys; empty when there is nothing to sample
         * @throws IOException if computing splits or reading records fails
         */
        @Override
        public K[] getSample(InputFormat<K, V> inf, JobConf job) throws IOException {
            InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
            ArrayList<K> samples = new ArrayList<K>(this.numSamples);
            int splitsToSample = Math.min(this.maxSplitsSampled, splits.length);
            if (splitsToSample > 0) {
                // Guarded above: dividing by zero splits would throw ArithmeticException.
                int splitStep = splits.length / splitsToSample;
                int samplesPerSplit = this.numSamples / splitsToSample;
                long records = 0L;
                for (int i = 0; i < splitsToSample; ++i) {
                    RecordReader<K, V> reader = inf.getRecordReader(splits[i * splitStep], job, Reporter.NULL);
                    try {
                        K key = reader.createKey();
                        V value = reader.createValue();
                        while (reader.next(key, value)) {
                            samples.add(key);
                            // The kept key must not be handed back to next(); use a new one.
                            key = reader.createKey();
                            ++records;
                            // Quota for the splits read so far is met: move to the next split.
                            if ((long)(i + 1) * samplesPerSplit <= records) {
                                break;
                            }
                        }
                    } finally {
                        reader.close();
                    }
                }
            }
            // toArray() yields Object[]; K is erased, so the cast is unchecked but harmless here.
            @SuppressWarnings("unchecked")
            K[] result = (K[])samples.toArray();
            return result;
        }
    }

    /**
     * Strategy for drawing a sample of keys from a job's input.
     */
    public static interface Sampler<K, V> {
        /**
         * Collects a sample of keys from the input.
         *
         * @param var1 input format used to obtain splits and record readers
         * @param var2 job configuration describing the input
         * @return the sampled keys
         * @throws IOException if the input cannot be read
         */
        public K[] getSample(InputFormat<K, V> var1, JobConf var2) throws IOException;
    }
}

