package io.druid.indexer;

import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.google.common.io.Closeables;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import io.druid.data.input.InputRow;
import io.druid.data.input.Rows;
import io.druid.granularity.QueryGranularity;
import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector;
import io.druid.segment.indexing.granularity.UniformGranularitySpec;
import io.druid.timeline.partition.HashBasedNumberedShardSpec;
import io.druid.timeline.partition.NoneShardSpec;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
import org.joda.time.Interval;

/* loaded from: input_file:io/druid/indexer/DetermineHashedPartitionsJob.class */
public class DetermineHashedPartitionsJob implements Jobby {
    private static final Logger log = new Logger(DetermineHashedPartitionsJob.class);
    private final HadoopDruidIndexerConfig config;

    /* loaded from: input_file:io/druid/indexer/DetermineHashedPartitionsJob$DetermineCardinalityMapper.class */
    /**
     * Mapper that builds one {@link HyperLogLogCollector} per segment-granularity interval and feeds it a
     * hash of every row's group key (timestamp truncated to the rollup granularity, plus the row itself).
     *
     * <p>Nothing is emitted while rows are being mapped. Once all input has been consumed, {@link #run}
     * writes a single {@code (interval start millis, serialized collector)} pair per interval.
     *
     * <p>When the configuration carries no explicit segment intervals, the mapper runs in
     * "determine intervals" mode and creates collectors lazily for whatever buckets the data falls into.
     */
    public static class DetermineCardinalityMapper extends HadoopDruidIndexerMapper<LongWritable, BytesWritable> {
        /** Hash applied to the JSON-serialized group key before it is added to a collector. */
        private static final HashFunction hashFunction = Hashing.murmur3_128();
        private QueryGranularity rollupGranularity = null;
        /** Collector per interval; immutable and pre-populated unless {@link #determineIntervals} is set. */
        private Map<Interval, HyperLogLogCollector> hyperLogLogs;
        private HadoopDruidIndexerConfig config;
        /** True when the configuration has no segment intervals and they must be derived from the data. */
        private boolean determineIntervals;

        /**
         * Reads the indexer configuration and prepares the interval-to-collector map.
         *
         * @param context the Hadoop mapper context supplying the job configuration
         */
        @Override // io.druid.indexer.HadoopDruidIndexerMapper
        public void setup(Mapper<Writable, Writable, LongWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            super.setup(context);
            this.rollupGranularity = getConfig().getGranularitySpec().getQueryGranularity();
            this.config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());

            final Optional<Set<Interval>> segmentGranularIntervals = this.config.getSegmentGranularIntervals();
            if (!segmentGranularIntervals.isPresent()) {
                // Intervals are unknown up front: collectors get created on demand in innerMap().
                this.determineIntervals = true;
                this.hyperLogLogs = Maps.newHashMap();
                return;
            }

            this.determineIntervals = false;
            final ImmutableMap.Builder<Interval, HyperLogLogCollector> builder = ImmutableMap.builder();
            for (Interval bucketInterval : segmentGranularIntervals.get()) {
                builder.put(bucketInterval, HyperLogLogCollector.makeLatestCollector());
            }
            this.hyperLogLogs = builder.build();
        }

        /**
         * Adds the hash of the row's group key to the collector of the interval the row belongs to.
         *
         * @param inputRow the parsed row
         * @param writable the raw input value (unused here)
         * @param context  the Hadoop mapper context (unused here; output is written in {@link #run})
         * @throws ISE if intervals are configured and none of them contains the row's timestamp
         */
        @Override // io.druid.indexer.HadoopDruidIndexerMapper
        protected void innerMap(InputRow inputRow, Writable writable, Mapper<Writable, Writable, LongWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            final List<Object> groupKey = Rows.toGroupKey(this.rollupGranularity.truncate(inputRow.getTimestampFromEpoch()), inputRow);

            final Interval interval;
            HyperLogLogCollector collector;
            if (this.determineIntervals) {
                interval = this.config.getGranularitySpec().getSegmentGranularity().bucket(new DateTime(inputRow.getTimestampFromEpoch()));
                collector = this.hyperLogLogs.get(interval);
                if (collector == null) {
                    // First row seen for this bucket.
                    collector = HyperLogLogCollector.makeLatestCollector();
                    this.hyperLogLogs.put(interval, collector);
                }
            } else {
                final Optional<Interval> maybeInterval = this.config.getGranularitySpec().bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()));
                if (!maybeInterval.isPresent()) {
                    throw new ISE("WTF?! No bucket found for timestamp: %s", inputRow.getTimestampFromEpoch());
                }
                interval = maybeInterval.get();
                collector = this.hyperLogLogs.get(interval);
            }

            collector.add(hashFunction.hashBytes(HadoopDruidIndexerConfig.jsonMapper.writeValueAsBytes(groupKey)).asBytes());
        }

        /**
         * Drives the whole map task: maps every input record, then emits one record per interval holding
         * the interval's start millis and its serialized collector.
         *
         * <p>{@code cleanup} runs in a {@code finally} block so it is not skipped when mapping or emitting
         * fails.
         *
         * @param context the Hadoop mapper context
         */
        public void run(Mapper<Writable, Writable, LongWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            setup(context);
            try {
                while (context.nextKeyValue()) {
                    map(context.getCurrentKey(), context.getCurrentValue(), context);
                }
                for (Map.Entry<Interval, HyperLogLogCollector> entry : this.hyperLogLogs.entrySet()) {
                    context.write(
                        new LongWritable(entry.getKey().getStartMillis()),
                        new BytesWritable(entry.getValue().toByteArray()));
                }
            } finally {
                cleanup(context);
            }
        }
    }

    /* loaded from: input_file:io/druid/indexer/DetermineHashedPartitionsJob$DetermineCardinalityReducer.class */
    public static class DetermineCardinalityReducer extends Reducer<LongWritable, BytesWritable, NullWritable, NullWritable> {
        private final List<Interval> intervals = Lists.newArrayList();
        protected HadoopDruidIndexerConfig config = null;

        protected void setup(Reducer<LongWritable, BytesWritable, NullWritable, NullWritable>.Context context) throws IOException, InterruptedException {
            this.config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
        }

        protected void reduce(LongWritable longWritable, Iterable<BytesWritable> iterable, Reducer<LongWritable, BytesWritable, NullWritable, NullWritable>.Context context) throws IOException, InterruptedException {
            HyperLogLogCollector makeLatestCollector = HyperLogLogCollector.makeLatestCollector();
            for (BytesWritable bytesWritable : iterable) {
                makeLatestCollector.fold(ByteBuffer.wrap(bytesWritable.getBytes(), 0, bytesWritable.getLength()));
            }
            Interval bucket = this.config.getGranularitySpec().getSegmentGranularity().bucket(new DateTime(longWritable.get()));
            this.intervals.add(bucket);
            OutputStream makePathAndOutputStream = Utils.makePathAndOutputStream(context, this.config.makeSegmentPartitionInfoPath(bucket), this.config.isOverwriteFiles());
            try {
                HadoopDruidIndexerConfig.jsonMapper.writerWithType(new TypeReference<Long>() { // from class: io.druid.indexer.DetermineHashedPartitionsJob.DetermineCardinalityReducer.1
                }).writeValue(makePathAndOutputStream, Long.valueOf(new Double(makeLatestCollector.estimateCardinality()).longValue()));
                Closeables.close(makePathAndOutputStream, false);
            } catch (Throwable th) {
                Closeables.close(makePathAndOutputStream, false);
                throw th;
            }
        }

        public void run(Reducer<LongWritable, BytesWritable, NullWritable, NullWritable>.Context context) throws IOException, InterruptedException {
            super.run(context);
            if (this.config.getSegmentGranularIntervals().isPresent()) {
                return;
            }
            OutputStream makePathAndOutputStream = Utils.makePathAndOutputStream(context, this.config.makeIntervalInfoPath(), this.config.isOverwriteFiles());
            try {
                HadoopDruidIndexerConfig.jsonMapper.writerWithType(new TypeReference<List<Interval>>() { // from class: io.druid.indexer.DetermineHashedPartitionsJob.DetermineCardinalityReducer.2
                }).writeValue(makePathAndOutputStream, this.intervals);
                Closeables.close(makePathAndOutputStream, false);
            } catch (Throwable th) {
                Closeables.close(makePathAndOutputStream, false);
                throw th;
            }
        }

        protected /* bridge */ /* synthetic */ void reduce(Object obj, Iterable iterable, Reducer.Context context) throws IOException, InterruptedException {
            reduce((LongWritable) obj, (Iterable<BytesWritable>) iterable, (Reducer<LongWritable, BytesWritable, NullWritable, NullWritable>.Context) context);
        }
    }

    /* loaded from: input_file:io/druid/indexer/DetermineHashedPartitionsJob$DetermineHashedPartitionsPartitioner.class */
    /**
     * Partitioner that routes every interval to its own reducer.
     *
     * <p>The reducer index of an interval is its position in the iteration order of the configured
     * segment intervals. Everything goes to partition 0 when the job tracker is {@code "local"} or when
     * no intervals are configured.
     */
    public static class DetermineHashedPartitionsPartitioner extends Partitioner<LongWritable, BytesWritable> implements Configurable {
        private Configuration config;
        /** True when the configuration has no segment intervals; all keys then share one reducer. */
        private boolean determineIntervals;
        /** Interval start millis mapped to reducer index; only populated when intervals are configured. */
        private Map<LongWritable, Integer> reducerLookup;

        /**
         * Picks the reducer for an interval.
         *
         * @param longWritable  start millis of the interval
         * @param bytesWritable the serialized collector (not inspected)
         * @param i             total number of partitions (not inspected)
         * @return the reducer index for the interval, or 0 in local / determine-intervals mode
         * @throws ISE if the key does not match the start of any configured interval
         */
        public int getPartition(LongWritable longWritable, BytesWritable bytesWritable, int i) {
            // Constant-first equals: the property may be absent, which must not cause an NPE.
            if (this.determineIntervals || "local".equals(this.config.get("mapred.job.tracker"))) {
                return 0;
            }
            final Integer partition = this.reducerLookup.get(longWritable);
            if (partition == null) {
                throw new ISE("No reducer assigned for interval starting at [%s]", new DateTime(longWritable.get()));
            }
            return partition;
        }

        public Configuration getConf() {
            return this.config;
        }

        /**
         * Stores the configuration and, when segment intervals are configured, builds the lookup from
         * interval start millis to reducer index.
         *
         * @param configuration the job configuration
         */
        public void setConf(Configuration configuration) {
            this.config = configuration;
            final HadoopDruidIndexerConfig indexerConfig = HadoopDruidIndexerConfig.fromConfiguration(configuration);
            final Optional<Set<Interval>> segmentIntervals = indexerConfig.getSegmentGranularIntervals();
            if (!segmentIntervals.isPresent()) {
                this.determineIntervals = true;
                return;
            }

            this.determineIntervals = false;
            final ImmutableMap.Builder<LongWritable, Integer> builder = ImmutableMap.builder();
            int reducerIndex = 0;
            for (Interval interval : segmentIntervals.get()) {
                builder.put(new LongWritable(interval.getStartMillis()), reducerIndex);
                reducerIndex++;
            }
            this.reducerLookup = builder.build();
        }
    }

    /**
     * Creates the job around the given indexer configuration.
     *
     * <p>The configuration is kept by reference; {@link #run()} updates it with the shard specs (and,
     * when none were configured, the granularity spec) it determines.
     *
     * @param hadoopDruidIndexerConfig the indexer configuration this job reads from and writes to
     */
    public DetermineHashedPartitionsJob(HadoopDruidIndexerConfig hadoopDruidIndexerConfig) {
        this.config = hadoopDruidIndexerConfig;
    }

    /**
     * Runs the cardinality-estimation Hadoop job and turns its output into shard specs.
     *
     * <p>Steps:
     * <ol>
     *   <li>Configure and submit the map/reduce job, using one reducer per configured segment interval,
     *       or a single reducer when no intervals are configured.</li>
     *   <li>If no intervals were configured, read the intervals written by the job and install them on
     *       the configuration as a {@link UniformGranularitySpec}.</li>
     *   <li>For every interval, read the estimated row count and derive the number of shards as
     *       {@code ceil(rows / targetPartitionSize)}; one shard gets a {@link NoneShardSpec}, otherwise
     *       each shard gets a {@link HashBasedNumberedShardSpec}.</li>
     *   <li>Store the resulting shard specs on the configuration.</li>
     * </ol>
     *
     * @return {@code true} when the Hadoop job completed and shard specs were stored, {@code false}
     *         when the Hadoop job reported failure
     * @throws ISE if intervals had to be determined but the interval info file is missing
     */
    public boolean run() {
        try {
            final long startTime = System.currentTimeMillis();

            final Job job = Job.getInstance(
                new Configuration(),
                String.format("%s-determine_partitions_hashed-%s", this.config.getDataSource(), this.config.getIntervals()));
            JobHelper.injectSystemProperties(job);
            JobHelper.setInputFormat(job, this.config);
            job.setMapperClass(DetermineCardinalityMapper.class);
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(BytesWritable.class);
            job.setReducerClass(DetermineCardinalityReducer.class);
            job.setOutputKeyClass(NullWritable.class);
            job.setOutputValueClass(NullWritable.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setPartitionerClass(DetermineHashedPartitionsPartitioner.class);
            if (this.config.getSegmentGranularIntervals().isPresent()) {
                // One reducer per interval, matching DetermineHashedPartitionsPartitioner.
                job.setNumReduceTasks(this.config.getSegmentGranularIntervals().get().size());
            } else {
                job.setNumReduceTasks(1);
            }
            JobHelper.setupClasspath(JobHelper.distributedClassPath(this.config.getWorkingPath()), job);
            this.config.addInputPaths(job);
            this.config.addJobProperties(job);
            this.config.intoConfiguration(job);
            FileOutputFormat.setOutputPath(job, this.config.makeGroupedDataDir());

            job.submit();
            log.info("Job %s submitted, status available at: %s", job.getJobName(), job.getTrackingURL());
            if (!job.waitForCompletion(true)) {
                log.error("Job failed: %s", job.getJobID());
                return false;
            }

            log.info("Job completed, loading up partitions for intervals[%s].", this.config.getSegmentGranularIntervals());
            FileSystem fileSystem = null;
            if (!this.config.getSegmentGranularIntervals().isPresent()) {
                final Path intervalInfoPath = this.config.makeIntervalInfoPath();
                fileSystem = intervalInfoPath.getFileSystem(job.getConfiguration());
                if (!Utils.exists(job, fileSystem, intervalInfoPath)) {
                    throw new ISE("Path[%s] didn't exist!?", intervalInfoPath);
                }
                final List<Interval> intervals = HadoopDruidIndexerConfig.jsonMapper.readValue(
                    Utils.openInputStream(job, intervalInfoPath),
                    new TypeReference<List<Interval>>() {
                    });
                this.config.setGranularitySpec(
                    new UniformGranularitySpec(
                        this.config.getGranularitySpec().getSegmentGranularity(),
                        this.config.getGranularitySpec().getQueryGranularity(),
                        intervals));
                // The value is passed as a format argument; concatenating it into the format string left
                // the %s placeholder without an argument.
                log.info("Determined Intervals for Job [%s]", this.config.getSegmentGranularIntervals());
            }

            final Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
            // Running index across all intervals, handed to every HadoopyShardSpec.
            int shardCount = 0;
            for (Interval segmentGranularity : this.config.getSegmentGranularIntervals().get()) {
                final DateTime bucket = segmentGranularity.getStart();
                final Path partitionInfoPath = this.config.makeSegmentPartitionInfoPath(segmentGranularity);
                if (fileSystem == null) {
                    fileSystem = partitionInfoPath.getFileSystem(job.getConfiguration());
                }
                if (!Utils.exists(job, fileSystem, partitionInfoPath)) {
                    log.info("Path[%s] didn't exist!?", partitionInfoPath);
                    continue;
                }

                final Long numRows = HadoopDruidIndexerConfig.jsonMapper.readValue(
                    Utils.openInputStream(job, partitionInfoPath),
                    new TypeReference<Long>() {
                    });
                log.info("Found approximately [%,d] rows in data.", numRows);

                // Divide as doubles: long division would truncate before ceil() gets a chance to round up.
                final int numberOfShards = (int) Math.ceil(numRows.doubleValue() / this.config.getTargetPartitionSize().longValue());
                log.info("Creating [%,d] shards", numberOfShards);

                final List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(numberOfShards);
                if (numberOfShards == 1) {
                    actualSpecs.add(new HadoopyShardSpec(new NoneShardSpec(), shardCount++));
                } else {
                    for (int partition = 0; partition < numberOfShards; partition++) {
                        actualSpecs.add(
                            new HadoopyShardSpec(
                                new HashBasedNumberedShardSpec(partition, numberOfShards, HadoopDruidIndexerConfig.jsonMapper),
                                shardCount++));
                        log.info("DateTime[%s], partition[%d], spec[%s]", bucket, partition, actualSpecs.get(partition));
                    }
                }
                shardSpecs.put(bucket, actualSpecs);
            }

            this.config.setShardSpecs(shardSpecs);
            log.info("DetermineHashedPartitionsJob took %d millis", System.currentTimeMillis() - startTime);
            return true;
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }
}
