package io.druid.indexer;

import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.PeekingIterator;
import com.google.common.io.Closeables;
import com.metamx.common.ISE;
import com.metamx.common.guava.nary.BinaryFn;
import com.metamx.common.logger.Logger;
import io.druid.collections.CombiningIterable;
import io.druid.data.input.InputRow;
import io.druid.data.input.Rows;
import io.druid.granularity.QueryGranularity;
import io.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import io.druid.timeline.partition.NoneShardSpec;
import io.druid.timeline.partition.ShardSpec;
import io.druid.timeline.partition.SingleDimensionShardSpec;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.InvalidJobConfException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
import org.joda.time.Interval;

/* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob.class */
public class DeterminePartitionsJob implements Jobby {
    /** Logger used by the job driver and by the nested mapper/reducer classes. */
    private static final Logger log = new Logger(DeterminePartitionsJob.class);
    /** Joiner borrowed from HadoopDruidIndexerConfig; used to serialize DimValueCount records and sort keys. */
    private static final Joiner tabJoiner = HadoopDruidIndexerConfig.tabJoiner;
    /** Splitter borrowed from HadoopDruidIndexerConfig; used to parse records produced with tabJoiner. */
    private static final Splitter tabSplitter = HadoopDruidIndexerConfig.tabSplitter;
    /** Indexer configuration that drives the job; run() stores the determined shard specs back into it. */
    private final HadoopDruidIndexerConfig config;

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DeterminePartitionsDimSelectionAssumeGroupedMapper.class */
    /**
     * Mapper used when the input is assumed to be grouped already: every parsed input row is
     * handed straight to a {@link DeterminePartitionsDimSelectionMapperHelper}, which emits the
     * per-dimension value counts.
     */
    public static class DeterminePartitionsDimSelectionAssumeGroupedMapper extends HadoopDruidIndexerMapper<BytesWritable, Text> {
        private DeterminePartitionsDimSelectionMapperHelper helper;

        /**
         * Runs the parent setup and then builds the helper from the job configuration and the
         * partition dimension of the configured {@link SingleDimensionPartitionsSpec}.
         */
        @Override
        public void setup(Mapper<Writable, Writable, BytesWritable, Text>.Context context) throws IOException, InterruptedException {
            super.setup(context);
            final HadoopDruidIndexerConfig indexerConfig = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
            final SingleDimensionPartitionsSpec partitionsSpec = (SingleDimensionPartitionsSpec) indexerConfig.getPartitionsSpec();
            this.helper = new DeterminePartitionsDimSelectionMapperHelper(indexerConfig, partitionsSpec.getPartitionDimension());
        }

        /**
         * Collects the values of every dimension of the row into a map and emits counts for them,
         * keyed by the row's timestamp.
         */
        @Override
        protected void innerMap(InputRow inputRow, Writable writable, Mapper<Writable, Writable, BytesWritable, Text>.Context context) throws IOException, InterruptedException {
            final Map<String, Iterable<String>> valuesByDimension = Maps.newHashMap();
            for (final String dimension : inputRow.getDimensions()) {
                valuesByDimension.put(dimension, inputRow.getDimension(dimension));
            }
            final DateTime timestamp = new DateTime(inputRow.getTimestampFromEpoch());
            this.helper.emitDimValueCounts(context, timestamp, valuesByDimension);
        }
    }

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DeterminePartitionsDimSelectionBaseReducer.class */
    /**
     * Shared base for the dimension-selection combiner and reducer. It loads the indexer
     * configuration once, decodes each incoming key into {@link SortableBytes}, collapses the
     * incoming values into combined {@link DimValueCount}s, and delegates to
     * {@link #innerReduce}.
     */
    private static abstract class DeterminePartitionsDimSelectionBaseReducer extends Reducer<BytesWritable, Text, BytesWritable, Text> {
        protected volatile HadoopDruidIndexerConfig config;

        private DeterminePartitionsDimSelectionBaseReducer() {
            this.config = null;
        }

        /** Lazily loads the indexer configuration from the Hadoop configuration, at most once per instance. */
        @Override
        protected void setup(Reducer<BytesWritable, Text, BytesWritable, Text>.Context context) throws IOException, InterruptedException {
            if (this.config == null) {
                synchronized (DeterminePartitionsDimSelectionBaseReducer.class) {
                    if (this.config == null) {
                        this.config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
                    }
                }
            }
        }

        // The compiler generates the erased bridge for this override on its own; declaring one by
        // hand clashes with the inherited generic signature, so none is declared here.
        @Override
        protected void reduce(BytesWritable bytesWritable, Iterable<Text> iterable, Reducer<BytesWritable, Text, BytesWritable, Text>.Context context) throws IOException, InterruptedException {
            innerReduce(context, SortableBytes.fromBytesWritable(bytesWritable), combineRows(iterable));
        }

        /**
         * Handles one key group.
         *
         * @param context       reducer context to write to
         * @param sortableBytes decoded key of the group
         * @param iterable      values of the group after adjacent equal (dim, value) pairs were merged
         */
        protected abstract void innerReduce(Reducer<BytesWritable, Text, BytesWritable, Text>.Context context, SortableBytes sortableBytes, Iterable<DimValueCount> iterable) throws IOException, InterruptedException;

        /**
         * Parses every value into a {@link DimValueCount} and merges entries that compare equal on
         * (dim, value) by summing their row counts. A negative count on either side yields -1 so
         * the marker survives the merge.
         */
        private Iterable<DimValueCount> combineRows(Iterable<Text> iterable) {
            final Iterable<DimValueCount> parsed = Iterables.transform(iterable, new Function<Text, DimValueCount>() {
                @Override
                public DimValueCount apply(Text text) {
                    return DimValueCount.fromText(text);
                }
            });
            final Comparator<DimValueCount> byDimThenValue = new Comparator<DimValueCount>() {
                @Override
                public int compare(DimValueCount left, DimValueCount right) {
                    return ComparisonChain.start().compare(left.dim, right.dim).compare(left.value, right.value).result();
                }
            };
            final BinaryFn<DimValueCount, DimValueCount, DimValueCount> merger = new BinaryFn<DimValueCount, DimValueCount, DimValueCount>() {
                @Override
                public DimValueCount apply(DimValueCount left, DimValueCount right) {
                    if (right == null) {
                        return left;
                    }
                    final boolean flagged = left.numRows < 0 || right.numRows < 0;
                    return new DimValueCount(left.dim, left.value, flagged ? -1 : left.numRows + right.numRows);
                }
            };
            return new CombiningIterable<DimValueCount>(parsed, byDimThenValue, merger);
        }
    }

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DeterminePartitionsDimSelectionCombiner.class */
    /**
     * Combiner for the dimension-selection job: re-emits each combined {@link DimValueCount}
     * under the group key it arrived with.
     */
    public static class DeterminePartitionsDimSelectionCombiner extends DeterminePartitionsDimSelectionBaseReducer {
        public DeterminePartitionsDimSelectionCombiner() {
            super();
        }

        /** Writes every combined count back out, keyed by the group key of {@code sortableBytes}. */
        @Override
        protected void innerReduce(Reducer<BytesWritable, Text, BytesWritable, Text>.Context context, SortableBytes sortableBytes, Iterable<DimValueCount> iterable) throws IOException, InterruptedException {
            for (final DimValueCount combined : iterable) {
                DeterminePartitionsJob.write(context, sortableBytes.getGroupKey(), combined);
            }
        }
    }

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DeterminePartitionsDimSelectionMapperHelper.class */
    /**
     * Emits the key/value pairs consumed by the dimension-selection reducer. Shared by the two
     * dimension-selection mappers.
     */
    public static class DeterminePartitionsDimSelectionMapperHelper {
        private final HadoopDruidIndexerConfig config;
        private final String partitionDimension;
        private final Map<DateTime, Integer> intervalIndexes;

        /**
         * @param hadoopDruidIndexerConfig indexer configuration providing the bucket intervals
         * @param str                      the only dimension to emit counts for, or {@code null} to emit all
         */
        public DeterminePartitionsDimSelectionMapperHelper(HadoopDruidIndexerConfig hadoopDruidIndexerConfig, String str) {
            this.config = hadoopDruidIndexerConfig;
            this.partitionDimension = str;

            // Number the bucket intervals in iteration order; the number becomes part of the group key.
            final ImmutableMap.Builder<DateTime, Integer> indexes = ImmutableMap.builder();
            int nextIndex = 0;
            for (final Interval bucket : hadoopDruidIndexerConfig.getGranularitySpec().bucketIntervals().get()) {
                indexes.put(bucket.getStart(), nextIndex);
                nextIndex++;
            }
            this.intervalIndexes = indexes.build();
        }

        /**
         * Emits one total-row marker (empty dim and value, count 1) followed by one record per
         * eligible dimension: the single value with count 1, or an empty value with count -1 when
         * the dimension does not have exactly one value.
         *
         * @param taskInputOutputContext context to write to
         * @param dateTime               timestamp of the row
         * @param map                    dimension name to the row's values for that dimension
         * @throws ISE if no bucket interval contains {@code dateTime}
         */
        public void emitDimValueCounts(TaskInputOutputContext<? extends Writable, ? extends Writable, BytesWritable, Text> taskInputOutputContext, DateTime dateTime, Map<String, Iterable<String>> map) throws IOException, InterruptedException {
            final Optional<Interval> maybeInterval = this.config.getGranularitySpec().bucketInterval(dateTime);
            if (!maybeInterval.isPresent()) {
                throw new ISE("WTF?! No bucket found for timestamp: %s", dateTime);
            }

            final Interval interval = maybeInterval.get();
            final int intervalIndex = this.intervalIndexes.get(interval.getStart());

            // Group key layout: 4-byte interval index followed by the 8-byte interval start millis.
            final byte[] groupKey = ByteBuffer.allocate(12)
                    .putInt(intervalIndex)
                    .putLong(interval.getStartMillis())
                    .array();

            DeterminePartitionsJob.write(taskInputOutputContext, groupKey, new DimValueCount("", "", 1));

            for (final Map.Entry<String, Iterable<String>> dimAndValues : map.entrySet()) {
                final String dim = dimAndValues.getKey();
                if (this.partitionDimension != null && !this.partitionDimension.equals(dim)) {
                    continue;
                }

                final Iterable<String> dimValues = dimAndValues.getValue();
                final DimValueCount record;
                if (Iterables.size(dimValues) == 1) {
                    record = new DimValueCount(dim, Iterables.getOnlyElement(dimValues), 1);
                } else {
                    record = new DimValueCount(dim, "", -1);
                }
                DeterminePartitionsJob.write(taskInputOutputContext, groupKey, record);
            }
        }
    }

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DeterminePartitionsDimSelectionOutputFormat.class */
    public static class DeterminePartitionsDimSelectionOutputFormat extends FileOutputFormat {
        public RecordWriter getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            return new RecordWriter<SortableBytes, List<ShardSpec>>() { // from class: io.druid.indexer.DeterminePartitionsJob.DeterminePartitionsDimSelectionOutputFormat.1
                public void write(SortableBytes sortableBytes, List<ShardSpec> list) throws IOException, InterruptedException {
                }

                public void close(TaskAttemptContext taskAttemptContext2) throws IOException, InterruptedException {
                }
            };
        }

        public void checkOutputSpecs(JobContext jobContext) throws IOException {
            if (getOutputPath(jobContext) == null) {
                throw new InvalidJobConfException("Output directory not set.");
            }
        }
    }

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DeterminePartitionsDimSelectionPartitioner.class */
    public static class DeterminePartitionsDimSelectionPartitioner extends Partitioner<BytesWritable, Text> implements Configurable {
        private Configuration config;

        public int getPartition(BytesWritable bytesWritable, Text text, int i) {
            ByteBuffer wrap = ByteBuffer.wrap(bytesWritable.getBytes());
            wrap.position(4);
            int i2 = wrap.getInt();
            if (this.config.get("mapred.job.tracker").equals("local")) {
                return i2 % i;
            }
            if (i2 >= i) {
                throw new ISE("Not enough partitions, index[%,d] >= numPartitions[%,d]. Please increase the number of reducers to the index size or check your config & settings!", new Object[]{Integer.valueOf(i2), Integer.valueOf(i)});
            }
            return i2;
        }

        public Configuration getConf() {
            return this.config;
        }

        public void setConf(Configuration configuration) {
            this.config = configuration;
        }
    }

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DeterminePartitionsDimSelectionPostGroupByMapper.class */
    public static class DeterminePartitionsDimSelectionPostGroupByMapper extends Mapper<BytesWritable, NullWritable, BytesWritable, Text> {
        private DeterminePartitionsDimSelectionMapperHelper helper;

        protected void setup(Mapper<BytesWritable, NullWritable, BytesWritable, Text>.Context context) throws IOException, InterruptedException {
            HadoopDruidIndexerConfig fromConfiguration = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
            this.helper = new DeterminePartitionsDimSelectionMapperHelper(fromConfiguration, ((SingleDimensionPartitionsSpec) fromConfiguration.getPartitionsSpec()).getPartitionDimension());
        }

        protected void map(BytesWritable bytesWritable, NullWritable nullWritable, Mapper<BytesWritable, NullWritable, BytesWritable, Text>.Context context) throws IOException, InterruptedException {
            List list = (List) HadoopDruidIndexerConfig.jsonMapper.readValue(bytesWritable.getBytes(), List.class);
            this.helper.emitDimValueCounts(context, new DateTime(list.get(0)), (Map) list.get(1));
        }

        protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
            map((BytesWritable) obj, (NullWritable) obj2, (Mapper<BytesWritable, NullWritable, BytesWritable, Text>.Context) context);
        }
    }

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DeterminePartitionsDimSelectionReducer.class */
    /**
     * Reducer that, for one bucket interval, builds candidate shard boundaries for every
     * single-valued dimension, picks one dimension, and writes its shard specs as JSON to the
     * segment partition info path of that interval.
     */
    public static class DeterminePartitionsDimSelectionReducer extends DeterminePartitionsDimSelectionBaseReducer {
        /** A trailing shard smaller than this fraction of the target size is merged into its predecessor. */
        private static final double SHARD_COMBINE_THRESHOLD = 0.25d;
        /** Above this cardinality the highest-cardinality dimension is preferred over the best-balanced one. */
        private static final int HIGH_CARDINALITY_THRESHOLD = 3000000;

        public DeterminePartitionsDimSelectionReducer() {
            super();
        }

        /**
         * Determines and persists the partitions for the interval encoded in the group key.
         *
         * @param context       reducer context, used to open the output stream
         * @param sortableBytes key whose group key holds the interval index and start millis
         * @param iterable      combined counts; the first must be the total-row marker (empty dim and value)
         * @throws IllegalStateException if the first element is not the total-row marker
         * @throws ISE                   if no dimension qualifies for partitioning
         */
        @Override
        protected void innerReduce(Reducer<BytesWritable, Text, BytesWritable, Text>.Context context, SortableBytes sortableBytes, Iterable<DimValueCount> iterable) throws IOException, InterruptedException {
            final ByteBuffer groupKey = ByteBuffer.wrap(sortableBytes.getGroupKey());
            groupKey.position(4); // skip the 4-byte interval index that precedes the start millis
            final DateTime bucket = new DateTime(groupKey.getLong());
            final PeekingIterator<DimValueCount> iterator = Iterators.peekingIterator(iterable.iterator());

            DeterminePartitionsJob.log.info("Determining partitions for interval: %s", this.config.getGranularitySpec().bucketInterval(bucket).orNull());

            final DimValueCount firstDvc = iterator.next();
            final int totalRows = firstDvc.numRows;
            if (!firstDvc.dim.equals("") || !firstDvc.value.equals("")) {
                throw new IllegalStateException("WTF?! Expected total row indicator on first k/v pair!");
            }

            // State for the dimension currently being scanned.
            DimPartitions currentDimPartitions = null;
            DimPartition currentDimPartition = null;
            String currentDimPartitionStart = null;
            boolean currentDimSkip = false;

            // Candidate partitionings of every dimension that could be fully scanned.
            final Map<String, DimPartitions> dimPartitionss = Maps.newHashMap();

            while (iterator.hasNext()) {
                final DimValueCount dvc = iterator.next();

                if (currentDimPartitions == null || !currentDimPartitions.dim.equals(dvc.dim)) {
                    // Starting a new dimension.
                    currentDimPartitions = new DimPartitions(dvc.dim);
                    currentDimPartition = new DimPartition();
                    currentDimPartitionStart = null;
                    currentDimSkip = false;
                }

                if (!currentDimSkip && dvc.numRows < 0) {
                    // A negative count flags the dimension as unusable for partitioning.
                    DeterminePartitionsJob.log.info("Cannot partition on multi-valued dimension: %s", dvc.dim);
                    currentDimSkip = true;
                }

                if (currentDimSkip) {
                    continue;
                }

                // Close the current shard when adding this value would reach the target size.
                if (currentDimPartition.rows > 0 && currentDimPartition.rows + dvc.numRows >= this.config.getTargetPartitionSize().longValue()) {
                    final ShardSpec shardSpec = new SingleDimensionShardSpec(currentDimPartitions.dim, currentDimPartitionStart, dvc.value, currentDimPartitions.partitions.size());
                    DeterminePartitionsJob.log.info("Adding possible shard with %,d rows and %,d unique values: %s", currentDimPartition.rows, currentDimPartition.cardinality, shardSpec);
                    currentDimPartition.shardSpec = shardSpec;
                    currentDimPartitions.partitions.add(currentDimPartition);
                    currentDimPartition = new DimPartition();
                    currentDimPartitionStart = dvc.value;
                }

                currentDimPartition.cardinality++;
                currentDimPartition.rows += dvc.numRows;

                final boolean lastValueOfDim = !iterator.hasNext() || !currentDimPartitions.dim.equals(iterator.peek().dim);
                if (!lastValueOfDim) {
                    continue;
                }

                // Finalize the dimension: add the trailing shard, or fold it into the previous one.
                if (currentDimPartition.rows > 0) {
                    final ShardSpec shardSpec;
                    if (currentDimPartitions.partitions.isEmpty()) {
                        shardSpec = new NoneShardSpec();
                    } else if (currentDimPartition.rows < this.config.getTargetPartitionSize().longValue() * SHARD_COMBINE_THRESHOLD) {
                        final DimPartition previousDimPartition = currentDimPartitions.partitions.remove(currentDimPartitions.partitions.size() - 1);
                        // Every shard added while partitions are non-empty is a SingleDimensionShardSpec.
                        final SingleDimensionShardSpec previousShardSpec = (SingleDimensionShardSpec) previousDimPartition.shardSpec;
                        shardSpec = new SingleDimensionShardSpec(currentDimPartitions.dim, previousShardSpec.getStart(), (String) null, previousShardSpec.getPartitionNum());
                        DeterminePartitionsJob.log.info("Removing possible shard: %s", previousShardSpec);
                        currentDimPartition.rows += previousDimPartition.rows;
                        currentDimPartition.cardinality += previousDimPartition.cardinality;
                    } else {
                        shardSpec = new SingleDimensionShardSpec(currentDimPartitions.dim, currentDimPartitionStart, (String) null, currentDimPartitions.partitions.size());
                    }
                    DeterminePartitionsJob.log.info("Adding possible shard with %,d rows and %,d unique values: %s", currentDimPartition.rows, currentDimPartition.cardinality, shardSpec);
                    currentDimPartition.shardSpec = shardSpec;
                    currentDimPartitions.partitions.add(currentDimPartition);
                }
                DeterminePartitionsJob.log.info("Completed dimension[%s]: %,d possible shards with %,d unique values", currentDimPartitions.dim, currentDimPartitions.partitions.size(), currentDimPartitions.getCardinality());
                dimPartitionss.put(currentDimPartitions.dim, currentDimPartitions);
            }

            if (dimPartitionss.isEmpty()) {
                throw new ISE("No suitable partitioning dimension found!");
            }

            // Among dimensions present in every row and without oversized shards, track both the
            // highest-cardinality one and the one whose shard sizes are closest to the target.
            int maxCardinality = Integer.MIN_VALUE;
            long minDistance = Long.MAX_VALUE;
            DimPartitions minDistancePartitions = null;
            DimPartitions maxCardinalityPartitions = null;

            for (final DimPartitions dimPartitions : dimPartitionss.values()) {
                if (dimPartitions.getRows() != totalRows) {
                    DeterminePartitionsJob.log.info("Dimension[%s] is not present in all rows (row count %,d != expected row count %,d)", dimPartitions.dim, dimPartitions.getRows(), totalRows);
                    continue;
                }

                boolean oversized = false;
                for (final DimPartition partition : dimPartitions.partitions) {
                    if (partition.rows > this.config.getMaxPartitionSize()) {
                        DeterminePartitionsJob.log.info("Dimension[%s] has an oversized shard: %s", dimPartitions.dim, partition.shardSpec);
                        oversized = true;
                    }
                }
                if (oversized) {
                    continue;
                }

                final int cardinality = dimPartitions.getCardinality();
                final long distance = dimPartitions.getDistanceSquaredFromTarget(this.config.getTargetPartitionSize().longValue());
                if (cardinality > maxCardinality) {
                    maxCardinality = cardinality;
                    maxCardinalityPartitions = dimPartitions;
                }
                if (distance < minDistance) {
                    minDistance = distance;
                    minDistancePartitions = dimPartitions;
                }
            }

            if (maxCardinalityPartitions == null) {
                throw new ISE("No suitable partitioning dimension found!");
            }

            final Interval interval = this.config.getGranularitySpec().bucketInterval(bucket).get();
            final OutputStream out = Utils.makePathAndOutputStream(context, this.config.makeSegmentPartitionInfoPath(interval), this.config.isOverwriteFiles());
            try {
                final DimPartitions chosenPartitions = maxCardinality > HIGH_CARDINALITY_THRESHOLD ? maxCardinalityPartitions : minDistancePartitions;
                final List<ShardSpec> chosenShardSpecs = Lists.transform(chosenPartitions.partitions, new Function<DimPartition, ShardSpec>() {
                    @Override
                    public ShardSpec apply(DimPartition dimPartition) {
                        return dimPartition.shardSpec;
                    }
                });

                // Logging serializes each spec and may throw; it runs inside the try so the stream is still closed.
                DeterminePartitionsJob.log.info("Chosen partitions:");
                for (final ShardSpec shardSpec : chosenShardSpecs) {
                    DeterminePartitionsJob.log.info("  %s", HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(shardSpec));
                }

                HadoopDruidIndexerConfig.jsonMapper.writerWithType(new TypeReference<List<ShardSpec>>() {
                }).writeValue(out, chosenShardSpecs);
            } finally {
                Closeables.close(out, false);
            }
        }
    }

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DeterminePartitionsGroupByMapper.class */
    /**
     * Mapper of the group-by job: emits the JSON-encoded group key of each input row (timestamp
     * truncated to the query granularity, plus the row) with a null value.
     */
    public static class DeterminePartitionsGroupByMapper extends HadoopDruidIndexerMapper<BytesWritable, NullWritable> {
        private QueryGranularity rollupGranularity = null;

        /** Runs the parent setup and then caches the query granularity from the configuration. */
        @Override
        public void setup(Mapper<Writable, Writable, BytesWritable, NullWritable>.Context context) throws IOException, InterruptedException {
            super.setup(context);
            this.rollupGranularity = getConfig().getGranularitySpec().getQueryGranularity();
        }

        /** Serializes the row's group key to JSON bytes and writes it as the output key. */
        @Override
        protected void innerMap(InputRow inputRow, Writable writable, Mapper<Writable, Writable, BytesWritable, NullWritable>.Context context) throws IOException, InterruptedException {
            final long truncatedTimestamp = this.rollupGranularity.truncate(inputRow.getTimestampFromEpoch());
            final byte[] groupKeyJson = HadoopDruidIndexerConfig.jsonMapper.writeValueAsBytes(Rows.toGroupKey(truncatedTimestamp, inputRow));
            final BytesWritable outputKey = new BytesWritable(groupKeyJson);
            context.write(outputKey, NullWritable.get());
        }
    }

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DeterminePartitionsGroupByReducer.class */
    public static class DeterminePartitionsGroupByReducer extends Reducer<BytesWritable, NullWritable, BytesWritable, NullWritable> {
        protected void reduce(BytesWritable bytesWritable, Iterable<NullWritable> iterable, Reducer<BytesWritable, NullWritable, BytesWritable, NullWritable>.Context context) throws IOException, InterruptedException {
            context.write(bytesWritable, NullWritable.get());
        }

        protected /* bridge */ /* synthetic */ void reduce(Object obj, Iterable iterable, Reducer.Context context) throws IOException, InterruptedException {
            reduce((BytesWritable) obj, (Iterable<NullWritable>) iterable, (Reducer<BytesWritable, NullWritable, BytesWritable, NullWritable>.Context) context);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DimPartition.class */
    /** Mutable accumulator for one candidate shard of a dimension. */
    public static class DimPartition {
        /** Shard spec assigned once the shard's boundaries are known. */
        public ShardSpec shardSpec = null;
        /** Number of distinct dimension values counted into this shard. */
        public int cardinality = 0;
        /** Number of rows counted into this shard. */
        public int rows = 0;

        private DimPartition() {
        }
    }

    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DimPartitions.class */
    /** The candidate shards of a single dimension, with aggregate statistics over them. */
    private static class DimPartitions {
        public final String dim;
        public final List<DimPartition> partitions = Lists.newArrayList();

        private DimPartitions(String str) {
            this.dim = str;
        }

        /** @return the sum of the cardinalities of all shards */
        public int getCardinality() {
            int total = 0;
            for (final DimPartition partition : this.partitions) {
                total += partition.cardinality;
            }
            return total;
        }

        /**
         * @param j target number of rows per shard
         * @return the mean of the squared differences between each shard's row count and {@code j}
         */
        public long getDistanceSquaredFromTarget(long j) {
            long sumOfSquares = 0;
            for (final DimPartition partition : this.partitions) {
                final long delta = partition.rows - j;
                sumOfSquares += delta * delta;
            }
            return sumOfSquares / this.partitions.size();
        }

        /** @return the sum of the row counts of all shards */
        public int getRows() {
            int total = 0;
            for (final DimPartition partition : this.partitions) {
                total += partition.rows;
            }
            return total;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:io/druid/indexer/DeterminePartitionsJob$DimValueCount.class */
    /**
     * Immutable (dimension, value, row count) triple exchanged between the dimension-selection
     * mappers and reducers. A negative row count flags a dimension that did not have exactly one
     * value in some row.
     */
    public static class DimValueCount {
        public final String dim;
        public final String value;
        public final int numRows;

        private DimValueCount(String str, String str2, int i) {
            this.dim = str;
            this.value = str2;
            this.numRows = i;
        }

        /**
         * Serializes this record as {@code dim <TAB> numRows <TAB> value}. The value goes last so
         * that it may itself contain the separator.
         */
        public Text toText() {
            return new Text(DeterminePartitionsJob.tabJoiner.join(this.dim, String.valueOf(this.numRows), this.value));
        }

        /**
         * Parses a record written by {@link #toText()}.
         *
         * @param text serialized record in {@code dim <TAB> numRows <TAB> value} form
         * @return the decoded record
         * @throws NumberFormatException if the second field is not an integer
         */
        public static DimValueCount fromText(Text text) {
            // Split into at most three fields; everything after the second separator is the value.
            final Iterator<String> fields = DeterminePartitionsJob.tabSplitter.limit(3).split(text.toString()).iterator();
            // Read the fields in the order toText() wrote them: dim, numRows, value.
            final String dim = fields.next();
            final int numRows = Integer.parseInt(fields.next());
            final String value = fields.next();
            return new DimValueCount(dim, value, numRows);
        }
    }

    /**
     * Creates the job for the given indexer configuration.
     *
     * @param hadoopDruidIndexerConfig configuration the job reads from; run() stores the resulting shard specs into it
     */
    public DeterminePartitionsJob(HadoopDruidIndexerConfig hadoopDruidIndexerConfig) {
        this.config = hadoopDruidIndexerConfig;
    }

    /**
     * Runs the partition determination pipeline:
     * <ol>
     *   <li>unless the partitions spec says the input is already grouped, a group-by job that
     *       writes the distinct group keys to the grouped data directory;</li>
     *   <li>the dimension-selection job, with one reducer per bucket interval;</li>
     *   <li>loading of the shard specs the reducers wrote for each segment interval, which are
     *       then stored in the configuration.</li>
     * </ol>
     *
     * @return {@code true} on success, {@code false} if either Hadoop job reports failure
     * @throws ISE if the configured partitions spec is not a {@link SingleDimensionPartitionsSpec}
     * @throws RuntimeException wrapping any checked exception raised along the way
     */
    public boolean run() {
        try {
            if (!(this.config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
                throw new ISE("DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]", this.config.getPartitionsSpec());
            }

            if (this.config.getPartitionsSpec().isAssumeGrouped()) {
                log.info("Skipping group-by job.");
            } else {
                final Job groupByJob = Job.getInstance(new Configuration(), String.format("%s-determine_partitions_groupby-%s", this.config.getDataSource(), this.config.getIntervals()));

                JobHelper.injectSystemProperties(groupByJob);
                JobHelper.setInputFormat(groupByJob, this.config);
                groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
                groupByJob.setMapOutputKeyClass(BytesWritable.class);
                groupByJob.setMapOutputValueClass(NullWritable.class);
                groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
                groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
                groupByJob.setOutputKeyClass(BytesWritable.class);
                groupByJob.setOutputValueClass(NullWritable.class);
                groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
                JobHelper.setupClasspath(JobHelper.distributedClassPath(this.config.getWorkingPath()), groupByJob);

                this.config.addInputPaths(groupByJob);
                this.config.addJobProperties(groupByJob);
                this.config.intoConfiguration(groupByJob);
                FileOutputFormat.setOutputPath(groupByJob, this.config.makeGroupedDataDir());

                groupByJob.submit();
                log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());

                if (!groupByJob.waitForCompletion(true)) {
                    log.error("Job failed: %s", groupByJob.getJobID());
                    return false;
                }
            }

            final Job dimSelectionJob = Job.getInstance(new Configuration(), String.format("%s-determine_partitions_dimselection-%s", this.config.getDataSource(), this.config.getIntervals()));
            dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");
            JobHelper.injectSystemProperties(dimSelectionJob);

            if (this.config.getPartitionsSpec().isAssumeGrouped()) {
                // Read the raw input directly.
                dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
                JobHelper.setInputFormat(dimSelectionJob, this.config);
                this.config.addInputPaths(dimSelectionJob);
            } else {
                // Read the output of the group-by job.
                dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
                dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
                FileInputFormat.addInputPath(dimSelectionJob, this.config.makeGroupedDataDir());
            }

            SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
            dimSelectionJob.setMapOutputValueClass(Text.class);
            dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
            dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
            dimSelectionJob.setOutputKeyClass(BytesWritable.class);
            dimSelectionJob.setOutputValueClass(Text.class);
            dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
            dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
            // One reducer per bucket interval; the partitioner routes by interval index.
            dimSelectionJob.setNumReduceTasks(this.config.getGranularitySpec().bucketIntervals().get().size());
            JobHelper.setupClasspath(JobHelper.distributedClassPath(this.config.getWorkingPath()), dimSelectionJob);

            this.config.addJobProperties(dimSelectionJob);
            this.config.intoConfiguration(dimSelectionJob);
            FileOutputFormat.setOutputPath(dimSelectionJob, this.config.makeIntermediatePath());

            dimSelectionJob.submit();
            log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(), dimSelectionJob.getTrackingURL());

            if (!dimSelectionJob.waitForCompletion(true)) {
                log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
                return false;
            }

            log.info("Job completed, loading up partitions for intervals[%s].", this.config.getSegmentGranularIntervals());

            FileSystem fileSystem = null;
            final Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
            int shardCount = 0; // running shard number across all intervals
            for (final Interval segmentGranularity : this.config.getSegmentGranularIntervals().get()) {
                final Path partitionInfoPath = this.config.makeSegmentPartitionInfoPath(segmentGranularity);
                if (fileSystem == null) {
                    fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
                }

                if (!Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
                    log.info("Path[%s] didn't exist!?", partitionInfoPath);
                    continue;
                }

                final List<ShardSpec> specs = HadoopDruidIndexerConfig.jsonMapper.readValue(Utils.openInputStream(dimSelectionJob, partitionInfoPath), new TypeReference<List<ShardSpec>>() {
                });

                final List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); i++) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount));
                    shardCount++;
                    log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
                }

                shardSpecs.put(segmentGranularity.getStart(), actualSpecs);
            }
            this.config.setShardSpecs(shardSpecs);

            return true;
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Writes one record: the key is a {@link SortableBytes} built from the group key and the
     * tab-joined (dim, value) sort key, the value is the record's text form.
     *
     * @param taskInputOutputContext context to write to
     * @param bArr                   group key bytes
     * @param dimValueCount          record to emit
     */
    public static void write(TaskInputOutputContext<? extends Writable, ? extends Writable, BytesWritable, Text> taskInputOutputContext, byte[] bArr, DimValueCount dimValueCount) throws IOException, InterruptedException {
        final String dimAndValue = tabJoiner.join(dimValueCount.dim, dimValueCount.value);
        final byte[] sortKey = dimAndValue.getBytes(HadoopDruidIndexerConfig.javaNativeCharset);
        final BytesWritable outputKey = new SortableBytes(bArr, sortKey).toBytesWritable();
        taskInputOutputContext.write(outputKey, dimValueCount.toText());
    }
}
