package org.apache.spark.ml.odkl;

import java.io.IOException;
import org.apache.spark.annotation.DeveloperApi;
import org.apache.spark.ml.Transformer;
import org.apache.spark.ml.linalg.VectorUDT;
import org.apache.spark.ml.odkl.HasGroupByColumns;
import org.apache.spark.ml.param.DoubleArrayParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.Params;
import org.apache.spark.ml.param.StringArrayParam;
import org.apache.spark.ml.param.shared.HasInputCol;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLWritable;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.LongType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.Map$;
import scala.collection.immutable.Nil$;
import scala.math.Ordering;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

/* compiled from: VectorStatCollector.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005ud\u0001B\u0001\u0003\u00015\u00111CV3di>\u00148\u000b^1u\u0007>dG.Z2u_JT!a\u0001\u0003\u0002\t=$7\u000e\u001c\u0006\u0003\u000b\u0019\t!!\u001c7\u000b\u0005\u001dA\u0011!B:qCJ\\'BA\u0005\u000b\u0003\u0019\t\u0007/Y2iK*\t1\"A\u0002pe\u001e\u001c\u0001aE\u0003\u0001\u001dIQb\u0004\u0005\u0002\u0010!5\tA!\u0003\u0002\u0012\t\tYAK]1og\u001a|'/\\3s!\t\u0019\u0002$D\u0001\u0015\u0015\t)b#\u0001\u0004tQ\u0006\u0014X\r\u001a\u0006\u0003/\u0011\tQ\u0001]1sC6L!!\u0007\u000b\u0003\u0017!\u000b7/\u00138qkR\u001cu\u000e\u001c\t\u00037qi\u0011AA\u0005\u0003;\t\u0011\u0011\u0003S1t\u000fJ|W\u000f\u001d\"z\u0007>dW/\u001c8t!\ty\"%D\u0001!\u0015\t\tC!\u0001\u0003vi&d\u0017BA\u0012!\u0005U!UMZ1vYR\u0004\u0016M]1ng^\u0013\u0018\u000e^1cY\u0016D\u0001\"\n\u0001\u0003\u0006\u0004%\tEJ\u0001\u0004k&$W#A\u0014\u0011\u0005!rcBA\u0015-\u001b\u0005Q#\"A\u0016\u0002\u000bM\u001c\u0017\r\\1\n\u00055R\u0013A\u0002)sK\u0012,g-\u0003\u00020a\t11\u000b\u001e:j]\u001eT!!\f\u0016\t\u0011I\u0002!\u0011!Q\u0001\n\u001d\nA!^5eA!)A\u0007\u0001C\u0001k\u00051A(\u001b8jiz\"\"AN\u001c\u0011\u0005m\u0001\u0001\"B\u00134\u0001\u00049\u0003bB\u001d\u0001\u0005\u0004%\tAO\u0001\fa\u0016\u00148-\u001a8uS2,7/F\u0001<!\taT(D\u0001\u0017\u0013\tqdC\u0001\tE_V\u0014G.Z!se\u0006L\b+\u0019:b[\"1\u0001\t\u0001Q\u0001\nm\nA\u0002]3sG\u0016tG/\u001b7fg\u0002BqA\u0011\u0001C\u0002\u0013\u00051)\u0001\u0006eS6,gn]5p]N,\u0012\u0001\u0012\t\u0004y\u0015;\u0015B\u0001$\u0017\u0005\u0015\u0001\u0016M]1n!\tI\u0003*\u0003\u0002JU\t\u0019\u0011J\u001c;\t\r-\u0003\u0001\u0015!\u0003E\u0003-!\u0017.\\3og&|gn\u001d\u0011\t\u000f5\u0003!\u0019!C\u0001\u0007\u0006Y1m\\7qe\u0016\u001c8/[8o\u0011\u0019y\u0005\u0001)A\u0005\t\u0006a1m\\7qe\u0016\u001c8/[8oA!9\u0011\u000b\u0001b\u0001\n\u0003\u0019\u0015!\u00048v[B\u000b'\u000f^5uS>t7\u000f\u0003\u0004T\u0001\u0001\u0006I\u0001R\u0001\u000f]Vl\u0007+\u0019:uSRLwN\\:!\u0011\u001d)\u0006A1A\u0005\u0002\r\u000bAC\\;n'\",hM\u001a7f!\u0006\u0014H/\u
001b;j_:\u001c\bBB,\u0001A\u0003%A)A\u000bok6\u001c\u0006.\u001e4gY\u0016\u0004\u0016M\u001d;ji&|gn\u001d\u0011\t\u000be\u0003A\u0011\u0001.\u0002\u0017M,G/\u00138qkR\u001cu\u000e\u001c\u000b\u00037rk\u0011\u0001\u0001\u0005\u0006;b\u0003\raJ\u0001\u0007G>dW/\u001c8\t\u000b}\u0003A\u0011\u00011\u0002!M,GOT;n!\u0006\u0014H/\u001b;j_:\u001cHCA.b\u0011\u0015\u0011g\f1\u0001H\u0003\u00151\u0018\r\\;f\u0011\u0015!\u0007\u0001\"\u0001f\u0003]\u0019X\r\u001e(v[NCWO\u001a4mKB\u000b'\u000f^5uS>t7\u000f\u0006\u0002\\M\")!m\u0019a\u0001\u000f\")\u0001\u000e\u0001C\u0001S\u0006q1/\u001a;D_6\u0004(/Z:tS>tGCA.k\u0011\u0015\u0011w\r1\u0001H\u0011\u0015a\u0007\u0001\"\u0001n\u00035\u0019X\r\u001e#j[\u0016t7/[8ogR\u00111L\u001c\u0005\u0006E.\u0004\ra\u0012\u0005\u0006a\u0002!\t!]\u0001\u000fg\u0016$\b+\u001a:dK:$\u0018\u000e\\3t)\tY&\u000fC\u0003c_\u0002\u00071\u000fE\u0002*iZL!!\u001e\u0016\u0003\u000b\u0005\u0013(/Y=\u0011\u0005%:\u0018B\u0001=+\u0005\u0019!u.\u001e2mK\")A\u0007\u0001C\u0001uR\ta\u0007C\u0003}\u0001\u0011\u0005S0A\u0005ue\u0006t7OZ8s[R\u0019a0!\n\u0011\u0007}\fyB\u0004\u0003\u0002\u0002\u0005ea\u0002BA\u0002\u0003+qA!!\u0002\u0002\u00149!\u0011qAA\t\u001d\u0011\tI!a\u0004\u000e\u0005\u0005-!bAA\u0007\u0019\u00051AH]8pizJ\u0011aC\u0005\u0003\u0013)I!a\u0002\u0005\n\u0007\u0005]a!A\u0002tc2LA!a\u0007\u0002\u001e\u00059\u0001/Y2lC\u001e,'bAA\f\r%!\u0011\u0011EA\u0012\u0005%!\u0015\r^1Ge\u0006lWM\u0003\u0003\u0002\u001c\u0005u\u0001bBA\u0014w\u0002\u0007\u0011\u0011F\u0001\bI\u0006$\u0018m]3ua\u0011\tY#a\u000e\u0011\r\u00055\u0012qFA\u001a\u001b\t\ti\"\u0003\u0003\u00022\u0005u!a\u0002#bi\u0006\u001cX\r\u001e\t\u0005\u0003k\t9\u0004\u0004\u0001\u0005\u0019\u0005e\u0012QEA\u0001\u0002\u0003\u0015\t!a\u000f\u0003\u0007}#\u0013'\u0005\u0003\u0002>\u0005\r\u0003cA\u0015\u0002@%\u0019\u0011\u0011\t\u0016\u0003\u000f9{G\u000f[5oOB\u0019\u0011&!\u0012\n\u0007\u0005\u001d#FA\u0002B]fDq!a\u0013\u0001\t\u0003\ni%\u0001\u0003d_BLHc\u0001\b\u0002P!A\u0011\u0011KA%\u0001\u0004\t\u
0019&A\u0003fqR\u0014\u0018\rE\u0002=\u0003+J1!a\u0016\u0017\u0005!\u0001\u0016M]1n\u001b\u0006\u0004\bbBA.\u0001\u0011\u0005\u0013QL\u0001\u0010iJ\fgn\u001d4pe6\u001c6\r[3nCR!\u0011qLA6!\u0011\t\t'a\u001a\u000e\u0005\u0005\r$\u0002BA3\u0003;\tQ\u0001^=qKNLA!!\u001b\u0002d\tQ1\u000b\u001e:vGR$\u0016\u0010]3\t\u0011\u00055\u0014\u0011\fa\u0001\u0003?\naa]2iK6\f\u0007\u0006BA-\u0003c\u0002B!a\u001d\u0002z5\u0011\u0011Q\u000f\u0006\u0004\u0003o2\u0011AC1o]>$\u0018\r^5p]&!\u00111PA;\u00051!UM^3m_B,'/\u00119j\u0001")
/* loaded from: input_file:org/apache/spark/ml/odkl/VectorStatCollector.class */
/**
 * Spark ML {@link Transformer} that aggregates a vector column into per-group summary statistics.
 *
 * <p>The output schema (see {@link #transformSchema(StructType)}) consists of the configured
 * group-by columns followed by {@code <inputCol>_count}, {@code _mean}, {@code _var}, {@code _min},
 * {@code _max}, {@code _nonZeros}, {@code _L1}, {@code _L2} and one additional field per configured
 * percentile.
 *
 * <p>Defaults registered by the constructor: {@code groupByColumns} = empty array,
 * {@code percentiles} = {0.1, 0.5, 0.9}, {@code compression} = 150.
 *
 * <p>NOTE(review): this file is decompiler output of a Scala class. Constructs such as
 * {@code SomeTrait.class.method(this)} and assignments to {@code final} fields outside the
 * constructor are not valid Java; presumably they are the decompiler's rendering of Scala trait
 * implementation calls and trait field setters - confirm against the original Scala source
 * before editing any code here.
 */
public class VectorStatCollector extends Transformer implements HasInputCol, HasGroupByColumns, DefaultParamsWritable {
    // Unique identifier of this pipeline stage, supplied to the constructor.
    private final String uid;
    // "Percentiles to calculate for the vectors."
    private final DoubleArrayParam percentiles;
    // "Dimensionality of vectors to aggregate. Taken from metadata if not provided." (boxed int)
    private final Param<Object> dimensions;
    // Accuracy/size trade-off for quantile estimation; see the description string in the constructor. (boxed int)
    private final Param<Object> compression;
    // "Number of partitions for final result." (boxed int)
    private final Param<Object> numPartitions;
    // Number of partitions for the optional intermediate shuffle in transform(). (boxed int)
    private final Param<Object> numShufflePartitions;
    // Backing field for the HasGroupByColumns param; written by the trait setter below.
    private final StringArrayParam groupByColumns;
    // Backing field for the HasInputCol param; written by the trait setter below.
    private final Param<String> inputCol;

    /**
     * Returns the writer obtained from the DefaultParamsWritable trait.
     * NOTE(review): the {@code DefaultParamsWritable.class.write(this)} form is decompiler syntax,
     * presumably a call into the Scala trait implementation - confirm.
     */
    public MLWriter write() {
        return DefaultParamsWritable.class.write(this);
    }

    /**
     * Saves this instance to the given path by delegating to the MLWritable trait.
     *
     * @param str destination path
     * @throws IOException declared by the delegated save
     */
    public void save(String str) throws IOException {
        MLWritable.class.save(this, str);
    }

    /** Accessor for the {@code groupByColumns} param. */
    @Override // org.apache.spark.ml.odkl.HasGroupByColumns
    public final StringArrayParam groupByColumns() {
        return this.groupByColumns;
    }

    /**
     * Trait-generated setter that stores the {@code groupByColumns} param; invoked from the constructor.
     * NOTE(review): assigns a field declared {@code final}, which javac rejects outside a
     * constructor - presumably a decompiler artifact; confirm.
     */
    @Override // org.apache.spark.ml.odkl.HasGroupByColumns
    public final void org$apache$spark$ml$odkl$HasGroupByColumns$_setter_$groupByColumns_$eq(StringArrayParam stringArrayParam) {
        this.groupByColumns = stringArrayParam;
    }

    /**
     * Sets the grouping columns by delegating to the HasGroupByColumns trait implementation.
     *
     * @param seq names of the columns to group by
     * @return whatever the trait implementation returns (presumably this instance - confirm)
     */
    @Override // org.apache.spark.ml.odkl.HasGroupByColumns
    public HasGroupByColumns setGroupByColumns(Seq<String> seq) {
        return HasGroupByColumns.Cclass.setGroupByColumns(this, seq);
    }

    /** Accessor for the {@code inputCol} param. */
    public final Param<String> inputCol() {
        return this.inputCol;
    }

    /**
     * Trait-generated setter that stores the {@code inputCol} param.
     * NOTE(review): presumably called from {@code HasInputCol.$init$} in the constructor - confirm.
     * Same final-field caveat as the groupByColumns setter.
     */
    public final void org$apache$spark$ml$param$shared$HasInputCol$_setter_$inputCol_$eq(Param param) {
        this.inputCol = param;
    }

    /** Returns the current value of {@code inputCol} via the HasInputCol trait implementation. */
    public final String getInputCol() {
        return HasInputCol.class.getInputCol(this);
    }

    /** Returns the unique identifier given at construction time. */
    public String uid() {
        return this.uid;
    }

    /** Accessor for the {@code percentiles} param. */
    public DoubleArrayParam percentiles() {
        return this.percentiles;
    }

    /** Accessor for the {@code dimensions} param. */
    public Param<Object> dimensions() {
        return this.dimensions;
    }

    /** Accessor for the {@code compression} param. */
    public Param<Object> compression() {
        return this.compression;
    }

    /** Accessor for the {@code numPartitions} param. */
    public Param<Object> numPartitions() {
        return this.numPartitions;
    }

    /** Accessor for the {@code numShufflePartitions} param. */
    public Param<Object> numShufflePartitions() {
        return this.numShufflePartitions;
    }

    /** Sets the {@code inputCol} param and returns the result of {@code set(...)}. */
    public VectorStatCollector setInputCol(String str) {
        return set(inputCol(), str);
    }

    /** Sets the {@code numPartitions} param (boxed) and returns the result of {@code set(...)}. */
    public VectorStatCollector setNumPartitions(int i) {
        return set(numPartitions(), BoxesRunTime.boxToInteger(i));
    }

    /** Sets the {@code numShufflePartitions} param (boxed) and returns the result of {@code set(...)}. */
    public VectorStatCollector setNumShufflePartitions(int i) {
        return set(numShufflePartitions(), BoxesRunTime.boxToInteger(i));
    }

    /** Sets the {@code compression} param (boxed) and returns the result of {@code set(...)}. */
    public VectorStatCollector setCompression(int i) {
        return set(compression(), BoxesRunTime.boxToInteger(i));
    }

    /** Sets the {@code dimensions} param (boxed) and returns the result of {@code set(...)}. */
    public VectorStatCollector setDimensions(int i) {
        return set(dimensions(), BoxesRunTime.boxToInteger(i));
    }

    /** Sets the {@code percentiles} param and returns the result of {@code set(...)}. */
    public VectorStatCollector setPercentiles(double[] dArr) {
        return set(percentiles(), dArr);
    }

    /**
     * Aggregates the input vector column per group key and returns the statistics as a DataFrame.
     *
     * <p>Steps, as visible here: select (key struct, input column) -> per-partition aggregation via
     * closure {@code $$anonfun$4} -> optional repartition -> {@code reduceByKey} -> map to rows ->
     * {@code createDataFrame} with the schema from {@link #transformSchema(StructType)}.
     *
     * @param dataset input dataset containing the column named by {@code inputCol}
     * @return DataFrame created from the reduced rows
     */
    public Dataset<Row> transform(Dataset<?> dataset) {
        RDD rdd;
        // Explicit dimensions param if set, otherwise computed by closure $1 from the dataset
        // (per the param description, taken from column metadata).
        int unboxToInt = BoxesRunTime.unboxToInt(get(dimensions()).getOrElse(new VectorStatCollector$$anonfun$1(this, dataset)));
        // Compression setting (default 150, registered in the constructor).
        int unboxToInt2 = BoxesRunTime.unboxToInt($(compression()));
        // Explicit numPartitions if set, otherwise computed by closure $2 from the dataset.
        int unboxToInt3 = BoxesRunTime.unboxToInt(get(numPartitions()).getOrElse(new VectorStatCollector$$anonfun$2(this, dataset)));
        // True only when groupByColumns is defined and non-empty.
        boolean z = isDefined(groupByColumns()) && !Predef$.MODULE$.refArrayOps((Object[]) $(groupByColumns())).isEmpty();
        // Select two columns: a key struct (the group-by columns mapped through closure $3, or a
        // struct holding the literal 1 aliased "lit" when not grouping) and the input column.
        RDD rdd2 = dataset.toDF().select(Predef$.MODULE$.wrapRefArray(new Column[]{z ? functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray((Object[]) Predef$.MODULE$.refArrayOps((Object[]) $(groupByColumns())).map(new VectorStatCollector$$anonfun$3(this, dataset), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class))))) : functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.lit(BoxesRunTime.boxToInteger(1)).as("lit")})), dataset.apply((String) $(inputCol()))})).rdd();
        // Per-partition pass producing Tuple2 elements; closure $4 receives dimensions and compression.
        RDD mapPartitions = rdd2.mapPartitions(new VectorStatCollector$$anonfun$4(this, unboxToInt, unboxToInt2), rdd2.mapPartitions$default$2(), ClassTag$.MODULE$.apply(Tuple2.class));
        // Optional intermediate repartition, applied only when numShufflePartitions is defined.
        if (isDefined(numShufflePartitions())) {
            int unboxToInt4 = BoxesRunTime.unboxToInt($(numShufflePartitions()));
            rdd = mapPartitions.repartition(unboxToInt4, mapPartitions.repartition$default$2(unboxToInt4));
        } else {
            rdd = mapPartitions;
        }
        RDD rdd3 = rdd;
        SQLContext sqlContext = dataset.sqlContext();
        // Class tags for the pair RDD: keys are Rows, values are ExtendedMultivariateOnlineSummarizer.
        ClassTag apply = ClassTag$.MODULE$.apply(Row.class);
        ClassTag apply2 = ClassTag$.MODULE$.apply(ExtendedMultivariateOnlineSummarizer.class);
        // The default for the 4th argument is evaluated and discarded; a null Ordering is passed below.
        RDD$.MODULE$.rddToPairRDDFunctions$default$4(rdd3);
        // Reduce by key with closure transform$1 into unboxToInt3 partitions, map each result to a
        // Row with closure transform$2 (which receives the grouping flag), then build the DataFrame.
        return sqlContext.createDataFrame(RDD$.MODULE$.rddToPairRDDFunctions(rdd3, apply, apply2, (Ordering) null).reduceByKey(new VectorStatCollector$$anonfun$transform$1(this), unboxToInt3).map(new VectorStatCollector$$anonfun$transform$2(this, z), ClassTag$.MODULE$.apply(Row.class)), transformSchema(dataset.schema()));
    }

    /* renamed from: copy, reason: merged with bridge method [inline-methods] and merged with bridge method [inline-methods] */
    /** Copies this stage with extra params by delegating to {@code defaultCopy}. */
    public Transformer m499copy(ParamMap paramMap) {
        return defaultCopy(paramMap);
    }

    /**
     * Builds the output schema: the group-by fields (produced by closure transformSchema$3 from the
     * input schema), then the fixed statistic fields, then one field per configured percentile
     * (produced by closure transformSchema$4; its field naming is not visible here).
     *
     * <p>Every fixed statistic field is non-nullable and carries the metadata of the input column.
     * {@code _count} is a LongType; {@code _mean}, {@code _var}, {@code _min}, {@code _max},
     * {@code _nonZeros}, {@code _L1} and {@code _L2} are VectorUDT.
     *
     * @param structType schema of the input dataset; must contain the column named by {@code inputCol}
     * @return schema of the DataFrame produced by {@link #transform(Dataset)}
     */
    @DeveloperApi
    public StructType transformSchema(StructType structType) {
        // Builds a single-entry map ("" -> 0.0) and maps it through closure transformSchema$1; the
        // result is discarded. NOTE(review): purpose is not visible from here - confirm it is dead code.
        Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(""), BoxesRunTime.boxToDouble(0.0d))})).map(new VectorStatCollector$$anonfun$transformSchema$1(this), Map$.MODULE$.canBuildFrom());
        // Info-level log; the message is produced lazily by closure transformSchema$2 from the input schema.
        logInfo(new VectorStatCollector$$anonfun$transformSchema$2(this, structType));
        // Concatenation: group-by fields ++ fixed statistic fields ++ percentile fields.
        return new StructType((StructField[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((Object[]) $(groupByColumns())).map(new VectorStatCollector$$anonfun$transformSchema$3(this, structType), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(StructField.class)))).$plus$plus(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new StructField[]{new StructField(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_count"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{$(inputCol())})), LongType$.MODULE$, false, structType.apply((String) $(inputCol())).metadata()), new StructField(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_mean"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{$(inputCol())})), new VectorUDT(), false, structType.apply((String) $(inputCol())).metadata()), new StructField(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_var"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{$(inputCol())})), new VectorUDT(), false, structType.apply((String) $(inputCol())).metadata()), new StructField(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_min"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{$(inputCol())})), new VectorUDT(), false, structType.apply((String) $(inputCol())).metadata()), new StructField(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_max"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{$(inputCol())})), new VectorUDT(), false, structType.apply((String) $(inputCol())).metadata()), new StructField(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_nonZeros"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{$(inputCol())})), new VectorUDT(), false, structType.apply((String) $(inputCol())).metadata()), new StructField(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_L1"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{$(inputCol())})), new VectorUDT(), 
false, structType.apply((String) $(inputCol())).metadata()), new StructField(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_L2"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{$(inputCol())})), new VectorUDT(), false, structType.apply((String) $(inputCol())).metadata())})), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(StructField.class)))).$plus$plus(Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.doubleArrayOps((double[]) $(percentiles())).map(new VectorStatCollector$$anonfun$transformSchema$4(this, structType), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(StructField.class)))), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(StructField.class))));
    }

    /**
     * Creates a collector with the given uid, initialises the mixed-in traits, declares all params
     * and registers the defaults.
     *
     * @param str unique identifier for this stage
     */
    public VectorStatCollector(String str) {
        this.uid = str;
        // Trait initialisers and param declarations run in this fixed order; keep the order intact.
        HasInputCol.class.$init$(this);
        org$apache$spark$ml$odkl$HasGroupByColumns$_setter_$groupByColumns_$eq(new StringArrayParam((Params) this, "groupByColumns", "Grouping criteria for the evaluation."));
        MLWritable.class.$init$(this);
        DefaultParamsWritable.class.$init$(this);
        this.percentiles = new DoubleArrayParam(this, "percentiles", "Percentiles to calculate for the vectors.");
        this.dimensions = new Param<>(this, "dimensions", "Dimensionality of vectors to aggregate. Taken from metadata if not provided.");
        this.compression = new Param<>(this, "compression", "How should accuracy be traded for size?  A value of N here will give quantile errors almost always less than 3/N with considerably smaller errors expected for extreme quantiles.  Conversely, you should expect to track about 5 N centroids for this accuracy.");
        this.numPartitions = new Param<>(this, "numPartitions", "Number of partitions for final result.");
        this.numShufflePartitions = new Param<>(this, "numShufflePartitions", "Number of partitions used for intermediate shuffle. In case if there are only a few keys in the result this could improve performance by adding an intermediate combiner.");
        // Defaults: no grouping columns, percentiles 0.1/0.5/0.9, compression 150.
        // dimensions, numPartitions and numShufflePartitions get no default here.
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{groupByColumns().$minus$greater(Array$.MODULE$.apply(Nil$.MODULE$, ClassTag$.MODULE$.apply(String.class))), percentiles().$minus$greater(new double[]{0.1d, 0.5d, 0.9d}), compression().$minus$greater(BoxesRunTime.boxToInteger(150))}));
    }

    /** Creates a collector with a random uid generated from the prefix "vectorsStatCollector". */
    public VectorStatCollector() {
        this(Identifiable$.MODULE$.randomUID("vectorsStatCollector"));
    }
}
