package org.apache.spark.examples.mllib;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.examples.mllib.SampledRDDs;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.mllib.util.MLUtils$;
import org.apache.spark.rdd.PairRDDFunctions;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Iterable$;
import scala.collection.IterableLike;
import scala.collection.Map;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.StringBuilder;
import scala.math.Numeric$LongIsIntegral$;
import scala.math.Ordering$Int$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scopt.OptionParser;
import scopt.Read$;

/* compiled from: SampledRDDs.scala */
/* loaded from: input_file:org/apache/spark/examples/mllib/SampledRDDs$.class */
/**
 * Singleton module class for the {@code SampledRDDs} example application
 * (decompiled from {@code SampledRDDs.scala}).
 *
 * <p>{@link #main(String[])} parses the command line and {@link #run} loads a
 * LIBSVM data set, then prints the sizes of several random and stratified
 * samples drawn from it.
 */
public final class SampledRDDs$ {
    /**
     * The single instance of this module. Initialised directly so that the
     * {@code final} field is assigned exactly once, at class-initialisation time.
     */
    public static final SampledRDDs$ MODULE$ = new SampledRDDs$();

    /**
     * Command-line entry point.
     *
     * <p>Builds a scopt {@code OptionParser} that recognises a single
     * {@code input} option, parses {@code strArr} against default parameters,
     * and hands the result to the generated closures
     * {@code SampledRDDs$$anonfun$main$1} (on success) or
     * {@code SampledRDDs$$anonfun$main$2} (on failure).
     * NOTE(review): those closures are defined outside this file; presumably
     * the first invokes {@link #run} and the second exits — confirm.
     *
     * @param strArr raw command-line arguments
     */
    public void main(String[] strArr) {
        // Default parameters; also used to show the default input path in the help text.
        final SampledRDDs.Params params = new SampledRDDs.Params(SampledRDDs$Params$.MODULE$.apply$default$1());
        new OptionParser<SampledRDDs.Params>(params) { // from class: org.apache.spark.examples.mllib.SampledRDDs$$anon$1
            {
                // NOTE(review): decompiler output — the super call appears inside the
                // instance initializer; left untouched because the generated closure
                // below is typed against this anonymous class.
                super("SampledRDDs");
                head(Predef$.MODULE$.wrapRefArray(new String[]{"SampledRDDs: an example app for randomly generated and sampled RDDs."}));
                opt("input", Read$.MODULE$.stringRead()).text(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Input path to labeled examples in LIBSVM format, default: ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{params.input()}))).action(new SampledRDDs$$anon$1$$anonfun$3(this));
                note(new StringOps(Predef$.MODULE$.augmentString("\n        |For example, the following command runs this app:\n        |\n        | bin/spark-submit --class org.apache.spark.examples.mllib.SampledRDDs \\\n        |  examples/target/scala-*/spark-examples-*.jar\n        ")).stripMargin());
            }
        }.parse(Predef$.MODULE$.wrapRefArray(strArr), params).map(new SampledRDDs$$anonfun$main$1()).getOrElse(new SampledRDDs$$anonfun$main$2());
    }

    /**
     * Runs the sampling demonstration.
     *
     * <p>Loads the LIBSVM file named by {@code params.input()}, then prints:
     * the size of an {@code RDD.sample()} and an {@code RDD.takeSample()}
     * sample, the total size of a {@code sampleByKey} and a
     * {@code sampleByKeyExact} sample keyed by label, and finally a per-key
     * table produced by {@code SampledRDDs$$anonfun$run$1}.
     *
     * <p>The {@code SparkContext} created here is always stopped before this
     * method returns or throws.
     *
     * @param params parsed application parameters; only {@code input()} is read
     *               here (the whole object is also embedded in the Spark app name)
     * @throws RuntimeException if the loaded data set contains no examples
     */
    public void run(SampledRDDs.Params params) {
        // Sampling fraction used for every sampling call below.
        final double fraction = 0.1d;

        SparkContext sparkContext = new SparkContext(new SparkConf().setAppName(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"SampledRDDs with ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{params}))));
        try {
            RDD examples = MLUtils$.MODULE$.loadLibSVMFile(sparkContext, params.input());
            long numExamples = examples.count();
            if (numExamples == 0) {
                throw new RuntimeException("Error: Data file had no samples to load.");
            }
            Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Loaded data with ", " examples from file: ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToLong(numExamples), params.input()})));

            // Plain (non-stratified) sampling, with replacement.
            int expectedSampleSize = (int) (numExamples * fraction);
            Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Sampling RDD using fraction ", ".  Expected sample size = ", "."})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToDouble(fraction), BoxesRunTime.boxToInteger(expectedSampleSize)})));
            Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"  RDD.sample(): sample has ", " examples"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToLong(examples.sample(true, fraction, examples.sample$default$3()).count())})));
            Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"  RDD.takeSample(): sample has ", " examples"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToInteger(((LabeledPoint[]) examples.takeSample(true, expectedSampleSize, examples.takeSample$default$3())).length)})));
            Predef$.MODULE$.println();

            // Re-key the data as pairs (Int key, Vector value per the class tags used below).
            RDD keyedExamples = examples.map(new SampledRDDs$$anonfun$4(), ClassTag$.MODULE$.apply(Tuple2.class));
            Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"  Keyed data using label (Int) as key ==> Orig"})).s(Nil$.MODULE$));
            Map keyCounts = RDD$.MODULE$.rddToPairRDDFunctions(keyedExamples, ClassTag$.MODULE$.Int(), ClassTag$.MODULE$.apply(Vector.class), Ordering$Int$.MODULE$).countByKey();

            // Per-key sampling fractions, built from the observed keys.
            scala.collection.immutable.Map fractionsByKey = ((TraversableOnce) keyCounts.keys().map(new SampledRDDs$$anonfun$5(fraction), Iterable$.MODULE$.canBuildFrom())).toMap(Predef$.MODULE$.$conforms());

            // Approximate stratified sample.
            PairRDDFunctions approxSampler = RDD$.MODULE$.rddToPairRDDFunctions(keyedExamples, ClassTag$.MODULE$.Int(), ClassTag$.MODULE$.apply(Vector.class), Ordering$Int$.MODULE$);
            Map approxKeyCounts = RDD$.MODULE$.rddToPairRDDFunctions(approxSampler.sampleByKey(true, fractionsByKey, approxSampler.sampleByKey$default$3()), ClassTag$.MODULE$.Int(), ClassTag$.MODULE$.apply(Vector.class), Ordering$Int$.MODULE$).countByKey();
            long approxSampleSize = BoxesRunTime.unboxToLong(approxKeyCounts.values().sum(Numeric$LongIsIntegral$.MODULE$));
            Predef$.MODULE$.println(new StringBuilder().append(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"  Sampled ", " examples using approximate stratified sampling (by label)."})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToLong(approxSampleSize)}))).append(" ==> Approx Sample").toString());

            // Exact stratified sample.
            PairRDDFunctions exactSampler = RDD$.MODULE$.rddToPairRDDFunctions(keyedExamples, ClassTag$.MODULE$.Int(), ClassTag$.MODULE$.apply(Vector.class), Ordering$Int$.MODULE$);
            Map exactKeyCounts = RDD$.MODULE$.rddToPairRDDFunctions(exactSampler.sampleByKeyExact(true, fractionsByKey, exactSampler.sampleByKeyExact$default$3()), ClassTag$.MODULE$.Int(), ClassTag$.MODULE$.apply(Vector.class), Ordering$Int$.MODULE$).countByKey();
            long exactSampleSize = BoxesRunTime.unboxToLong(exactKeyCounts.values().sum(Numeric$LongIsIntegral$.MODULE$));
            Predef$.MODULE$.println(new StringBuilder().append(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"  Sampled ", " examples using exact stratified sampling (by label)."})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToLong(exactSampleSize)}))).append(" ==> Exact Sample").toString());

            // Per-key comparison table, one row per key in ascending key order.
            Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"   \\tFractions of examples with key"})).s(Nil$.MODULE$));
            Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Key\\tOrig\\tApprox Sample\\tExact Sample"})).s(Nil$.MODULE$));
            ((IterableLike) keyCounts.keys().toSeq().sorted(Ordering$Int$.MODULE$)).foreach(new SampledRDDs$$anonfun$run$1(numExamples, keyCounts, approxKeyCounts, approxSampleSize, exactKeyCounts, exactSampleSize));
        } finally {
            // Release the context on every exit path, including the empty-data
            // error above and any failure raised by a Spark action.
            sparkContext.stop();
        }
    }

    /** Private: the only instance is {@link #MODULE$}. */
    private SampledRDDs$() {
    }
}
