/*
 * Decompiled with CFR 0.152.
 */
package org.clulab.learning;

import java.io.Serializable;
import java.util.concurrent.ForkJoinPool;
import org.clulab.learning.BVFDataset;
import org.clulab.learning.Classifier;
import org.clulab.learning.Dataset;
import org.clulab.learning.DatasetFold;
import org.clulab.learning.Datasets;
import org.clulab.learning.FeatureTraversable;
import org.clulab.learning.RVFDataset;
import org.clulab.learning.RankingDataset;
import org.clulab.learning.ScaleRange;
import org.clulab.struct.Counter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Array$;
import scala.Double$;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.Option;
import scala.Predef;
import scala.Predef$;
import scala.Some;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Parallelizable;
import scala.collection.Seq;
import scala.collection.TraversableOnce;
import scala.collection.generic.CanBuildFrom;
import scala.collection.immutable.List;
import scala.collection.immutable.Map;
import scala.collection.immutable.Set;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.ListBuffer;
import scala.collection.parallel.ForkJoinTaskSupport;
import scala.collection.parallel.ParIterableLike;
import scala.collection.parallel.TaskSupport;
import scala.collection.parallel.immutable.ParSet;
import scala.collection.parallel.immutable.ParSet$;
import scala.math.package$;
import scala.reflect.ClassTag$;
import scala.runtime.BooleanRef;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.DoubleRef;
import scala.runtime.IntRef;
import scala.runtime.ObjectRef;
import scala.runtime.RichInt$;
import scala.runtime.java8.JFunction1;

public final class Datasets$ {
    /** Singleton instance of this class; the private constructor assigns {@code this} to it. */
    public static Datasets$ MODULE$;
    /** Logger used by the methods of this class; initialised in the private constructor. */
    private final Logger logger;

    // Creating the single instance here populates MODULE$ as a side effect of the constructor.
    static {
        new Datasets$();
    }

    /**
     * Returns the logger held by this singleton.
     *
     * @return the logger instance created in the constructor
     */
    public Logger logger() {
        return logger;
    }

    /**
     * Builds {@code numFolds} cross-validation folds over a dataset of {@code size} rows.
     * The nominal fold width is the integer quotient {@code size / numFolds}; each fold is
     * constructed and appended by {@code $anonfun$mkFolds$1}.
     *
     * @param numFolds number of folds to create (a value of 0 triggers an integer division by zero)
     * @param size     number of rows in the dataset
     * @return the folds, in fold-index order
     */
    public Iterable<DatasetFold> mkFolds(int numFolds, int size) {
        int perFold = size / numFolds;
        ArrayBuffer accumulator = new ArrayBuffer();
        for (int foldIndex = 0; foldIndex < numFolds; foldIndex++) {
            Datasets$.$anonfun$mkFolds$1(numFolds, size, perFold, accumulator, foldIndex);
        }
        return accumulator.toList();
    }

    /**
     * Expands a collection of {@code (start, end)} spans into the flat array of row indices they cover.
     * When {@code spans} is empty, a single span covering {@code [0, datasetSize)} is used instead.
     *
     * @param datasetSize number of rows in the dataset; only consulted when {@code spans} is empty
     * @param spans       optional spans, each expanded as start inclusive / end exclusive
     * @return the indices of all spans, concatenated in iteration order
     */
    public <F> int[] mkTrainIndices(int datasetSize, Option<Iterable<Tuple2<Object, Object>>> spans) {
        ArrayBuffer collected = new ArrayBuffer();
        Iterable selectedSpans = spans.isEmpty() ? this.mkFullFold(datasetSize) : (Iterable)spans.get();
        selectedSpans.foreach((Function1 & Serializable & scala.Serializable)span -> {
            Datasets$.$anonfun$mkTrainIndices$2(collected, (Tuple2)span);
            return BoxedUnit.UNIT;
        });
        return (int[])collected.toArray(ClassTag$.MODULE$.Int());
    }

    /**
     * Creates a one-element collection holding the span {@code (0, size)}.
     *
     * @param size end of the span
     * @return a wrapped array containing the single span
     */
    private Iterable<Tuple2<Object, Object>> mkFullFold(int size) {
        Tuple2 wholeRange = new Tuple2.mcII.sp(0, size);
        return Predef$.MODULE$.wrapRefArray((Object[])new Tuple2[]{wholeRange});
    }

    /**
     * Scales the feature values of a dataset, dispatching on its concrete type.
     *
     * @param dataset dataset to scale; must be an {@code RVFDataset} or a {@code BVFDataset}
     * @param lower   lower bound of the target interval
     * @param upper   upper bound of the target interval
     * @return the scale ranges returned by the type-specific scaling method
     * @throws RuntimeException if the dataset is of neither supported type
     */
    public <L, F> ScaleRange<F> svmScaleDataset(Dataset<L, F> dataset, double lower, double upper) {
        if (dataset instanceof RVFDataset) {
            return this.svmScaleRVFDataset((RVFDataset)dataset, lower, upper);
        }
        if (dataset instanceof BVFDataset) {
            return this.svmScaleBVFDataset((BVFDataset)dataset, lower, upper);
        }
        throw new RuntimeException("ERROR: unknown dataset type in svmScale!");
    }

    /**
     * Returns the constant {@code -1.0}.
     * NOTE(review): the {@code $default$2} suffix suggests this supplies the default for the second
     * parameter ({@code lower}) of {@code svmScaleDataset} - confirm against the Scala source.
     */
    public <L, F> double svmScaleDataset$default$2() {
        return -1.0;
    }

    /**
     * Returns the constant {@code 1.0}.
     * NOTE(review): the {@code $default$3} suffix suggests this supplies the default for the third
     * parameter ({@code upper}) of {@code svmScaleDataset} - confirm against the Scala source.
     */
    public <L, F> double svmScaleDataset$default$3() {
        return 1.0;
    }

    public <F> Counter<F> svmScaleDatum(Counter<F> features, ScaleRange<F> ranges, double lower, double upper) {
        Predef$.MODULE$.assert(ranges != null);
        Predef$.MODULE$.assert(features != null);
        Counter scaledFeatures = new Counter();
        features.keySet().foreach((Function1 & Serializable & scala.Serializable)f -> {
            Datasets$.$anonfun$svmScaleDatum$1(features, ranges, lower, upper, scaledFeatures, f);
            return BoxedUnit.UNIT;
        });
        return scaledFeatures;
    }

    /**
     * Returns the constant {@code -1.0}.
     * NOTE(review): the {@code $default$3} suffix suggests this supplies the default for the third
     * parameter ({@code lower}) of {@code svmScaleDatum} - confirm against the Scala source.
     */
    public <F> double svmScaleDatum$default$3() {
        return -1.0;
    }

    /**
     * Returns the constant {@code 1.0}.
     * NOTE(review): the {@code $default$4} suffix suggests this supplies the default for the fourth
     * parameter ({@code upper}) of {@code svmScaleDatum} - confirm against the Scala source.
     */
    public <F> double svmScaleDatum$default$4() {
        return 1.0;
    }

    /**
     * Scales a ranking dataset by treating it as a {@code FeatureTraversable} and delegating to
     * {@link #svmScaleFeatureTraversable}.
     *
     * @param dataset ranking dataset to scale
     * @param lower   lower bound of the target interval
     * @param upper   upper bound of the target interval
     * @return the scale ranges computed by the delegate
     * @throws RuntimeException if a {@code ClassCastException} is raised by the cast or the delegate;
     *                          the original exception is attached as the cause
     */
    public <L, F> ScaleRange<F> svmScaleRankingDataset(RankingDataset<F> dataset, double lower, double upper) {
        try {
            return this.svmScaleFeatureTraversable((FeatureTraversable)((Object)dataset), lower, upper);
        }
        catch (ClassCastException e) {
            // Attach the cause so the stack trace of the failing cast is not lost.
            throw new RuntimeException("Feature traverser not implemented! " + e.getMessage(), e);
        }
    }

    /**
     * Returns the constant {@code -1.0}.
     * NOTE(review): the {@code $default$2} suffix suggests this supplies the default for the second
     * parameter ({@code lower}) of {@code svmScaleRankingDataset} - confirm against the Scala source.
     */
    public <L, F> double svmScaleRankingDataset$default$2() {
        return -1.0;
    }

    /**
     * Returns the constant {@code 1.0}.
     * NOTE(review): the {@code $default$3} suffix suggests this supplies the default for the third
     * parameter ({@code upper}) of {@code svmScaleRankingDataset} - confirm against the Scala source.
     */
    public <L, F> double svmScaleRankingDataset$default$3() {
        return 1.0;
    }

    /**
     * Scales every feature value reachable through {@code dataset.featureUpdater()}.
     * A first traversal records each (feature, value) pair in a new {@code ScaleRange}; a second
     * traversal replaces each value with its rescaled counterpart via {@code updateAll}.
     *
     * @param dataset traversable source of (feature, value) pairs
     * @param lower   lower bound of the target interval
     * @param upper   upper bound of the target interval
     * @return the ranges gathered during the first traversal
     */
    public <F> ScaleRange<F> svmScaleFeatureTraversable(FeatureTraversable<F, Object> dataset, double lower, double upper) {
        ScaleRange observed = new ScaleRange();
        Function1 recordRange = (Function1 & Serializable & scala.Serializable)entry -> {
            Datasets$.$anonfun$svmScaleFeatureTraversable$1(observed, (Tuple2)entry);
            return BoxedUnit.UNIT;
        };
        dataset.featureUpdater().foreach(recordRange);
        Function1 rescale = (Function1 & Serializable & scala.Serializable)entry -> BoxesRunTime.boxToDouble((double)Datasets$.$anonfun$svmScaleFeatureTraversable$2(lower, upper, observed, (Tuple2)entry));
        dataset.featureUpdater().updateAll(rescale);
        return observed;
    }

    /**
     * Placeholder for scaling BVF datasets; always fails.
     *
     * @param dataset unused
     * @param lower   unused
     * @param upper   unused
     * @return never returns normally
     * @throws RuntimeException always, because the operation is not implemented
     */
    public <L, F> ScaleRange<F> svmScaleBVFDataset(BVFDataset<L, F> dataset, double lower, double upper) {
        throw new RuntimeException("ERROR: scaling of BVF datasets not implemented yet!");
    }

    /**
     * Rescales, in place, every stored feature value of an RVF dataset.
     * Pass 1 records each (feature, value) pair in a new {@code ScaleRange}; pass 2 overwrites each
     * value with the result of {@code scale} using that feature's recorded minimum and maximum.
     *
     * @param dataset dataset whose {@code values()} rows are overwritten
     * @param lower   lower bound of the target interval
     * @param upper   upper bound of the target interval
     * @return the ranges gathered in pass 1
     */
    public <L, F> ScaleRange<F> svmScaleRVFDataset(RVFDataset<L, F> dataset, double lower, double upper) {
        ScaleRange ranges = new ScaleRange();

        // Pass 1: gather the observed range of every feature.
        int rowsToScan = dataset.size();
        for (int i = 0; i < rowsToScan; i++) {
            int featureCount = ((int[])dataset.features().apply(i)).length;
            for (int j = 0; j < featureCount; j++) {
                int fi = ((int[])dataset.features().apply(i))[j];
                double v = ((double[])dataset.values().apply(i))[j];
                Object f = dataset.featureLexicon().get(fi);
                ranges.update(f, v);
            }
        }

        // Pass 2: rewrite each value. The decompiled lambda referred to the synthetic names
        // dataset$4 and i$2, which do not exist in this scope; they stand for dataset and i.
        int rowsToScale = dataset.size();
        for (int i = 0; i < rowsToScale; i++) {
            int featureCount = ((int[])dataset.features().apply(i)).length;
            for (int j = 0; j < featureCount; j++) {
                int fi = ((int[])dataset.features().apply(i))[j];
                double v = ((double[])dataset.values().apply(i))[j];
                Object f = dataset.featureLexicon().get(fi);
                ((double[])dataset.values().apply(i))[j] = this.scale(v, ranges.min(f), ranges.max(f), lower, upper);
            }
        }
        return ranges;
    }

    /**
     * Linearly maps {@code value} from the interval {@code [min, max]} onto {@code [lower, upper]}.
     *
     * @param value value to map
     * @param min   source interval minimum
     * @param max   source interval maximum
     * @param lower target interval minimum
     * @param upper target interval maximum
     * @return {@code upper} when {@code min == max}, otherwise the linearly mapped value
     */
    private double scale(double value, double min, double max, double lower, double upper) {
        double sourceSpan = max - min;
        // A degenerate source interval cannot be mapped; the upper bound is returned instead.
        return min == max ? upper : lower + (upper - lower) * (value - min) / sourceSpan;
    }

    /**
     * Greedy forward selection over named feature groups.
     * Each iteration scores, in parallel, every not-yet-chosen group added on top of the features
     * chosen so far (see {@link #scoreGroup}); the group with the highest score strictly above the
     * best score seen so far is adopted. The search ends when no group improves the score.
     *
     * @param dataset           dataset used for cross-validation
     * @param classifierFactory produces a fresh classifier for each fold
     * @param scoringMetric     maps (gold, predicted) pairs to a score; larger is better
     * @param featureGroups     group name to the set of feature ids in that group
     * @param numFolds          number of cross-validation folds
     * @param nCores            parallelism of the fork-join pool used for scoring
     * @return the names of the chosen groups
     */
    public <L, F> Set<String> incrementalFeatureSelection(Dataset<L, F> dataset, Function0<Classifier<L, F>> classifierFactory, Function1<Iterable<Tuple2<L, L>>, Object> scoringMetric, Map<String, Set<Object>> featureGroups, int numFolds, int nCores) {
        Iterable<Tuple2<L, L>> datasetOutput = this.crossValidate(dataset, classifierFactory, numFolds);
        double datasetScore = BoxesRunTime.unboxToDouble((Object)scoringMetric.apply(datasetOutput));
        this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Iteration #0: Score using ALL features is ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToDouble((double)datasetScore)})));
        this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Iteration #0: Using ", " feature groups and ", " features."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)featureGroups.size()), BoxesRunTime.boxToInteger((int)dataset.featureLexicon().size())})));
        HashSet chosenGroups = new HashSet();
        HashSet allBetterChosenGroups = new HashSet();
        HashSet chosenFeatures = new HashSet();
        DoubleRef bestScore = DoubleRef.create((double)Double$.MODULE$.MinValue());
        IntRef iteration = IntRef.create((int)1);
        // One pool serves all iterations and is shut down when the search ends; previously a new
        // pool was created on every iteration and never released.
        ForkJoinPool pool = new ForkJoinPool(nCores);
        try {
            boolean meatLeftOnTheBone = true;
            while (meatLeftOnTheBone) {
                ObjectRef bestGroup = ObjectRef.create(null);
                ObjectRef bestFeatures = ObjectRef.create(null);
                ParSet workingGroups = (ParSet)((Parallelizable)featureGroups.keySet().filter((Function1 & Serializable & scala.Serializable)x$1 -> BoxesRunTime.boxToBoolean((boolean)Datasets$.$anonfun$incrementalFeatureSelection$1(chosenGroups, (String)x$1)))).par();
                workingGroups.tasksupport_$eq((TaskSupport)new ForkJoinTaskSupport(pool));
                List scores = ((ParIterableLike)workingGroups.map((Function1 & Serializable & scala.Serializable)x$2 -> MODULE$.scoreGroup((String)x$2, featureGroups, (HashSet<Object>)chosenFeatures, dataset, classifierFactory, numFolds, scoringMetric), (CanBuildFrom)ParSet$.MODULE$.canBuildFrom())).toList();
                // The decompiler emitted bestScore$1 / bestGroup$1 / bestFeatures$1 / iteration$1 here;
                // those are the captured locals declared above.
                scores.foreach((Function1 & Serializable & scala.Serializable)gs -> {
                    Tuple2 scored = (Tuple2)gs;
                    String group = (String)scored._1();
                    double score = scored._2$mcD$sp();
                    if (score > bestScore.elem) {
                        bestScore.elem = score;
                        bestGroup.elem = group;
                        bestFeatures.elem = (Set)featureGroups.apply((Object)group);
                        MODULE$.logger().debug(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Iteration #", ": found new best group [", "] with score ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)iteration.elem), (String)bestGroup.elem, BoxesRunTime.boxToDouble((double)bestScore.elem)})));
                        if (iteration.elem > 1) {
                            return allBetterChosenGroups.$plus$eq((Object)((String)bestGroup.elem));
                        }
                    }
                    return BoxedUnit.UNIT;
                });
                if ((String)bestGroup.elem == null) {
                    meatLeftOnTheBone = false;
                    this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Iteration #", ": no better group found. Search complete."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)iteration.elem)})));
                } else {
                    this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Iteration #", ": best group found is [", "] with score ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)iteration.elem), (String)bestGroup.elem, BoxesRunTime.boxToDouble((double)bestScore.elem)})));
                    chosenGroups.$plus$eq((Object)((String)bestGroup.elem));
                    chosenFeatures.$plus$plus$eq((TraversableOnce)((Set)bestFeatures.elem));
                    this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Iteration #", ": we now have ", " chosen groups and ", " chosen features."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)iteration.elem), BoxesRunTime.boxToInteger((int)chosenGroups.size()), BoxesRunTime.boxToInteger((int)chosenFeatures.size())})));
                }
                ++iteration.elem;
            }
        } finally {
            pool.shutdown();
        }
        this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Iteration #", ": process ended with score ", " using ", " chosen groups and ", " chosen features."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)iteration.elem), BoxesRunTime.boxToDouble((double)bestScore.elem), BoxesRunTime.boxToInteger((int)chosenGroups.size()), BoxesRunTime.boxToInteger((int)chosenFeatures.size())})));
        this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Found ", " better groups: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)allBetterChosenGroups.size()), allBetterChosenGroups.toSet()})));
        return chosenGroups.toSet();
    }

    /**
     * Returns the constant {@code 5}.
     * NOTE(review): the {@code $default$5} suffix suggests this supplies the default for the fifth
     * parameter ({@code numFolds}) of {@code incrementalFeatureSelection} - confirm against the Scala source.
     */
    public <L, F> int incrementalFeatureSelection$default$5() {
        return 5;
    }

    /**
     * Returns the constant {@code 8}.
     * NOTE(review): the {@code $default$6} suffix suggests this supplies the default for the sixth
     * parameter ({@code nCores}) of {@code incrementalFeatureSelection} - confirm against the Scala source.
     */
    public <L, F> int incrementalFeatureSelection$default$6() {
        return 8;
    }

    /**
     * Selects a prefix of the features ranked by {@link #sortFeaturesByInformativeness}.
     * Starting with the first {@code step} ranked features, the prefix grows by {@code step} as long
     * as the cross-validated score strictly improves; the best prefix seen is returned.
     *
     * @param dataset           dataset used for ranking and cross-validation
     * @param classifierFactory produces a fresh classifier for each fold
     * @param scoringMetric     maps (gold, predicted) pairs to a score; larger is better
     * @param minFreq           frequency threshold forwarded to the ranking step
     * @param numFolds          number of cross-validation folds
     * @param step              number of features added to the prefix per round
     * @return the ids of the features in the best-scoring prefix
     */
    public <L, F> Set<Object> featureSelectionByInformativeness(Dataset<L, F> dataset, Function0<Classifier<L, F>> classifierFactory, Function1<Iterable<Tuple2<L, L>>, Object> scoringMetric, int minFreq, int numFolds, int step) {
        Iterable<Tuple2<L, L>> baselineOutput = this.crossValidate(dataset, classifierFactory, numFolds);
        double baselineScore = BoxesRunTime.unboxToDouble((Object)scoringMetric.apply(baselineOutput));
        this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Score using ALL features is ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToDouble((double)baselineScore)})));
        Tuple2[] features = (Tuple2[])this.sortFeaturesByInformativeness(dataset, minFreq).sorted().toArray(ClassTag$.MODULE$.apply(Tuple2.class));
        this.logger().debug("Top 20 most informative features:");
        int shown = package$.MODULE$.min(20, features.length);
        for (int i = 0; i < shown; i++) {
            MODULE$.logger().debug(Predef.any2stringadd$.MODULE$.$plus$extension(Predef$.MODULE$.any2stringadd(dataset.featureLexicon().get(features[i]._1$mcI$sp())), "\t") + features[i]._2$mcD$sp());
        }
        double bestScore = Double$.MODULE$.MinValue();
        int bestCut = 0;
        int cut = package$.MODULE$.min(step, features.length);
        boolean improving = true;
        while (improving && cut <= features.length) {
            Dataset<L, F> smallDataset = dataset.keepOnly(this.leadingFeatureIds(features, cut));
            Iterable<Tuple2<L, L>> output = this.crossValidate(smallDataset, classifierFactory, numFolds);
            double score = BoxesRunTime.unboxToDouble((Object)scoringMetric.apply(output));
            if (score > bestScore) {
                bestScore = score;
                bestCut = cut;
                this.logger().debug(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Found better cut at ", " with score ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)bestCut), BoxesRunTime.boxToDouble((double)bestScore)})));
            } else {
                improving = false;
            }
            cut = package$.MODULE$.min(features.length, cut + step);
        }
        this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Cutting features at ", " out of ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)bestCut), BoxesRunTime.boxToInteger((int)features.length)})));
        return this.leadingFeatureIds(features, bestCut);
    }

    /** Collects the feature ids (first tuple component) of the first {@code count} ranked entries into a set. */
    private Set<Object> leadingFeatureIds(Tuple2[] features, int count) {
        Object[] head = (Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])features)).slice(0, count);
        int[] ids = (int[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(head)).map((Function1 & Serializable & scala.Serializable)entry -> BoxesRunTime.boxToInteger((int)((Tuple2)entry)._1$mcI$sp()), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Int()));
        return new ArrayOps.ofInt(Predef$.MODULE$.intArrayOps(ids)).toSet();
    }

    /**
     * Returns the constant {@code 10}.
     * NOTE(review): the {@code $default$4} suffix suggests this supplies the default for the fourth
     * parameter ({@code minFreq}) of {@code featureSelectionByInformativeness} - confirm against the Scala source.
     */
    public <L, F> int featureSelectionByInformativeness$default$4() {
        return 10;
    }

    /**
     * Returns the constant {@code 5}.
     * NOTE(review): the {@code $default$5} suffix suggests this supplies the default for the fifth
     * parameter ({@code numFolds}) of {@code featureSelectionByInformativeness} - confirm against the Scala source.
     */
    public <L, F> int featureSelectionByInformativeness$default$5() {
        return 5;
    }

    /**
     * Returns the constant {@code 1000}.
     * NOTE(review): the {@code $default$6} suffix suggests this supplies the default for the sixth
     * parameter ({@code step}) of {@code featureSelectionByInformativeness} - confirm against the Scala source.
     */
    public <L, F> int featureSelectionByInformativeness$default$6() {
        return 1000;
    }

    /**
     * Selects features by searching for a frequency cutoff.
     * For thresholds {@code t = 1 .. 99}, only features whose count exceeds {@code t} are kept and
     * the dataset is cross-validated. The search stops at the first threshold that either leaves
     * no features or fails to strictly improve on the best score so far, which starts at the
     * all-features score.
     *
     * @param dataset           dataset used for counting and cross-validation
     * @param classifierFactory produces a fresh classifier for each fold
     * @param scoringMetric     maps (gold, predicted) pairs to a score; larger is better
     * @param numFolds          number of cross-validation folds
     * @return the ids of the features whose count exceeds the best cutoff found (cutoff 0 if none improved)
     */
    public <L, F> Set<Object> featureSelectionByFrequency(Dataset<L, F> dataset, Function0<Classifier<L, F>> classifierFactory, Function1<Iterable<Tuple2<L, L>>, Object> scoringMetric, int numFolds) {
        Iterable<Tuple2<L, L>> datasetOutput = this.crossValidate(dataset, classifierFactory, numFolds);
        double datasetScore = BoxesRunTime.unboxToDouble((Object)scoringMetric.apply(datasetOutput));
        this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Score using ALL features is ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToDouble((double)datasetScore)})));
        Counter<Object> features = this.sortFeaturesByFrequency(dataset);
        // Plain locals replace the decompiler's boxed refs, whose captured copies
        // (meatLeftOnTheBone$1, bestScore$2, bestCut$1) were undefined names in the lambda body.
        double bestScore = datasetScore;
        int bestCut = 0;
        boolean meatLeftOnTheBone = true;
        for (int t = 1; t < 100 && meatLeftOnTheBone; t++) {
            Set<Object> smallFeats = this.keepMoreFrequent(features, t);
            if (smallFeats.isEmpty()) {
                meatLeftOnTheBone = false;
            } else {
                Dataset<L, F> smallDataset = dataset.keepOnly(smallFeats);
                Iterable<Tuple2<L, L>> output = this.crossValidate(smallDataset, classifierFactory, numFolds);
                double score = BoxesRunTime.unboxToDouble((Object)scoringMetric.apply(output));
                if (score > bestScore) {
                    bestScore = score;
                    bestCut = t;
                    this.logger().debug(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Found better frequency cutoff at ", " with score ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)bestCut), BoxesRunTime.boxToDouble((double)bestScore)})));
                } else {
                    meatLeftOnTheBone = false;
                }
            }
        }
        this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Cutting features at ", " out of ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)bestCut), BoxesRunTime.boxToInteger((int)features.size())})));
        return this.keepMoreFrequent(features, bestCut);
    }

    /**
     * Returns the constant {@code 5}.
     * NOTE(review): the {@code $default$4} suffix suggests this supplies the default for the fourth
     * parameter ({@code numFolds}) of {@code featureSelectionByFrequency} - confirm against the Scala source.
     */
    public <L, F> int featureSelectionByFrequency$default$4() {
        return 5;
    }

    /**
     * Returns the features whose count is strictly greater than {@code threshold}.
     *
     * @param features  counter keyed by feature id
     * @param threshold exclusive lower bound on the count
     * @return an immutable set of the qualifying feature ids
     */
    public Set<Object> keepMoreFrequent(Counter<Object> features, double threshold) {
        HashSet kept = new HashSet();
        Function1 keepIfFrequent = (Function1 & Serializable & scala.Serializable)featureId -> Datasets$.$anonfun$keepMoreFrequent$1(features, threshold, kept, BoxesRunTime.unboxToInt((Object)featureId));
        features.keySet().foreach(keepIfFrequent);
        return kept.toSet();
    }

    /**
     * Scores one candidate group: the already chosen features plus the features of {@code group}
     * are evaluated together through {@link #scoreFeatures}.
     *
     * @param group             name of the candidate group
     * @param featureGroups     group name to feature ids
     * @param chosenFeatures    feature ids selected so far; copied, not modified
     * @param dataset           dataset used for cross-validation
     * @param classifierFactory produces a fresh classifier for each fold
     * @param numFolds          number of cross-validation folds
     * @param scoringMetric     maps (gold, predicted) pairs to a score
     * @return the pair (group name, boxed score)
     */
    public <L, F> Tuple2<String, Object> scoreGroup(String group, Map<String, Set<Object>> featureGroups, HashSet<Object> chosenFeatures, Dataset<L, F> dataset, Function0<Classifier<L, F>> classifierFactory, int numFolds, Function1<Iterable<Tuple2<L, L>>, Object> scoringMetric) {
        HashSet candidate = new HashSet();
        candidate.$plus$plus$eq(chosenFeatures);
        candidate.$plus$plus$eq((TraversableOnce)featureGroups.apply((Object)group));
        Object boxedScore = BoxesRunTime.boxToDouble((double)this.scoreFeatures(dataset, (HashSet<Object>)candidate, classifierFactory, numFolds, scoringMetric));
        return new Tuple2((Object)group, boxedScore);
    }

    /**
     * Cross-validates the dataset restricted to {@code features} and returns the metric value.
     *
     * @param dataset           full dataset
     * @param features          feature ids to keep
     * @param classifierFactory produces a fresh classifier for each fold
     * @param numFolds          number of cross-validation folds
     * @param scoringMetric     maps (gold, predicted) pairs to a score
     * @return the score of the restricted dataset
     */
    public <L, F> double scoreFeatures(Dataset<L, F> dataset, HashSet<Object> features, Function0<Classifier<L, F>> classifierFactory, int numFolds, Function1<Iterable<Tuple2<L, L>>, Object> scoringMetric) {
        Dataset<L, F> restricted = dataset.keepOnly((Set<Object>)features.toSet());
        return BoxesRunTime.unboxToDouble((Object)scoringMetric.apply(this.crossValidate(restricted, classifierFactory, numFolds)));
    }

    /**
     * Runs k-fold cross-validation: for every fold from {@link #mkFolds} a new classifier is
     * trained and applied by {@code $anonfun$crossValidate$1}, which appends its results to a
     * shared buffer.
     *
     * @param dataset           dataset to evaluate on
     * @param classifierFactory produces a fresh classifier for each fold
     * @param numFolds          number of folds
     * @return (gold label, predicted label) pairs in the order they were appended
     */
    public <L, F> Iterable<Tuple2<L, L>> crossValidate(Dataset<L, F> dataset, Function0<Classifier<L, F>> classifierFactory, int numFolds) {
        Iterable<DatasetFold> partitions = this.mkFolds(numFolds, dataset.size());
        ListBuffer predictions = new ListBuffer();
        Function1 evaluateFold = (Function1 & Serializable & scala.Serializable)partition -> {
            Datasets$.$anonfun$crossValidate$1(dataset, classifierFactory, predictions, (DatasetFold)partition);
            return BoxedUnit.UNIT;
        };
        partitions.foreach(evaluateFold);
        return predictions.toList();
    }

    /**
     * Returns the constant {@code 5}.
     * NOTE(review): the {@code $default$3} suffix suggests this supplies the default for the third
     * parameter ({@code numFolds}) of {@code crossValidate} - confirm against the Scala source.
     */
    public <L, F> int crossValidate$default$3() {
        return 5;
    }

    /*
     * WARNING - void declaration
     */
    public <L, F> Counter<Object> sortFeaturesByFrequency(Dataset<L, F> dataset) {
        void var2_2;
        Counter featCounts = new Counter();
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), dataset.size()).foreach$mVc$sp((Function1)(JFunction1.mcVI.sp & Serializable & scala.Serializable)row -> {
            Counter<Object> fs = dataset.featuresCounter(row);
            fs.keySet().foreach((Function1)(JFunction1.mcDI.sp & Serializable & scala.Serializable)f -> featCounts.incrementCount(BoxesRunTime.boxToInteger((int)f), featCounts.incrementCount$default$2()));
        });
        return var2_2;
    }

    /**
     * Scores every sufficiently frequent feature with {@link #informationGain}.
     * Steps: (1) count the rows containing each feature; (2) keep features whose count exceeds
     * {@code minFreq}; (3) for every row, tally per-feature row counts and label counts, both for
     * features present in the row and for frequent features absent from it; (4) compute one score
     * per frequent feature.
     *
     * @param dataset dataset to analyse
     * @param minFreq exclusive lower bound on the number of rows a feature must appear in
     * @return counter mapping each frequent feature id to its score
     */
    public <L, F> Counter<Object> sortFeaturesByInformativeness(Dataset<L, F> dataset, int minFreq) {
        // (1) Row frequency of every feature.
        Counter featCounts = new Counter();
        int rowsToCount = dataset.size();
        for (int row = 0; row < rowsToCount; row++) {
            Counter<Object> fs = dataset.featuresCounter(row);
            fs.keySet().foreach((Function1)(JFunction1.mcDI.sp & Serializable & scala.Serializable)f -> featCounts.incrementCount(BoxesRunTime.boxToInteger((int)f), featCounts.incrementCount$default$2()));
        }

        // (2) Frequency filter.
        HashSet frequentFeatures = new HashSet();
        featCounts.keySet().foreach((Function1 & Serializable & scala.Serializable)f -> Datasets$.$anonfun$sortFeaturesByInformativeness$3(minFreq, featCounts, frequentFeatures, BoxesRunTime.unboxToInt((Object)f)));
        this.logger().info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Using ", " out of ", " features with count > ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)frequentFeatures.size()), BoxesRunTime.boxToInteger((int)dataset.featureLexicon().size()), BoxesRunTime.boxToInteger((int)minFreq)})));

        // (3) Per-feature row and label tallies, with and without the feature.
        Counter rowsWithTerm = new Counter();
        Counter rowsWithoutTerm = new Counter();
        HashMap labelsWithTerm = new HashMap();
        HashMap labelsWithoutTerm = new HashMap();
        int rowsToTally = dataset.size();
        for (int row = 0; row < rowsToTally; row++) {
            int l = BoxesRunTime.unboxToInt((Object)dataset.labels().apply(row));
            Counter<Object> fs = dataset.featuresCounter(row);
            fs.keySet().foreach((Function1)(JFunction1.mcDI.sp & Serializable & scala.Serializable)f -> {
                rowsWithTerm.incrementCount(BoxesRunTime.boxToInteger((int)f), rowsWithTerm.incrementCount$default$2());
                if (!labelsWithTerm.contains((Object)BoxesRunTime.boxToInteger((int)f))) {
                    labelsWithTerm.put((Object)BoxesRunTime.boxToInteger((int)f), new Counter());
                }
                Counter qual$1 = (Counter)labelsWithTerm.apply((Object)BoxesRunTime.boxToInteger((int)f));
                int x$5 = l;
                double x$6 = qual$1.incrementCount$default$2();
                return qual$1.incrementCount(BoxesRunTime.boxToInteger((int)x$5), x$6);
            });
            frequentFeatures.foreach((Function1 & Serializable & scala.Serializable)nf -> Datasets$.$anonfun$sortFeaturesByInformativeness$6(rowsWithoutTerm, labelsWithoutTerm, l, fs, BoxesRunTime.unboxToInt((Object)nf)));
            if (row % 100 == 0) {
                this.logger().debug(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Processed ", " datums out of ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)row), BoxesRunTime.boxToInteger((int)dataset.size())})));
            }
        }

        // (4) One score per frequent feature. fc is the captured progress counter; the decompiler
        // referred to it inside the lambda by the undefined name fc$1.
        Counter<Object> c = new Counter<Object>();
        IntRef fc = IntRef.create((int)0);
        frequentFeatures.foreach((Function1)(JFunction1.mcVI.sp & Serializable & scala.Serializable)fi -> {
            c.setCount(BoxesRunTime.boxToInteger((int)fi), MODULE$.informationGain(rowsWithTerm.getCount(BoxesRunTime.boxToInteger((int)fi)), rowsWithoutTerm.getCount(BoxesRunTime.boxToInteger((int)fi)), (Counter)labelsWithTerm.getOrElse((Object)BoxesRunTime.boxToInteger((int)fi), (Function0 & Serializable & scala.Serializable)() -> new Counter()), (Counter)labelsWithoutTerm.getOrElse((Object)BoxesRunTime.boxToInteger((int)fi), (Function0 & Serializable & scala.Serializable)() -> new Counter()), dataset.size(), dataset.labelLexicon().size()));
            ++fc.elem;
            if (fc.elem % 100 == 0) {
                MODULE$.logger().debug(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Processed ", " out of ", " features."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)fc.elem), BoxesRunTime.boxToInteger((int)frequentFeatures.size())})));
            }
        });
        return c;
    }

    /**
     * Computes the weighted sum of {@code sum_l p(l) * ln p(l)} over the rows that contain a term
     * and the rows that do not, each weighted by its share of the {@code ND} rows.
     * Labels with zero (or undefined) probability contribute 0, following the usual
     * {@code 0 * ln 0 = 0} convention, so a term that never co-occurs with some label no longer
     * turns the whole result into NaN.
     *
     * @param rowsWithTerm      number of rows containing the term
     * @param rowsWithoutTerm   number of rows not containing the term
     * @param labelsWithTerm    label id to count among rows containing the term
     * @param labelsWithoutTerm label id to count among rows not containing the term
     * @param ND                total number of rows
     * @param NL                number of labels; label ids {@code 0 .. NL-1} are examined
     * @return the weighted sum described above (non-positive for valid probabilities)
     */
    public <L, F> double informationGain(double rowsWithTerm, double rowsWithoutTerm, Counter<Object> labelsWithTerm, Counter<Object> labelsWithoutTerm, int ND, int NL) {
        double probWithTerm = 0.0;
        double probWithoutTerm = 0.0;
        for (int l = 0; l < NL; l++) {
            double probWith = labelsWithTerm.getCount(BoxesRunTime.boxToInteger((int)l)) / rowsWithTerm;
            // The comparison is false for 0 and for NaN (0/0), so both are skipped.
            if (probWith > 0.0) {
                probWithTerm += probWith * Math.log(probWith);
            }
            double probWithout = labelsWithoutTerm.getCount(BoxesRunTime.boxToInteger((int)l)) / rowsWithoutTerm;
            if (probWithout > 0.0) {
                probWithoutTerm += probWithout * Math.log(probWithout);
            }
        }
        return probWithTerm * rowsWithTerm / (double)ND + probWithoutTerm * rowsWithoutTerm / (double)ND;
    }

    /**
     * Builds fold {@code i} and appends it to {@code folds$1}.
     * The test span is {@code [i * foldSize, (i + 1) * foldSize)}; for the last fold the end is the
     * larger of that value and the dataset size. The training spans are whatever lies before and
     * after the test span.
     *
     * @return the buffer the fold was appended to
     */
    public static final /* synthetic */ ArrayBuffer $anonfun$mkFolds$1(int numFolds$3, int size$1, int foldSize$1, ArrayBuffer folds$1, int i) {
        int testStart = i * foldSize$1;
        int nominalEnd = (i + 1) * foldSize$1;
        int testEnd = i == numFolds$3 - 1 ? package$.MODULE$.max(size$1, nominalEnd) : nominalEnd;
        ArrayBuffer trainSpans = new ArrayBuffer();
        if (testStart > 0) {
            trainSpans.$plus$eq((Object)new Tuple2.mcII.sp(0, testStart));
        }
        if (testEnd < size$1) {
            trainSpans.$plus$eq((Object)new Tuple2.mcII.sp(testEnd, size$1));
        }
        DatasetFold fold = new DatasetFold((Tuple2<Object, Object>)new Tuple2.mcII.sp(testStart, testEnd), (Iterable<Tuple2<Object, Object>>)trainSpans.toList());
        return folds$1.$plus$eq((Object)fold);
    }

    /** Appends every index in {@code [fold._1, fold._2)} to {@code indices$1}, boxed as an Integer. */
    public static final /* synthetic */ void $anonfun$mkTrainIndices$2(ArrayBuffer indices$1, Tuple2 fold) {
        int from = fold._1$mcI$sp();
        int until = fold._2$mcI$sp();
        for (int index = from; index < until; index++) {
            indices$1.$plus$eq((Object)BoxesRunTime.boxToInteger((int)index));
        }
    }

    /**
     * Rescales the value of feature {@code f} and stores it in {@code scaledFeatures$1}.
     * A feature missing from {@code ranges$1} is scaled with a minimum and maximum of 0.
     */
    public static final /* synthetic */ void $anonfun$svmScaleDatum$1(Counter features$1, ScaleRange ranges$1, double lower$1, double upper$1, Counter scaledFeatures$1, Object f) {
        double raw = features$1.getCount(f);
        boolean known = ranges$1.contains(f);
        double lo = known ? ranges$1.min(f) : 0.0;
        double hi = known ? ranges$1.max(f) : 0.0;
        scaledFeatures$1.setCount(f, MODULE$.scale(raw, lo, hi, lower$1, upper$1));
    }

    /**
     * Records one (feature, value) pair in {@code ranges$2}.
     *
     * @throws MatchError if the pair is {@code null}
     */
    public static final /* synthetic */ void $anonfun$svmScaleFeatureTraversable$1(ScaleRange ranges$2, Tuple2 x0$1) {
        if (x0$1 == null) {
            throw new MatchError((Object)x0$1);
        }
        Object feature = x0$1._1();
        double value = x0$1._2$mcD$sp();
        ranges$2.update(feature, value);
    }

    /**
     * Rescales the value of one (feature, value) pair using the range recorded for that feature.
     *
     * @return the rescaled value
     * @throws MatchError if the pair is {@code null}
     */
    public static final /* synthetic */ double $anonfun$svmScaleFeatureTraversable$2(double lower$3, double upper$3, ScaleRange ranges$2, Tuple2 x0$2) {
        if (x0$2 == null) {
            throw new MatchError((Object)x0$2);
        }
        Object feature = x0$2._1();
        double value = x0$2._2$mcD$sp();
        return MODULE$.scale(value, ranges$2.min(feature), ranges$2.max(feature), lower$3, upper$3);
    }

    /** Filter predicate: true when group {@code x$1} is not yet in {@code chosenGroups$1}. */
    public static final /* synthetic */ boolean $anonfun$incrementalFeatureSelection$1(HashSet chosenGroups$1, String x$1) {
        boolean alreadyChosen = chosenGroups$1.contains((Object)x$1);
        return !alreadyChosen;
    }

    /**
     * Adds feature {@code f} to {@code s$1} when its count is strictly greater than the threshold.
     *
     * @return the set when the feature was added, otherwise the unit value
     */
    public static final /* synthetic */ Object $anonfun$keepMoreFrequent$1(Counter features$4, double threshold$1, HashSet s$1, int f) {
        if (features$4.getCount(BoxesRunTime.boxToInteger((int)f)) > threshold$1) {
            return s$1.$plus$eq((Object)BoxesRunTime.boxToInteger((int)f));
        }
        return BoxedUnit.UNIT;
    }

    /**
     * Classifies row {@code i} and appends the pair (gold label, predicted label) to {@code output$1}.
     *
     * @return the buffer the pair was appended to
     */
    public static final /* synthetic */ ListBuffer $anonfun$crossValidate$2(Dataset dataset$5, ListBuffer output$1, Classifier classifier$1, int i) {
        Object predicted = classifier$1.classOf(dataset$5.mkDatum(i));
        int goldId = BoxesRunTime.unboxToInt((Object)dataset$5.labels().apply(i));
        Tuple2 pair = new Tuple2(dataset$5.labelLexicon().get(goldId), predicted);
        return output$1.$plus$eq((Object)pair);
    }

    /**
     * Handles one fold: creates a classifier from the factory, trains it on the fold's training
     * spans, then classifies every row of the test span via {@code $anonfun$crossValidate$2}.
     */
    public static final /* synthetic */ void $anonfun$crossValidate$1(Dataset dataset$5, Function0 classifierFactory$3, ListBuffer output$1, DatasetFold fold) {
        Classifier model = (Classifier)classifierFactory$3.apply();
        model.train(dataset$5, (Option<Iterable<Tuple2<Object, Object>>>)new Some(fold.trainFolds()));
        int testStart = fold.testFold()._1$mcI$sp();
        int testEnd = fold.testFold()._2$mcI$sp();
        for (int row = testStart; row < testEnd; row++) {
            Datasets$.$anonfun$crossValidate$2(dataset$5, output$1, model, row);
        }
    }

    /**
     * Adds feature {@code f} to {@code frequentFeatures$1} when its count strictly exceeds {@code minFreq$1}.
     *
     * @return the set when the feature was added, otherwise the unit value
     */
    public static final /* synthetic */ Object $anonfun$sortFeaturesByInformativeness$3(int minFreq$1, Counter featCounts$2, HashSet frequentFeatures$1, int f) {
        if (featCounts$2.getCount(BoxesRunTime.boxToInteger((int)f)) > (double)minFreq$1) {
            return frequentFeatures$1.$plus$eq((Object)BoxesRunTime.boxToInteger((int)f));
        }
        return BoxedUnit.UNIT;
    }

    /**
     * Updates the "without term" tallies for feature {@code nf} on the current row.
     * Nothing happens when the row's feature counter {@code fs$1} contains the feature; otherwise
     * the row count for the feature and the count of label {@code l$1} under it are incremented.
     *
     * @return the boxed result of the label increment, or the unit value when the row has the feature
     */
    public static final /* synthetic */ Object $anonfun$sortFeaturesByInformativeness$6(Counter rowsWithoutTerm$1, HashMap labelsWithoutTerm$1, int l$1, Counter fs$1, int nf) {
        Object featureKey = BoxesRunTime.boxToInteger((int)nf);
        if (fs$1.keySet().contains(featureKey)) {
            return BoxedUnit.UNIT;
        }
        rowsWithoutTerm$1.incrementCount(featureKey, rowsWithoutTerm$1.incrementCount$default$2());
        if (!labelsWithoutTerm$1.contains(featureKey)) {
            labelsWithoutTerm$1.put(featureKey, new Counter());
        }
        Counter labelCounts = (Counter)labelsWithoutTerm$1.apply(featureKey);
        double increment = labelCounts.incrementCount$default$2();
        return BoxesRunTime.boxToDouble((double)labelCounts.incrementCount(BoxesRunTime.boxToInteger((int)l$1), increment));
    }

    /**
     * Private constructor, invoked once from the static initializer.
     * Publishes this instance through {@code MODULE$} and then creates the logger for
     * {@code Datasets.class}.
     */
    private Datasets$() {
        // MODULE$ is assigned before the logger field, so it briefly refers to an instance
        // whose logger is still null.
        MODULE$ = this;
        this.logger = LoggerFactory.getLogger(Datasets.class);
    }
}

