package org.clulab.learning;

import org.clulab.struct.Counter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Array$;
import scala.Double$;
import scala.Function0;
import scala.Function1;
import scala.Option;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Map;
import scala.collection.immutable.Set;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.ListBuffer;
import scala.collection.mutable.StringBuilder;
import scala.collection.parallel.ForkJoinTaskSupport;
import scala.collection.parallel.ParIterableLike;
import scala.collection.parallel.immutable.ParSet;
import scala.collection.parallel.immutable.ParSet$;
import scala.concurrent.forkjoin.ForkJoinPool;
import scala.math.package$;
import scala.reflect.ClassTag$;
import scala.runtime.BooleanRef;
import scala.runtime.BoxesRunTime;
import scala.runtime.DoubleRef;
import scala.runtime.IntRef;
import scala.runtime.ObjectRef;
import scala.runtime.RichInt$;

/* compiled from: Datasets.scala */
/* loaded from: input_file:org/clulab/learning/Datasets$.class */
public final class Datasets$ {
    // Singleton instance of this object. Declared as null here; the private constructor
    // assigns it (MODULE$ = this) when the static initializer below runs.
    public static final Datasets$ MODULE$ = null;
    // SLF4J logger returned by logger(); assigned in the private constructor.
    private final Logger logger;

    // Creates the single instance as soon as the class is initialized.
    static {
        new Datasets$();
    }

    /**
     * Exposes the logger held by this object.
     *
     * @return the value of the {@code logger} field
     */
    public Logger logger() {
        return logger;
    }

    /**
     * Builds the folds for a dataset.
     * <p>
     * {@code Datasets$$anonfun$mkFolds$1} is invoked once per index in {@code [0, i)} and
     * receives {@code i}, {@code i2}, the integer quotient {@code i2 / i} and the buffer
     * that is returned at the end.
     * NOTE(review): presumably {@code i} is the number of folds and {@code i2} the dataset
     * size (crossValidate passes {@code dataset.size()} as {@code i2}) - confirm.
     *
     * @param i  exclusive upper bound of the fold indices; also the divisor of {@code i2},
     *           so {@code 0} raises {@link ArithmeticException}
     * @param i2 value that is divided by {@code i} and forwarded to the per-index callback
     * @return the buffer contents converted to a list
     */
    public Iterable<DatasetFold> mkFolds(int i, int i2) {
        ArrayBuffer folds = new ArrayBuffer();
        int quotient = i2 / i;
        RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(0), i)
                .foreach(new Datasets$$anonfun$mkFolds$1(i, i2, quotient, folds));
        return folds.toList();
    }

    /**
     * Produces an array of training indices.
     * <p>
     * Iterates over the pairs in {@code option}, or over the fallback supplied by
     * {@code Datasets$$anonfun$1(i)} when the option is empty, and lets
     * {@code Datasets$$anonfun$mkTrainIndices$1} append to a buffer that is then
     * converted to an {@code int[]}.
     * NOTE(review): presumably each pair is a (start, end) index range - confirm against
     * the callback.
     *
     * @param i      value handed to the fallback supplier when {@code option} is empty
     * @param option optional collection of integer pairs to traverse
     * @return the collected indices as a primitive int array
     */
    public <F> int[] mkTrainIndices(int i, Option<Iterable<Tuple2<Object, Object>>> option) {
        ArrayBuffer indices = new ArrayBuffer();
        Iterable ranges = (Iterable) option.getOrElse(new Datasets$$anonfun$1(i));
        ranges.foreach(new Datasets$$anonfun$mkTrainIndices$1(indices));
        Object asArray = indices.toArray(ClassTag$.MODULE$.Int());
        return (int[]) asArray;
    }

    /**
     * Wraps the single pair {@code (0, i)} in a one-element collection.
     *
     * @param i second component of the pair
     * @return a wrapped one-element array containing {@code (0, i)}
     */
    public Iterable<Tuple2<Object, Object>> org$clulab$learning$Datasets$$mkFullFold(int i) {
        Tuple2[] single = new Tuple2[]{new Tuple2.mcII.sp(0, i)};
        return Predef$.MODULE$.wrapRefArray(single);
    }

    /**
     * Scales a dataset by dispatching on its concrete type: {@code RVFDataset} instances
     * go to {@code svmScaleRVFDataset}, {@code BVFDataset} instances go to
     * {@code svmScaleBVFDataset}.
     *
     * @param dataset dataset to scale
     * @param d       first bound forwarded to the type-specific scaler (default -1.0)
     * @param d2      second bound forwarded to the type-specific scaler (default 1.0)
     * @return the scale range produced by the type-specific scaler
     * @throws RuntimeException if {@code dataset} is neither an RVF nor a BVF dataset
     */
    public <L, F> ScaleRange<F> svmScaleDataset(Dataset<L, F> dataset, double d, double d2) {
        // The RVF check comes first, exactly as in the original dispatch order.
        if (dataset instanceof RVFDataset) {
            return svmScaleRVFDataset((RVFDataset) dataset, d, d2);
        }
        if (dataset instanceof BVFDataset) {
            return svmScaleBVFDataset((BVFDataset) dataset, d, d2);
        }
        throw new RuntimeException("ERROR: unknown dataset type in svmScale!");
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 2 of {@code svmScaleDataset}.
     *
     * @return {@code -1.0}
     */
    public <L, F> double svmScaleDataset$default$2() {
        return -1.0;
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 3 of {@code svmScaleDataset}.
     *
     * @return {@code 1.0}
     */
    public <L, F> double svmScaleDataset$default$3() {
        return 1.0;
    }

    /**
     * Produces a new counter derived from {@code counter} using a previously computed
     * scale range.
     * <p>
     * Both {@code scaleRange} and {@code counter} are asserted to be non-null. For every
     * key of {@code counter}, {@code Datasets$$anonfun$svmScaleDatum$1} is invoked with the
     * source counter, the range, both bounds and the fresh output counter.
     * NOTE(review): presumably the callback writes the rescaled value of each feature
     * into the output counter - confirm.
     *
     * @param counter    feature counter to read from; must not be null
     * @param scaleRange scale range to apply; must not be null
     * @param d          first bound passed to the callback (default -1.0)
     * @param d2         second bound passed to the callback (default 1.0)
     * @return the newly created counter
     */
    public <F> Counter<F> svmScaleDatum(Counter<F> counter, ScaleRange<F> scaleRange, double d, double d2) {
        Predef$ predef = Predef$.MODULE$;
        predef.assert(scaleRange != null);
        predef.assert(counter != null);
        Counter<F> scaled = new Counter<>();
        counter.keySet().foreach(
                new Datasets$$anonfun$svmScaleDatum$1(counter, scaleRange, d, d2, scaled));
        return scaled;
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 3 of {@code svmScaleDatum}.
     *
     * @return {@code -1.0}
     */
    public <F> double svmScaleDatum$default$3() {
        return -1.0;
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 4 of {@code svmScaleDatum}.
     *
     * @return {@code 1.0}
     */
    public <F> double svmScaleDatum$default$4() {
        return 1.0;
    }

    /**
     * Scales a ranking dataset by treating it as a {@code FeatureTraversable} and
     * delegating to {@code svmScaleFeatureTraversable}.
     *
     * @param rankingDataset dataset to scale; must also be a {@code FeatureTraversable}
     * @param d              first bound forwarded to the scaler (default -1.0)
     * @param d2             second bound forwarded to the scaler (default 1.0)
     * @return the scale range computed by {@code svmScaleFeatureTraversable}
     * @throws RuntimeException if a {@link ClassCastException} is raised while casting or
     *                          scaling; the original exception is attached as the cause
     */
    public <L, F> ScaleRange<F> svmScaleRankingDataset(RankingDataset<F> rankingDataset, double d, double d2) {
        try {
            return svmScaleFeatureTraversable((FeatureTraversable) rankingDataset, d, d2);
        } catch (ClassCastException e) {
            // Chain the ClassCastException so its stack trace is not lost; the message
            // text is the same as before.
            throw new RuntimeException("Feature traverser not implemented! " + e.getMessage(), e);
        }
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 2 of {@code svmScaleRankingDataset}.
     *
     * @return {@code -1.0}
     */
    public <L, F> double svmScaleRankingDataset$default$2() {
        return -1.0d;
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 3 of {@code svmScaleRankingDataset}.
     *
     * @return {@code 1.0}
     */
    public <L, F> double svmScaleRankingDataset$default$3() {
        return 1.0d;
    }

    /**
     * Computes a scale range for a feature-traversable structure and then applies it.
     * <p>
     * Two passes are made, each through a freshly requested {@code featureUpdater()}:
     * a {@code foreach} with a callback that only receives the new range, followed by an
     * {@code updateAll} with a callback that receives both bounds and the range.
     * NOTE(review): presumably pass one records per-feature min/max and pass two rewrites
     * the values in place - confirm against the callbacks.
     *
     * @param featureTraversable structure whose features are traversed and updated
     * @param d                  first bound handed to the update callback
     * @param d2                 second bound handed to the update callback
     * @return the scale range created by this call
     */
    public <F> ScaleRange<F> svmScaleFeatureTraversable(FeatureTraversable<F, Object> featureTraversable, double d, double d2) {
        ScaleRange<F> ranges = new ScaleRange<>();
        featureTraversable.featureUpdater()
                .foreach(new Datasets$$anonfun$svmScaleFeatureTraversable$1(ranges));
        featureTraversable.featureUpdater()
                .updateAll(new Datasets$$anonfun$svmScaleFeatureTraversable$2(d, d2, ranges));
        return ranges;
    }

    /**
     * Placeholder for BVF dataset scaling; always fails.
     *
     * @param bVFDataset unused
     * @param d          unused
     * @param d2         unused
     * @return never returns normally
     * @throws RuntimeException always, because BVF scaling is not implemented
     */
    public <L, F> ScaleRange<F> svmScaleBVFDataset(BVFDataset<L, F> bVFDataset, double d, double d2) {
        final String message = "ERROR: scaling of BVF datasets not implemented yet!";
        throw new RuntimeException(message);
    }

    /**
     * Computes a scale range for an RVF dataset and applies it, in two passes over the
     * indices {@code [0, rVFDataset.size())}.
     * <p>
     * The first pass callback receives the dataset and the new range; the second pass
     * callback additionally receives both bounds.
     * NOTE(review): presumably pass one gathers per-feature min/max and pass two rescales
     * the stored values - confirm against the callbacks.
     *
     * @param rVFDataset dataset to scale
     * @param d          first bound handed to the second-pass callback
     * @param d2         second bound handed to the second-pass callback
     * @return the scale range created by this call
     */
    public <L, F> ScaleRange<F> svmScaleRVFDataset(RVFDataset<L, F> rVFDataset, double d, double d2) {
        ScaleRange<F> ranges = new ScaleRange<>();

        // Pass 1: callback sees only the dataset and the range being built.
        RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(0), rVFDataset.size())
                .foreach$mVc$sp(new Datasets$$anonfun$svmScaleRVFDataset$1(rVFDataset, ranges));

        // Pass 2: callback also gets both bounds; size() is re-read, as in the original.
        RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(0), rVFDataset.size())
                .foreach$mVc$sp(new Datasets$$anonfun$svmScaleRVFDataset$2(rVFDataset, d, d2, ranges));

        return ranges;
    }

    /**
     * Linearly maps {@code d} from the interval {@code [d2, d3]} onto {@code [d4, d5]}:
     * {@code d2} maps to {@code d4} and {@code d3} maps to {@code d5}.
     *
     * @param d  value to map
     * @param d2 source-interval end that maps to {@code d4}
     * @param d3 source-interval end that maps to {@code d5}
     * @param d4 target value for {@code d2}
     * @param d5 target value for {@code d3}; also returned when the source interval is
     *           degenerate ({@code d2 == d3})
     * @return the mapped value
     */
    public double org$clulab$learning$Datasets$$scale(double d, double d2, double d3, double d4, double d5) {
        // Degenerate source interval: avoid dividing by zero and return the second target.
        if (d2 == d3) {
            return d5;
        }
        double targetSpan = d5 - d4;
        double offset = d - d2;
        double sourceSpan = d3 - d2;
        // Same operation order as the single-expression form: multiply, divide, then add.
        return d4 + ((targetSpan * offset) / sourceSpan);
    }

    /**
     * Greedy, iterative selection of feature groups.
     * <p>
     * After logging the cross-validated score obtained with all features, the method
     * repeatedly scores every not-yet-chosen group in parallel (via
     * {@code Datasets$$anonfun$3}) and lets
     * {@code Datasets$$anonfun$incrementalFeatureSelection$1} pick the best one. The loop
     * ends with the first iteration in which no best group is reported.
     * NOTE(review): the scoring and acceptance criteria live in the callbacks, which are
     * not visible here; presumably a group is accepted only if it beats the best score so
     * far - confirm.
     * <p>
     * A single fork-join pool of {@code i2} workers is shared by all iterations and shut
     * down when the search finishes or fails. Previously a new pool was created on every
     * iteration and never shut down, leaking its worker threads.
     *
     * @param dataset   dataset used for cross-validation
     * @param function0 factory producing a fresh classifier
     * @param function1 scoring function applied to the cross-validation output; its result
     *                  is unboxed as a double
     * @param map       feature-group name to the set of features in that group
     * @param i         fold count forwarded to {@code crossValidate} (default 5)
     * @param i2        parallelism of the fork-join pool (default 8)
     * @return the names of the chosen groups
     */
    public <L, F> Set<String> incrementalFeatureSelection(Dataset<L, F> dataset, Function0<Classifier<L, F>> function0, Function1<Iterable<Tuple2<L, L>>, Object> function1, Map<String, Set<Object>> map, int i, int i2) {
        double allFeaturesScore = BoxesRunTime.unboxToDouble(function1.apply(crossValidate(dataset, function0, i)));
        logger().info("Iteration #0: Score using ALL features is " + allFeaturesScore + ".");
        logger().info("Iteration #0: Using " + map.size() + " feature groups and " + dataset.featureLexicon().size() + " features.");

        HashSet chosenGroups = new HashSet();
        HashSet betterGroups = new HashSet();
        HashSet chosenFeatures = new HashSet();
        DoubleRef bestScore = DoubleRef.create(Double$.MODULE$.MinValue());
        IntRef iteration = IntRef.create(1);

        // One pool for the whole search; it is released in the finally block below.
        ForkJoinPool pool = new ForkJoinPool(i2);
        try {
            boolean searching = true;
            while (searching) {
                // Filled in by the foreach callback when a group is selected this round.
                ObjectRef bestGroup = ObjectRef.create((Object) null);
                ObjectRef bestGroupFeatures = ObjectRef.create((Object) null);

                ParSet candidates = ((Set) map.keySet().filter(new Datasets$$anonfun$2(chosenGroups))).toSet().par();
                candidates.tasksupport_$eq(new ForkJoinTaskSupport(pool));
                ((ParIterableLike) candidates.map(new Datasets$$anonfun$3(dataset, function0, function1, map, i, chosenFeatures), ParSet$.MODULE$.canBuildFrom())).toList().foreach(new Datasets$$anonfun$incrementalFeatureSelection$1(map, betterGroups, bestScore, iteration, bestGroup, bestGroupFeatures));

                String bestGroupName = (String) bestGroup.elem;
                if (bestGroupName == null) {
                    searching = false;
                    logger().info("Iteration #" + iteration.elem + ": no better group found. Search complete.");
                } else {
                    logger().info("Iteration #" + iteration.elem + ": best group found is [" + bestGroupName + "] with score " + bestScore.elem + ".");
                    chosenGroups.$plus$eq(bestGroupName);
                    chosenFeatures.$plus$plus$eq((Set) bestGroupFeatures.elem);
                    logger().info("Iteration #" + iteration.elem + ": we now have " + chosenGroups.size() + " chosen groups and " + chosenFeatures.size() + " chosen features.");
                }
                iteration.elem++;
            }
        } finally {
            // All parallel work has been collected by toList() before this point.
            pool.shutdown();
        }

        logger().info("Iteration #" + iteration.elem + ": process ended with score " + bestScore.elem + " using " + chosenGroups.size() + " chosen groups and " + chosenFeatures.size() + " chosen features.");
        logger().info("Found " + betterGroups.size() + " better groups: " + betterGroups.toSet());
        return chosenGroups.toSet();
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 5 of {@code incrementalFeatureSelection}.
     *
     * @return {@code 5}
     */
    public <L, F> int incrementalFeatureSelection$default$5() {
        return 5;
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 6 of {@code incrementalFeatureSelection}.
     *
     * @return {@code 8}
     */
    public <L, F> int incrementalFeatureSelection$default$6() {
        return 8;
    }

    /**
     * Selects a prefix of the informativeness-ranked feature list by growing the prefix
     * in steps of {@code i3} for as long as the cross-validated score keeps improving.
     * <p>
     * The ranking comes from {@code sortFeaturesByInformativeness(dataset, i).sorted()}.
     * Each candidate prefix is scored by cross-validating on
     * {@code dataset.keepOnly(prefix ids)}. The search stops at the first prefix that does
     * not strictly improve the best score, or once the prefix covering every ranked
     * feature has been scored. Previously that full-size prefix was cross-validated a
     * second time on the identical feature set before the loop could terminate.
     *
     * @param dataset   dataset to select features for
     * @param function0 factory producing a fresh classifier
     * @param function1 scoring function applied to the cross-validation output; its result
     *                  is unboxed as a double
     * @param i         threshold forwarded to {@code sortFeaturesByInformativeness}
     *                  (default 10)
     * @param i2        fold count forwarded to {@code crossValidate} (default 5)
     * @param i3        step by which the prefix length grows (default 1000)
     * @return the ids of the features in the best-scoring prefix
     */
    public <L, F> Set<Object> featureSelectionByInformativeness(Dataset<L, F> dataset, Function0<Classifier<L, F>> function0, Function1<Iterable<Tuple2<L, L>>, Object> function1, int i, int i2, int i3) {
        double allFeaturesScore = BoxesRunTime.unboxToDouble(function1.apply(crossValidate(dataset, function0, i2)));
        logger().info("Score using ALL features is " + allFeaturesScore + ".");

        Tuple2[] ranked = (Tuple2[]) sortFeaturesByInformativeness(dataset, i).sorted().toArray(ClassTag$.MODULE$.apply(Tuple2.class));
        final int total = ranked.length;

        logger().debug("Top 20 most informative features:");
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), package$.MODULE$.min(20, total)).foreach$mVc$sp(new Datasets$$anonfun$featureSelectionByInformativeness$1(dataset, ranked));

        double bestScore = Double$.MODULE$.MinValue();
        int bestCut = 0;
        int cut = package$.MODULE$.min(i3, total);
        boolean improving = true;
        while (cut <= total && improving) {
            Object[] prefix = (Object[]) Predef$.MODULE$.refArrayOps(ranked).slice(0, cut);
            int[] prefixIds = (int[]) Predef$.MODULE$.refArrayOps(prefix).map(new Datasets$$anonfun$4(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Int()));
            double score = BoxesRunTime.unboxToDouble(function1.apply(crossValidate(dataset.keepOnly(Predef$.MODULE$.intArrayOps(prefixIds).toSet()), function0, i2)));
            if (score > bestScore) {
                bestScore = score;
                bestCut = cut;
                logger().debug("Found better cut at " + bestCut + " with score " + bestScore);
            } else {
                improving = false;
            }
            if (cut == total) {
                // Every ranked feature is already included; another round would only
                // re-score the same feature set, so stop here.
                improving = false;
            }
            cut = package$.MODULE$.min(total, cut + i3);
        }

        logger().info("Cutting features at " + bestCut + " out of " + total + ".");
        Object[] chosen = (Object[]) Predef$.MODULE$.refArrayOps(ranked).slice(0, bestCut);
        int[] chosenIds = (int[]) Predef$.MODULE$.refArrayOps(chosen).map(new Datasets$$anonfun$featureSelectionByInformativeness$2(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Int()));
        return Predef$.MODULE$.intArrayOps(chosenIds).toSet();
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 4 of {@code featureSelectionByInformativeness}.
     *
     * @return {@code 10}
     */
    public <L, F> int featureSelectionByInformativeness$default$4() {
        return 10;
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 5 of {@code featureSelectionByInformativeness}.
     *
     * @return {@code 5}
     */
    public <L, F> int featureSelectionByInformativeness$default$5() {
        return 5;
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 6 of {@code featureSelectionByInformativeness}.
     *
     * @return {@code 1000}
     */
    public <L, F> int featureSelectionByInformativeness$default$6() {
        return 1000;
    }

    /**
     * Selects features by frequency threshold.
     * <p>
     * Starts from the cross-validated score with all features, then visits the integers
     * {@code 1..99} for as long as the filter callback (driven by a shared boolean flag)
     * lets them through. The per-value callback updates the best score, the chosen
     * threshold and the flag.
     * NOTE(review): presumably each visited integer is a candidate frequency threshold
     * and the flag is cleared once the score stops improving - confirm.
     *
     * @param dataset   dataset to select features for
     * @param function0 factory producing a fresh classifier
     * @param function1 scoring function applied to the cross-validation output; its result
     *                  is unboxed as a double
     * @param i         fold count forwarded to {@code crossValidate} (default 5)
     * @return the result of {@code keepMoreFrequent} for the chosen threshold
     */
    public <L, F> Set<Object> featureSelectionByFrequency(Dataset<L, F> dataset, Function0<Classifier<L, F>> function0, Function1<Iterable<Tuple2<L, L>>, Object> function1, int i) {
        double allFeaturesScore = BoxesRunTime.unboxToDouble(function1.apply(crossValidate(dataset, function0, i)));
        logger().info("Score using ALL features is " + allFeaturesScore + ".");

        Counter<Object> frequencies = sortFeaturesByFrequency(dataset);
        DoubleRef bestScore = DoubleRef.create(allFeaturesScore);
        IntRef chosenThreshold = IntRef.create(0);
        BooleanRef keepSearching = BooleanRef.create(true);

        RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(1), 100)
                .withFilter(new Datasets$$anonfun$featureSelectionByFrequency$1(keepSearching))
                .foreach(new Datasets$$anonfun$featureSelectionByFrequency$2(dataset, function0, function1, i, frequencies, bestScore, chosenThreshold, keepSearching));

        logger().info("Cutting features at " + chosenThreshold.elem + " out of " + frequencies.size() + ".");
        return keepMoreFrequent(frequencies, chosenThreshold.elem);
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 4 of {@code featureSelectionByFrequency}.
     *
     * @return {@code 5}
     */
    public <L, F> int featureSelectionByFrequency$default$4() {
        return 5;
    }

    /**
     * Collects a subset of the counter's keys into an immutable set.
     * <p>
     * The membership decision is made by {@code Datasets$$anonfun$keepMoreFrequent$1},
     * which sees the counter, the threshold {@code d} and the output set for every key.
     * NOTE(review): presumably a key is kept when its count exceeds {@code d} - confirm
     * the exact comparison in the callback.
     *
     * @param counter counter whose keys are examined
     * @param d       threshold handed to the callback
     * @return the keys the callback added, as an immutable set
     */
    public Set<Object> keepMoreFrequent(Counter<Object> counter, double d) {
        HashSet kept = new HashSet();
        counter.keySet().foreach(
                new Datasets$$anonfun$keepMoreFrequent$1(counter, d, kept));
        return kept.toSet();
    }

    /**
     * Scores the union of an existing feature set and the features of one named group.
     * <p>
     * The input set is copied, so {@code hashSet} itself is not modified here.
     *
     * @param str       name of the group to add; must be a key of {@code map}, otherwise
     *                  the {@code Option.get()} call fails
     * @param map       feature-group name to the set of features in that group
     * @param hashSet   features to combine with the group's features
     * @param dataset   dataset used for cross-validation
     * @param function0 factory producing a fresh classifier
     * @param i         fold count forwarded to {@code scoreFeatures}
     * @param function1 scoring function forwarded to {@code scoreFeatures}
     * @return the pair (group name, boxed score)
     */
    public <L, F> Tuple2<String, Object> scoreGroup(String str, Map<String, Set<Object>> map, HashSet<Object> hashSet, Dataset<L, F> dataset, Function0<Classifier<L, F>> function0, int i, Function1<Iterable<Tuple2<L, L>>, Object> function1) {
        HashSet<Object> combined = new HashSet<>();
        combined.$plus$plus$eq(hashSet);
        TraversableOnce groupFeatures = (TraversableOnce) map.get(str).get();
        combined.$plus$plus$eq(groupFeatures);
        double score = scoreFeatures(dataset, combined, function0, i, function1);
        return new Tuple2<>(str, BoxesRunTime.boxToDouble(score));
    }

    /**
     * Cross-validates on the dataset restricted to the given features and returns the
     * resulting score.
     *
     * @param dataset   full dataset; {@code keepOnly} is called on it with the feature set
     * @param hashSet   features to keep
     * @param function0 factory producing a fresh classifier
     * @param i         fold count forwarded to {@code crossValidate}
     * @param function1 scoring function applied to the cross-validation output; its result
     *                  is unboxed as a double
     * @return the score as a primitive double
     */
    public <L, F> double scoreFeatures(Dataset<L, F> dataset, HashSet<Object> hashSet, Function0<Classifier<L, F>> function0, int i, Function1<Iterable<Tuple2<L, L>>, Object> function1) {
        Object boxedScore = function1.apply(
                crossValidate(dataset.keepOnly(hashSet.toSet()), function0, i));
        return BoxesRunTime.unboxToDouble(boxedScore);
    }

    /**
     * Runs cross-validation over the folds produced by
     * {@code mkFolds(i, dataset.size())}.
     * <p>
     * {@code Datasets$$anonfun$crossValidate$1} is invoked once per fold with the dataset,
     * the classifier factory and an output buffer.
     * NOTE(review): presumably each returned pair is (gold label, predicted label) -
     * confirm the order in the callback.
     *
     * @param dataset   dataset to cross-validate on
     * @param function0 factory producing a fresh classifier
     * @param i         first argument to {@code mkFolds} (default 5)
     * @return the buffer contents converted to a list
     */
    public <L, F> Iterable<Tuple2<L, L>> crossValidate(Dataset<L, F> dataset, Function0<Classifier<L, F>> function0, int i) {
        ListBuffer output = new ListBuffer();
        mkFolds(i, dataset.size())
                .foreach(new Datasets$$anonfun$crossValidate$1(dataset, function0, output));
        return output.toList();
    }

    /**
     * Synthetic accessor (Scala {@code name$default$N} naming) for the default value of
     * parameter 3 of {@code crossValidate}.
     *
     * @return {@code 5}
     */
    public <L, F> int crossValidate$default$3() {
        return 5;
    }

    /**
     * Builds a counter over the dataset by visiting every index in
     * {@code [0, dataset.size())}.
     * <p>
     * NOTE(review): presumably the callback increments the count of each feature seen in
     * the datum at that index - confirm in
     * {@code Datasets$$anonfun$sortFeaturesByFrequency$1}.
     *
     * @param dataset dataset to scan
     * @return the counter filled by the per-index callback
     */
    public <L, F> Counter<Object> sortFeaturesByFrequency(Dataset<L, F> dataset) {
        Counter<Object> frequencies = new Counter<>();
        RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(0), dataset.size())
                .foreach$mVc$sp(new Datasets$$anonfun$sortFeaturesByFrequency$1(dataset, frequencies));
        return frequencies;
    }

    /**
     * Computes a per-feature score counter in four steps:
     * <ol>
     *   <li>scan all datums into a first counter;</li>
     *   <li>select a subset of that counter's keys using the threshold {@code i}
     *       (logged as the features "with count &gt; i");</li>
     *   <li>scan all datums again, filling two counters and two hash maps;</li>
     *   <li>visit every selected feature and write its score into the result.</li>
     * </ol>
     * NOTE(review): the statistics gathered in step 3 and the score formula of step 4 are
     * defined in the callbacks, which are not visible here; presumably step 4 uses
     * {@code informationGain} - confirm.
     *
     * @param dataset dataset to analyse
     * @param i       threshold handed to the selection callback of step 2
     * @return counter filled by the step-4 callback
     */
    public <L, F> Counter<Object> sortFeaturesByInformativeness(Dataset<L, F> dataset, int i) {
        // Step 1: first scan over all datum indices.
        Counter rawCounts = new Counter();
        RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(0), dataset.size())
                .foreach$mVc$sp(new Datasets$$anonfun$sortFeaturesByInformativeness$1(dataset, rawCounts));

        // Step 2: choose the features that take part in the remaining steps.
        HashSet selected = new HashSet();
        rawCounts.keySet().foreach(
                new Datasets$$anonfun$sortFeaturesByInformativeness$4(i, rawCounts, selected));
        logger().info("Using " + selected.size() + " out of " + dataset.featureLexicon().size() + " features with count > " + i + ".");

        // Step 3: second scan; the role of each accumulator is decided by the callback.
        Counter tallyOne = new Counter();
        Counter tallyTwo = new Counter();
        HashMap tableOne = new HashMap();
        HashMap tableTwo = new HashMap();
        RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(0), dataset.size())
                .foreach$mVc$sp(new Datasets$$anonfun$sortFeaturesByInformativeness$2(dataset, selected, tallyOne, tallyTwo, tableOne, tableTwo));

        // Step 4: score each selected feature.
        Counter<Object> scores = new Counter<>();
        selected.foreach(
                new Datasets$$anonfun$sortFeaturesByInformativeness$3(dataset, selected, tallyOne, tallyTwo, tableOne, tableTwo, scores, IntRef.create(0)));
        return scores;
    }

    /**
     * Combines two accumulated sums into a single value:
     * {@code (sum1 * d) / i + (sum2 * d2) / i}.
     * <p>
     * Both sums start at 0.0 and are updated by
     * {@code Datasets$$anonfun$informationGain$1} for every index in {@code [0, i2)}.
     * NOTE(review): presumably the indices are label ids and the sums are entropy-style
     * terms for "feature present" and "feature absent" - confirm in the callback.
     *
     * @param d        weight of the first sum; also passed to the callback
     * @param d2       weight of the second sum; also passed to the callback
     * @param counter  first counter passed to the callback
     * @param counter2 second counter passed to the callback
     * @param i        divisor applied to both weighted sums
     * @param i2       exclusive upper bound of the indices visited
     * @return the combined value
     */
    public <L, F> double informationGain(double d, double d2, Counter<Object> counter, Counter<Object> counter2, int i, int i2) {
        DoubleRef firstSum = DoubleRef.create(0.0d);
        DoubleRef secondSum = DoubleRef.create(0.0d);
        RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(0), i2)
                .foreach$mVc$sp(new Datasets$$anonfun$informationGain$1(d, d2, counter, counter2, firstSum, secondSum));
        double firstTerm = (firstSum.elem * d) / i;
        double secondTerm = (secondSum.elem * d2) / i;
        return firstTerm + secondTerm;
    }

    // Private constructor: the only visible call site is the static initializer of this class.
    private Datasets$() {
        // Publish this instance as the singleton before the logger is assigned.
        MODULE$ = this;
        // The logger is obtained for Datasets.class rather than Datasets$.class.
        this.logger = LoggerFactory.getLogger(Datasets.class);
    }
}
