package org.clulab.learning;

import org.clulab.struct.Counter;
import org.clulab.struct.Lexicon;
import scala.$less$colon$less$;
import scala.MatchError;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Tuple2;
import scala.collection.ArrayOps$;
import scala.collection.Iterable;
import scala.collection.IterableOnceOps;
import scala.collection.immutable.Map;
import scala.collection.immutable.Set;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.ListBuffer;
import scala.math.Ordering$DeprecatedDoubleOrdering$;
import scala.math.Ordering$Int$;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.RichDouble$;
import scala.runtime.RichInt$;

/* compiled from: Dataset.scala */
@ScalaSignature(bytes = "\u0006\u0005\u0005%e\u0001\u0002\u000b\u0016\u0001qA\u0011b\r\u0001\u0003\u0002\u0003\u0006I\u0001\u000e\u001e\t\u0013q\u0002!\u0011!Q\u0001\nur\u0004\"\u0003!\u0001\u0005\u0003\u0005\u000b\u0011B!M\u0011!q\u0005A!b\u0001\n\u0003y\u0005\u0002\u0003+\u0001\u0005\u0003\u0005\u000b\u0011\u0002)\t\u0011U\u0003!1!Q\u0001\fYCQ\u0001\u0018\u0001\u0005\u0002uCQ\u0001\u0018\u0001\u0005\u0002\u0015DQ!\u001b\u0001\u0005\u0002)DQa\u001d\u0001\u0005\nQDq!a\u0002\u0001\t\u0003\nI\u0001C\u0004\u0002\u0010\u0001!\t%!\u0005\t\u000f\u0005u\u0001\u0001\"\u0001\u0002 !9\u0011q\u0007\u0001\u0005B\u0005e\u0002bBA#\u0001\u0011\u0005\u0011q\t\u0005\b\u0003?\u0002A\u0011IA1\u0011\u001d\t)\u0007\u0001C!\u0003OBq!!\u001c\u0001\t\u0003\ty\u0007C\u0004\u0002��\u0001!\t%!!\u0003\u0015\t3f\tR1uCN,GO\u0003\u0002\u0017/\u0005AA.Z1s]&twM\u0003\u0002\u00193\u000511\r\\;mC\nT\u0011AG\u0001\u0004_J<7\u0001A\u000b\u0004;\u0011\n4C\u0001\u0001\u001f!\u0011y\u0002E\t\u0019\u000e\u0003UI!!I\u000b\u0003\u000f\u0011\u000bG/Y:fiB\u00111\u0005\n\u0007\u0001\t\u0015)\u0003A1\u0001'\u0005\u0005a\u0015CA\u0014.!\tA3&D\u0001*\u0015\u0005Q\u0013!B:dC2\f\u0017B\u0001\u0017*\u0005\u001dqu\u000e\u001e5j]\u001e\u0004\"\u0001\u000b\u0018\n\u0005=J#aA!osB\u00111%\r\u0003\u0006e\u0001\u0011\rA\n\u0002\u0002\r\u0006\u0011A\u000e\u001c\t\u0004ka\u0012S\"\u0001\u001c\u000b\u0005]:\u0012AB:ueV\u001cG/\u0003\u0002:m\t9A*\u001a=jG>t\u0017BA\u001e!\u00031a\u0017MY3m\u0019\u0016D\u0018nY8o\u0003\t1G\u000eE\u00026qAJ!a\u0010\u0011\u0002\u001d\u0019,\u0017\r^;sK2+\u00070[2p]\u0006\u0011An\u001d\t\u0004\u0005\u001eKU\"A\"\u000b\u0005\u0011+\u0015aB7vi\u0006\u0014G.\u001a\u0006\u0003\r&\n!bY8mY\u0016\u001cG/[8o\u0013\tA5IA\u0006BeJ\f\u0017PQ;gM\u0016\u0014\bC\u0001\u0015K\u0013\tY\u0015FA\u0002J]RL!!\u0014\u0011\u0002\r1\f'-\u001a7t\u0003!1W-\u0019;ve\u0016\u001cX#\u0001)\u0011\u0007\t;\u0015\u000bE\u0002)%&K!aU\u0015\u0003\u000b\u0005\u0013(/Y=\u0002\u0013\u0019,\u0017\r^;sKN\u0004\u0013AC3wS\u0012,gnY3%cA\u
0019qK\u0017\u0019\u000e\u0003aS!!W\u0015\u0002\u000fI,g\r\\3di&\u00111\f\u0017\u0002\t\u00072\f7o\u001d+bO\u00061A(\u001b8jiz\"RAX1cG\u0012$\"a\u00181\u0011\t}\u0001!\u0005\r\u0005\u0006+\u001e\u0001\u001dA\u0016\u0005\u0006g\u001d\u0001\r\u0001\u000e\u0005\u0006y\u001d\u0001\r!\u0010\u0005\u0006\u0001\u001e\u0001\r!\u0011\u0005\u0006\u001d\u001e\u0001\r\u0001\u0015\u000b\u0002MR\u0011ql\u001a\u0005\bQ\"\t\t\u0011q\u0001W\u0003))g/\u001b3f]\u000e,GEM\u0001\tIAdWo\u001d\u0013fcR\u00111N\u001c\t\u0003Q1L!!\\\u0015\u0003\tUs\u0017\u000e\u001e\u0005\u0006_&\u0001\r\u0001]\u0001\u0006I\u0006$X/\u001c\t\u0005?E\u0014\u0003'\u0003\u0002s+\t)A)\u0019;v[\u0006ya-Z1ukJ,7\u000fV8BeJ\f\u0017\u0010\u0006\u0002Rk\")aO\u0003a\u0001o\u0006\u0011am\u001d\t\u0005q\u0006\u0005\u0001G\u0004\u0002z}:\u0011!0`\u0007\u0002w*\u0011ApG\u0001\u0007yI|w\u000e\u001e \n\u0003)J!a`\u0015\u0002\u000fA\f7m[1hK&!\u00111AA\u0003\u0005!IE/\u001a:bE2,'BA@*\u0003\u001di7\u000eR1uk6$2\u0001]A\u0006\u0011\u0019\tia\u0003a\u0001\u0013\u0006\u0019!o\\<\u0002\u001f\u0019,\u0017\r^;sKN\u001cu.\u001e8uKJ$B!a\u0005\u0002\u001aA!Q'!\u0006J\u0013\r\t9B\u000e\u0002\b\u0007>,h\u000e^3s\u0011\u0019\tY\u0002\u0004a\u0001\u0013\u0006YA-\u0019;v[>3gm]3u\u00035\u0019w.\u001e8u\r\u0016\fG/\u001e:fgR1\u0011\u0011EA\u0019\u0003g\u0001R!a\t\u0002,%sA!!\n\u0002(A\u0011!0K\u0005\u0004\u0003SI\u0013A\u0002)sK\u0012,g-\u0003\u0003\u0002.\u0005=\"aA*fi*\u0019\u0011\u0011F\u0015\t\u000bYl\u0001\u0019\u0001)\t\r\u0005UR\u00021\u0001J\u0003%!\bN]3tQ>dG-A\u0010sK6|g/\u001a$fCR,(/Z:Cs&sgm\u001c:nCRLwN\\$bS:$2AHA\u001e\u0011\u001d\tiD\u0004a\u0001\u0003\u007f\t\u0011\u0002]2u)>\\U-\u001a9\u0011\u0007!\n\t%C\u0002\u0002D%\u0012a\u0001R8vE2,\u0017aF2p[B,H/Z%oM>\u0014X.\u0019;j_:<\u0015-\u001b8t)\u0019\tI%a\u0017\u0002^A9\u0001&a\u0013\u0002P\u0005U\u0013bAA'S\t1A+\u001e9mKJ\u00022aHA)\u0013\r\t\u0019&\u0006\u0002\u0010\u0013:4wN]7bi&|gnR1j]B9\u00111EA,\u0013\u0006=\u0013\u0002BA-\u0003_\u00111!T1q\u0011\u00151x\u00021\u0001Q\u0011\u00
15\u0001u\u00021\u0001B\u0003e\u0011X-\\8wK\u001a+\u0017\r^;sKN\u0014\u0015P\u0012:fcV,gnY=\u0015\u0007y\t\u0019\u0007\u0003\u0004\u00026A\u0001\r!S\u0001\tW\u0016,\u0007o\u00148msR\u0019a$!\u001b\t\u000f\u0005-\u0014\u00031\u0001\u0002\"\u0005qa-Z1ukJ,7\u000fV8LK\u0016\u0004\u0018aC6fKB|e\u000e\\=S_^$R!UA9\u0003kBa!a\u001d\u0013\u0001\u0004\t\u0016!\u00024fCR\u001c\bbBA<%\u0001\u0007\u0011\u0011P\u0001\u0010M\u0016\fG/\u001e:f\u0013:$W\r_'baB)!)a\u001fJ\u0013&\u0019\u0011QP\"\u0003\u000f!\u000b7\u000f['ba\u0006\u0001Bo\\\"pk:$XM\u001d#bi\u0006\u001cX\r^\u000b\u0003\u0003\u0007\u0003RaHACEAJ1!a\"\u0016\u00059\u0019u.\u001e8uKJ$\u0015\r^1tKR\u0004")
/* loaded from: input_file:org/clulab/learning/BVFDataset.class */
/*
 * Dataset whose feature rows are stored as int arrays of feature-lexicon indices.
 *
 * Each datum contributes one entry to labels() (its label's index in labelLexicon()) and one
 * int[] to features() (the indices of its features in featureLexicon(), sorted ascending, with
 * repeated features kept as repeated indices). This source is decompiler output for a Scala
 * class; the public static $anonfun$... methods are the lifted bodies of the Scala closures
 * and are referenced by name from the lambdas below, so they must keep their names and shapes.
 */
public class BVFDataset<L, F> extends Dataset<L, F> {
    /** One row per datum: the sorted feature-lexicon indices of that datum. */
    private final ArrayBuffer<int[]> features;
    /** ClassTag for F; used by mkDatum to build an array of F and forwarded by keepOnly. */
    private final ClassTag<F> evidence$1;

    /** Returns the backing buffer of feature rows (not a copy). */
    public ArrayBuffer<int[]> features() {
        return this.features;
    }

    /**
     * Appends a datum to this dataset.
     *
     * The label is added to the label lexicon and its index appended to labels(); the features
     * are converted with featuresToArray and appended to features().
     *
     * @param datum the datum to add; must be a BVFDatum
     * @throws RuntimeException if datum is not a BVFDatum
     */
    @Override // org.clulab.learning.Dataset
    public void $plus$eq(Datum<L, F> datum) {
        if (!(datum instanceof BVFDatum)) {
            throw new RuntimeException("ERROR: you cannot add a non BVFDatum to a BVFDataset!");
        }
        BVFDatum bVFDatum = (BVFDatum) datum;
        labels().$plus$eq(BoxesRunTime.boxToInteger(labelLexicon().add(bVFDatum.label())));
        features().$plus$eq(featuresToArray(bVFDatum.mo120features()));
    }

    /**
     * Adds every feature to the feature lexicon and returns the resulting indices sorted in
     * ascending order. No de-duplication is performed, so a feature that occurs several times
     * yields its index several times.
     */
    private int[] featuresToArray(Iterable<F> iterable) {
        ListBuffer listBuffer = new ListBuffer();
        iterable.foreach(obj -> {
            return listBuffer.$plus$eq(BoxesRunTime.boxToInteger(this.featureLexicon().add(obj)));
        });
        return (int[]) ((IterableOnceOps) listBuffer.toList().sorted(Ordering$Int$.MODULE$)).toArray(ClassTag$.MODULE$.Int());
    }

    /**
     * Rebuilds the datum stored at row i by looking the label index and each feature index
     * back up in their lexicons.
     *
     * @param i row offset into labels() and features()
     * @return a new BVFDatum for that row
     */
    @Override // org.clulab.learning.Dataset
    public Datum<L, F> mkDatum(int i) {
        return new BVFDatum(labelLexicon().get(BoxesRunTime.unboxToInt(labels().apply(i))), Predef$.MODULE$.genericWrapArray(ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.intArrayOps((int[]) features().apply(i)), obj -> {
            return $anonfun$mkDatum$1(this, BoxesRunTime.unboxToInt(obj));
        }, this.evidence$1)));
    }

    /**
     * Counts the feature indices of row i, incrementing by Counter's default amount for each
     * occurrence.
     *
     * @param i row offset into features()
     * @return a new Counter keyed by feature index
     */
    @Override // org.clulab.learning.Dataset
    public Counter<Object> featuresCounter(int i) {
        Counter<Object> counter = new Counter<>();
        ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.intArrayOps((int[]) features().apply(i)), i2 -> {
            return counter.incrementCount(BoxesRunTime.boxToInteger(i2), counter.incrementCount$default$2());
        });
        return counter;
    }

    /**
     * Counts how often each feature index occurs over all given rows and returns the indices
     * whose count is at least the threshold. Sizes before and after filtering are logged at
     * debug level through the RVFDataset logger.
     *
     * @param arrayBuffer feature rows to scan
     * @param i minimum count (inclusive) a feature index needs to be returned
     * @return immutable set of the feature indices that meet the threshold
     */
    public Set<Object> countFeatures(ArrayBuffer<int[]> arrayBuffer, int i) {
        Counter counter = new Counter();
        arrayBuffer.foreach(iArr -> {
            $anonfun$countFeatures$1(counter, iArr);
            return BoxedUnit.UNIT;
        });
        RVFDataset$.MODULE$.logger().debug(new StringBuilder(40).append("Total unique features before filtering: ").append(counter.size()).toString());
        HashSet hashSet = new HashSet();
        counter.keySet().foreach(obj -> {
            return $anonfun$countFeatures$3(counter, i, hashSet, BoxesRunTime.unboxToInt(obj));
        });
        RVFDataset$.MODULE$.logger().debug(new StringBuilder(55).append("Total unique features after filtering with threshold ").append(i).append(": ").append(hashSet.size()).toString());
        return hashSet.toSet();
    }

    /**
     * Returns a dataset restricted to the features with the highest information gain.
     *
     * Features are ranked by descending information gain (computed against the dataset-wide
     * statistics) and the first ceil(d * numberOfFeatures) of them are kept via keepOnly.
     * The number to keep is checked with Predef.assert to be in (0, numberOfFeatures], so a
     * value of d that yields zero features (or more than exist) fails that assertion.
     *
     * @param d fraction of the distinct features to keep
     * @return the filtered dataset produced by keepOnly
     */
    @Override // org.clulab.learning.Dataset
    public Dataset<L, F> removeFeaturesByInformationGain(double d) {
        RVFDataset$.MODULE$.logger().debug("Computing information gain for all features in dataset...");
        Tuple2<InformationGain, Map<Object, InformationGain>> computeInformationGains = computeInformationGains(features(), labels());
        if (computeInformationGains == null) {
            throw new MatchError(computeInformationGains);
        }
        Tuple2 tuple2 = new Tuple2((InformationGain) computeInformationGains._1(), (Map) computeInformationGains._2());
        InformationGain informationGain = (InformationGain) tuple2._1();
        Map map = (Map) tuple2._2();
        RVFDataset$.MODULE$.logger().debug(new StringBuilder(40).append("Total unique features before filtering: ").append(map.size()).toString());
        // Collect (featureIndex, informationGain) pairs for every feature seen.
        ListBuffer listBuffer = new ListBuffer();
        map.keySet().foreach(obj -> {
            return $anonfun$removeFeaturesByInformationGain$1(listBuffer, map, informationGain, BoxesRunTime.unboxToInt(obj));
        });
        // The sort key is the negated gain, so the array is ordered from highest to lowest gain.
        Tuple2[] tuple2Arr = (Tuple2[]) ((IterableOnceOps) listBuffer.sortBy(tuple22 -> {
            return BoxesRunTime.boxToDouble($anonfun$removeFeaturesByInformationGain$2(tuple22));
        }, Ordering$DeprecatedDoubleOrdering$.MODULE$)).toArray(ClassTag$.MODULE$.apply(Tuple2.class));
        int ceil$extension = (int) RichDouble$.MODULE$.ceil$extension(Predef$.MODULE$.doubleWrapper(d * tuple2Arr.length));
        Predef$.MODULE$.assert(ceil$extension > 0 && ceil$extension <= tuple2Arr.length);
        RVFDataset$.MODULE$.logger().debug(new StringBuilder(42).append("Will keep ").append(ceil$extension).append(" features after filtering by IG.").toString());
        HashSet hashSet = new HashSet();
        RichInt$.MODULE$.until$extension(Predef$.MODULE$.intWrapper(0), ceil$extension).foreach(obj2 -> {
            return $anonfun$removeFeaturesByInformationGain$3(hashSet, tuple2Arr, BoxesRunTime.unboxToInt(obj2));
        });
        return keepOnly(hashSet.toSet());
    }

    /**
     * Accumulates the statistics needed for information gain.
     *
     * For every row, the dataset-wide InformationGain gets its datum count and the per-class
     * count of the row's label incremented; for every feature index in the row, that feature's
     * own InformationGain (created on first sight) is updated the same way. A feature index
     * that is repeated inside a row is therefore counted once per repetition.
     *
     * @param arrayBuffer feature rows
     * @param arrayBuffer2 label indices, parallel to arrayBuffer
     * @return pair of (dataset-wide statistics, immutable map from feature index to its statistics)
     */
    public Tuple2<InformationGain, Map<Object, InformationGain>> computeInformationGains(ArrayBuffer<int[]> arrayBuffer, ArrayBuffer<Object> arrayBuffer2) {
        HashMap hashMap = new HashMap();
        InformationGain informationGain = new InformationGain(InformationGain$.MODULE$.$lessinit$greater$default$1(), InformationGain$.MODULE$.$lessinit$greater$default$2());
        arrayBuffer.indices().foreach$mVc$sp(i -> {
            int[] iArr = (int[]) arrayBuffer.apply(i);
            int unboxToInt = BoxesRunTime.unboxToInt(arrayBuffer2.apply(i));
            informationGain.datumCount_$eq(informationGain.datumCount() + 1);
            informationGain.datumsByClass().incrementCount(BoxesRunTime.boxToInteger(unboxToInt), informationGain.datumsByClass().incrementCount$default$2());
            // The inner parameter has its own name: a lambda parameter may not redeclare the
            // enclosing lambda's parameter in Java.
            ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.intArrayOps(iArr), featureIndex -> {
                InformationGain informationGain2 = (InformationGain) hashMap.getOrElseUpdate(BoxesRunTime.boxToInteger(featureIndex), () -> {
                    return new InformationGain(InformationGain$.MODULE$.$lessinit$greater$default$1(), InformationGain$.MODULE$.$lessinit$greater$default$2());
                });
                informationGain2.datumCount_$eq(informationGain2.datumCount() + 1);
                return informationGain2.datumsByClass().incrementCount(BoxesRunTime.boxToInteger(unboxToInt), informationGain2.datumsByClass().incrementCount$default$2());
            });
        });
        return new Tuple2<>(informationGain, hashMap.toMap($less$colon$less$.MODULE$.refl()));
    }

    /**
     * Returns a dataset that keeps only the features occurring at least i times, as determined
     * by countFeatures over this dataset's rows.
     *
     * @param i minimum occurrence count (inclusive)
     */
    @Override // org.clulab.learning.Dataset
    public Dataset<L, F> removeFeaturesByFrequency(int i) {
        return keepOnly(countFeatures(features(), i));
    }

    /**
     * Builds a new BVFDataset containing only the given feature indices.
     *
     * Kept indices are renumbered consecutively from 0 in ascending order of their old index;
     * every row is rewritten with keepOnlyRow and the feature lexicon is remapped with
     * mapIndicesTo. The label lexicon and the labels() buffer are passed to the new dataset
     * as-is (the same instances, not copies).
     *
     * @param set feature indices to keep
     * @return the filtered dataset
     */
    @Override // org.clulab.learning.Dataset
    public Dataset<L, F> keepOnly(Set<Object> set) {
        HashMap hashMap = new HashMap();
        // Next free index in the compacted numbering.
        IntRef create = IntRef.create(0);
        RichInt$.MODULE$.until$extension(Predef$.MODULE$.intWrapper(0), featureLexicon().size()).foreach$mVc$sp(i -> {
            if (set.contains(BoxesRunTime.boxToInteger(i))) {
                hashMap.$plus$eq(Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(BoxesRunTime.boxToInteger(i)), BoxesRunTime.boxToInteger(create.elem)));
                create.elem++;
            }
        });
        ArrayBuffer arrayBuffer = new ArrayBuffer();
        features().indices().foreach(obj -> {
            return $anonfun$keepOnly$2(this, hashMap, arrayBuffer, BoxesRunTime.unboxToInt(obj));
        });
        return new BVFDataset(labelLexicon(), featureLexicon().mapIndicesTo(hashMap.toMap($less$colon$less$.MODULE$.refl())), labels(), arrayBuffer, this.evidence$1);
    }

    /**
     * Rewrites one feature row: entries whose index is a key of the map are replaced by the
     * mapped index, all other entries are dropped. The relative order of the surviving entries
     * is preserved.
     *
     * @param iArr the original row of feature indices
     * @param hashMap old feature index to new feature index
     * @return the rewritten row
     */
    public int[] keepOnlyRow(int[] iArr, HashMap<Object, Object> hashMap) {
        ArrayBuffer arrayBuffer = new ArrayBuffer();
        ArrayOps$.MODULE$.indices$extension(Predef$.MODULE$.intArrayOps(iArr)).foreach(obj -> {
            return $anonfun$keepOnlyRow$1(iArr, hashMap, arrayBuffer, BoxesRunTime.unboxToInt(obj));
        });
        return (int[]) arrayBuffer.toArray(ClassTag$.MODULE$.Int());
    }

    /**
     * Converts this dataset to a CounterDataset by turning every row into a Counter of its
     * feature indices. The lexicons and the labels() buffer are passed through unchanged.
     */
    @Override // org.clulab.learning.Dataset
    public CounterDataset<L, F> toCounterDataset() {
        ArrayBuffer arrayBuffer = new ArrayBuffer();
        features().indices().foreach(obj -> {
            return $anonfun$toCounterDataset$1(this, arrayBuffer, BoxesRunTime.unboxToInt(obj));
        });
        return new CounterDataset<>(labelLexicon(), featureLexicon(), labels(), arrayBuffer);
    }

    /** Closure body for mkDatum: looks up the feature stored at index i in the feature lexicon. */
    public static final /* synthetic */ Object $anonfun$mkDatum$1(BVFDataset bVFDataset, int i) {
        return bVFDataset.featureLexicon().get(i);
    }

    /** Closure body for countFeatures: increments the counter once per entry of the row. */
    public static final /* synthetic */ void $anonfun$countFeatures$1(Counter counter, int[] iArr) {
        ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.intArrayOps(iArr), i -> {
            return counter.incrementCount(BoxesRunTime.boxToInteger(i), counter.incrementCount$default$2());
        });
    }

    /** Closure body for countFeatures: adds feature i2 to the set when its count is at least i. */
    public static final /* synthetic */ Object $anonfun$countFeatures$3(Counter counter, int i, HashSet hashSet, int i2) {
        return counter.getCount(BoxesRunTime.boxToInteger(i2)) >= ((double) i) ? hashSet.$plus$eq(BoxesRunTime.boxToInteger(i2)) : BoxedUnit.UNIT;
    }

    /**
     * Closure body for removeFeaturesByInformationGain: appends the pair
     * (feature index, that feature's gain relative to the dataset-wide statistics).
     */
    public static final /* synthetic */ ListBuffer $anonfun$removeFeaturesByInformationGain$1(ListBuffer listBuffer, Map map, InformationGain informationGain, int i) {
        return listBuffer.$plus$eq(new Tuple2.mcID.sp(i, ((InformationGain) map.apply(BoxesRunTime.boxToInteger(i))).ig(informationGain)));
    }

    /** Sort key for removeFeaturesByInformationGain: the negated gain, giving descending order. */
    public static final /* synthetic */ double $anonfun$removeFeaturesByInformationGain$2(Tuple2 tuple2) {
        return -tuple2._2$mcD$sp();
    }

    /** Closure body for removeFeaturesByInformationGain: keeps the feature index at rank i. */
    public static final /* synthetic */ HashSet $anonfun$removeFeaturesByInformationGain$3(HashSet hashSet, Tuple2[] tuple2Arr, int i) {
        return hashSet.$plus$eq(BoxesRunTime.boxToInteger(tuple2Arr[i]._1$mcI$sp()));
    }

    /** Closure body for keepOnly: rewrites row i with keepOnlyRow and appends the result. */
    public static final /* synthetic */ ArrayBuffer $anonfun$keepOnly$2(BVFDataset bVFDataset, HashMap hashMap, ArrayBuffer arrayBuffer, int i) {
        return arrayBuffer.$plus$eq(bVFDataset.keepOnlyRow((int[]) bVFDataset.features().apply(i), hashMap));
    }

    /** Closure body for keepOnlyRow: appends the remapped index of entry i if it is being kept. */
    public static final /* synthetic */ Object $anonfun$keepOnlyRow$1(int[] iArr, HashMap hashMap, ArrayBuffer arrayBuffer, int i) {
        int i2 = iArr[i];
        return hashMap.contains(BoxesRunTime.boxToInteger(i2)) ? arrayBuffer.$plus$eq(hashMap.apply(BoxesRunTime.boxToInteger(i2))) : BoxedUnit.UNIT;
    }

    /** Closure body for toCounterDataset: builds the Counter for row i and appends it. */
    public static final /* synthetic */ ArrayBuffer $anonfun$toCounterDataset$1(BVFDataset bVFDataset, ArrayBuffer arrayBuffer, int i) {
        Counter counter = new Counter();
        ArrayOps$.MODULE$.indices$extension(Predef$.MODULE$.intArrayOps((int[]) bVFDataset.features().apply(i))).foreach(i2 -> {
            return counter.incrementCount(BoxesRunTime.boxToInteger(((int[]) bVFDataset.features().apply(i))[i2]), counter.incrementCount$default$2());
        });
        return arrayBuffer.$plus$eq(counter);
    }

    /**
     * Creates a dataset over existing lexicons and buffers. The arguments are stored as given,
     * not copied.
     *
     * @param lexicon label lexicon
     * @param lexicon2 feature lexicon
     * @param arrayBuffer label indices, one per datum
     * @param arrayBuffer2 feature rows, one per datum
     * @param classTag ClassTag for the feature type F
     */
    /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
    public BVFDataset(Lexicon<L> lexicon, Lexicon<F> lexicon2, ArrayBuffer<Object> arrayBuffer, ArrayBuffer<int[]> arrayBuffer2, ClassTag<F> classTag) {
        super(lexicon, lexicon2, arrayBuffer);
        this.features = arrayBuffer2;
        this.evidence$1 = classTag;
    }

    /**
     * Creates an empty dataset with fresh lexicons and empty label and feature buffers.
     *
     * @param classTag ClassTag for the feature type F
     */
    public BVFDataset(ClassTag<F> classTag) {
        this(new Lexicon(), new Lexicon(), new ArrayBuffer(), new ArrayBuffer(), classTag);
    }
}
