package org.apache.spark.ml.odkl.texts;

import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.DataFrame;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.Map;
import scala.collection.Seq;
import scala.collection.mutable.Queue;
import scala.math.Ordering$Long$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

/* compiled from: OdklCountVectorizer.scala */
@ScalaSignature(bytes = "\u0006\u0001M3A!\u0001\u0002\u0001\u001f\t\u0019r\nZ6m\u0007>,h\u000e\u001e,fGR|'/\u001b>fe*\u00111\u0001B\u0001\u0006i\u0016DHo\u001d\u0006\u0003\u000b\u0019\tAa\u001c3lY*\u0011q\u0001C\u0001\u0003[2T!!\u0003\u0006\u0002\u000bM\u0004\u0018M]6\u000b\u0005-a\u0011AB1qC\u000eDWMC\u0001\u000e\u0003\ry'oZ\u0002\u0001'\r\u0001\u0001C\u0006\t\u0003#Qi\u0011A\u0005\u0006\u0003'\u0019\tqAZ3biV\u0014X-\u0003\u0002\u0016%\ty1i\\;oiZ+7\r^8sSj,'\u000f\u0005\u0002\u001815\t!!\u0003\u0002\u001a\u0005\tIr\nZ6m\u0007>,h\u000e\u001e,fGR|'/\u001b>feB\u000b'/Y7t\u0011!Y\u0002A!b\u0001\n\u0003b\u0012aA;jIV\tQ\u0004\u0005\u0002\u001fI9\u0011qDI\u0007\u0002A)\t\u0011%A\u0003tG\u0006d\u0017-\u0003\u0002$A\u00051\u0001K]3eK\u001aL!!\n\u0014\u0003\rM#(/\u001b8h\u0015\t\u0019\u0003\u0005C\u0005)\u0001\t\u0005\t\u0015!\u0003\u001eS\u0005!Q/\u001b3!\u0013\tYB\u0003C\u0003,\u0001\u0011\u0005A&\u0001\u0004=S:LGO\u0010\u000b\u0003[9\u0002\"a\u0006\u0001\t\u000bmQ\u0003\u0019A\u000f\t\u000b-\u0002A\u0011\u0001\u0019\u0015\u00035BqA\r\u0001C\u0002\u0013\u00051'A\nj]\",'/\u001b;fIZ{7-\u00192vY\u0006\u0014\u00180F\u00015!\r)\u0004HO\u0007\u0002m)\u0011qGB\u0001\u0006a\u0006\u0014\u0018-\\\u0005\u0003sY\u0012Q\u0001U1sC6\u0004Ba\u000f \u001e\u00016\tAH\u0003\u0002>A\u0005Q1m\u001c7mK\u000e$\u0018n\u001c8\n\u0005}b$aA'baB\u0011q$Q\u0005\u0003\u0005\u0002\u00121!\u00138u\u0011\u0019!\u0005\u0001)A\u0005i\u0005!\u0012N\u001c5fe&$X\r\u001a,pG\u0006\u0014W\u000f\\1ss\u0002BQA\u0012\u0001\u0005B\u001d\u000b1AZ5u)\tA5\n\u0005\u0002\u0012\u0013&\u0011!J\u0005\u0002\u0015\u0007>,h\u000e\u001e,fGR|'/\u001b>fe6{G-\u001a7\t\u000b1+\u0005\u0019A'\u0002\u000f\u0011\fG/Y:fiB\u0011a*U\u0007\u0002\u001f*\u0011\u0001\u000bC\u0001\u0004gFd\u0017B\u0001*P\u0005%!\u0015\r^1Ge\u0006lW\r")
/* loaded from: input_file:org/apache/spark/ml/odkl/texts/OdklCountVectorizer.class */
/*
 * Estimator that extends Spark's CountVectorizer with the parameters declared in
 * OdklCountVectorizerParams and with an optional "inherited" vocabulary.
 *
 * When the inheritedVocabulary parameter is defined, fitting post-processes the freshly
 * extracted vocabulary using that map (see m555fit) before building the model.
 *
 * NOTE(review): this source is decompiled Scala. The $$anonfun$N classes referenced below
 * are compiled closures whose bodies are not visible in this file; comments describing what
 * they do are marked as assumptions.
 */
public class OdklCountVectorizer extends CountVectorizer implements OdklCountVectorizerParams {
    /** Parameter holding a term -> value map; its description says it comes from a previous epoch. */
    private final Param<Map<String, Object>> inheritedVocabulary;

    // The two trait-provided parameters below are written through the generated
    // "_setter_" methods rather than directly in a constructor, so they must not be
    // declared final (assigning a final field outside a constructor/initializer is illegal).
    private Param<String> vocabAttrGroupName;
    private BooleanParam storeVocabInMetadata;

    /** Returns the {@code vocabAttrGroupName} parameter object. */
    @Override // org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams
    public Param<String> vocabAttrGroupName() {
        return this.vocabAttrGroupName;
    }

    /** Returns the {@code storeVocabInMetadata} parameter object. */
    @Override // org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams
    public BooleanParam storeVocabInMetadata() {
        return this.storeVocabInMetadata;
    }

    /**
     * Trait field initializer hook: stores the {@code vocabAttrGroupName} parameter.
     * Presumably invoked from {@code OdklCountVectorizerParams.Cclass.$init$} - confirm against the trait.
     */
    @Override // org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams
    @SuppressWarnings("unchecked") // parameter is raw to keep the override signature of the trait setter unchanged
    public void org$apache$spark$ml$odkl$texts$OdklCountVectorizerParams$_setter_$vocabAttrGroupName_$eq(Param param) {
        this.vocabAttrGroupName = param;
    }

    /**
     * Trait field initializer hook: stores the {@code storeVocabInMetadata} parameter.
     * Presumably invoked from {@code OdklCountVectorizerParams.Cclass.$init$} - confirm against the trait.
     */
    @Override // org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams
    public void org$apache$spark$ml$odkl$texts$OdklCountVectorizerParams$_setter_$storeVocabInMetadata_$eq(BooleanParam booleanParam) {
        this.storeVocabInMetadata = booleanParam;
    }

    /** Delegates to the trait implementation of {@code getVocabAttrGroupName}. */
    @Override // org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams
    public String getVocabAttrGroupName() {
        return OdklCountVectorizerParams.Cclass.getVocabAttrGroupName(this);
    }

    /** Delegates to the trait implementation of {@code setVocabAttrGroupName}. */
    @Override // org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams
    public OdklCountVectorizerParams setVocabAttrGroupName(String str) {
        return OdklCountVectorizerParams.Cclass.setVocabAttrGroupName(this, str);
    }

    /** Delegates to the trait implementation of {@code getStoreVocabInMetadata}. */
    @Override // org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams
    public boolean getStoreVocabInMetadata() {
        return OdklCountVectorizerParams.Cclass.getStoreVocabInMetadata(this);
    }

    /** Delegates to the trait implementation of {@code setStoreVocabInMetadata}. */
    @Override // org.apache.spark.ml.odkl.texts.OdklCountVectorizerParams
    public OdklCountVectorizerParams setStoreVocabInMetadata(boolean z) {
        return OdklCountVectorizerParams.Cclass.setStoreVocabInMetadata(this, z);
    }

    /** Returns the unique identifier of this estimator, as held by the superclass. */
    public String uid() {
        return super.uid();
    }

    /** Returns the {@code inheritedVocabulary} parameter object. */
    public Param<Map<String, Object>> inheritedVocabulary() {
        return this.inheritedVocabulary;
    }

    /**
     * Fits a vocabulary on the input column of {@code dataFrame} and wraps it in an
     * {@link OdklCountVectorizerModel}.
     *
     * <p>Steps visible in this method:
     * <ol>
     *   <li>validate the schema via {@code transformSchema};</li>
     *   <li>select the input column, map/flatMap it into String-keyed pairs, reduce by key,
     *       filter with the minimum-document-frequency threshold, sort descending by a Long
     *       key into a single partition and take at most {@code vocabSize} strings;</li>
     *   <li>require that the result is non-empty;</li>
     *   <li>if {@code inheritedVocabulary} is defined, build a same-length array from the
     *       extracted terms, the inherited map and a work queue;</li>
     *   <li>create the model with a random UID, set this estimator as parent and copy params.</li>
     * </ol>
     *
     * <p>NOTE(review): the decompiler renamed this method (originally {@code fit}, merged with
     * its bridge method); the name is kept as-is here to avoid changing the visible interface.
     *
     * @param dataFrame dataset containing the column named by {@code inputCol}
     * @return the fitted model, with this estimator's parameter values copied onto it
     */
    /* renamed from: fit, reason: merged with bridge method [inline-methods] */
    public CountVectorizerModel m555fit(DataFrame dataFrame) {
        transformSchema(dataFrame.schema(), true);

        // minDF >= 1 is used as an absolute threshold; otherwise it is scaled by the number
        // of rows. The decompiled code referenced an undeclared register here ("r0.count()");
        // the dataset's row count is used instead, which equals the count of the row-wise
        // mapped input the pipeline below starts from.
        double minDfParam = BoxesRunTime.unboxToDouble($(minDF()));
        double minDocFreq = minDfParam >= 1.0d ? minDfParam : minDfParam * dataFrame.count();

        // Closures $$anonfun$2..8 are opaque here; presumably they extract token sequences,
        // emit per-term counts, merge them, apply the threshold and project the term - TODO confirm.
        String[] extractedVocab = (String[]) RDD$.MODULE$.rddToPairRDDFunctions(
                dataFrame.select((String) $(inputCol()), Predef$.MODULE$.wrapRefArray(new String[0]))
                        .map(new OdklCountVectorizer$$anonfun$2(this), ClassTag$.MODULE$.apply(Seq.class))
                        .flatMap(new OdklCountVectorizer$$anonfun$3(this), ClassTag$.MODULE$.apply(Tuple2.class)),
                ClassTag$.MODULE$.apply(String.class),
                ClassTag$.MODULE$.apply(Tuple2.class),
                Ordering$String$.MODULE$)
                .reduceByKey(new OdklCountVectorizer$$anonfun$4(this))
                .filter(new OdklCountVectorizer$$anonfun$5(this, minDocFreq))
                .map(new OdklCountVectorizer$$anonfun$6(this), ClassTag$.MODULE$.apply(Tuple2.class))
                // ascending = false, numPartitions = 1
                .sortBy(new OdklCountVectorizer$$anonfun$7(this), false, 1, Ordering$Long$.MODULE$, ClassTag$.MODULE$.Long())
                .map(new OdklCountVectorizer$$anonfun$8(this), ClassTag$.MODULE$.apply(String.class))
                .take(BoxesRunTime.unboxToInt($(vocabSize())));

        Predef$.MODULE$.require(extractedVocab.length > 0, new OdklCountVectorizer$$anonfun$fit$1(this));

        String[] vocabulary;
        if (isDefined(inheritedVocabulary())) {
            String[] arranged = new String[extractedVocab.length];
            Map inherited = (Map) $(inheritedVocabulary());
            Queue pending = new Queue();
            // First pass visits every extracted term with access to the target array, the
            // inherited map and the queue; second pass visits every index of the target array
            // with access to the queue. Presumably: keep inherited positions, then fill the
            // remaining slots from the queue - closure bodies are not visible, verify.
            Predef$.MODULE$.refArrayOps(extractedVocab).foreach(new OdklCountVectorizer$$anonfun$9(this, arranged, inherited, pending));
            Predef$.MODULE$.refArrayOps(arranged).indices().foreach$mVc$sp(new OdklCountVectorizer$$anonfun$1(this, arranged, pending));
            vocabulary = arranged;
        } else {
            vocabulary = extractedVocab;
        }

        return copyValues(new OdklCountVectorizerModel(Identifiable$.MODULE$.randomUID("odklCountVectorizerModel"), vocabulary).setParent(this), copyValues$default$2());
    }

    /**
     * Creates the estimator with an explicit unique identifier.
     *
     * @param str unique identifier passed to the {@link CountVectorizer} constructor
     */
    public OdklCountVectorizer(String str) {
        super(str);
        // Runs the trait's initialization for this instance.
        OdklCountVectorizerParams.Cclass.$init$(this);
        this.inheritedVocabulary = new Param<>(this, "inheritedVocabulary", "Dictionary inherited from the previous epoche. Can be used to try to preserve word indices.");
    }

    /** Creates the estimator with a random UID prefixed by {@code "odklCountVectorizer"}. */
    public OdklCountVectorizer() {
        this(Identifiable$.MODULE$.randomUID("odklCountVectorizer"));
    }
}
