package streaming.dsl.mmlib.algs.feature;

import breeze.linalg.DenseVector;
import org.apache.spark.SparkContext;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.ml.help.HSQLStringIndex$;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$implicits$;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.ArrayType$;
import org.apache.spark.sql.types.DoubleType$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType$;
import org.apache.spark.util.collection.OpenHashMap;
import scala.Array$;
import scala.Function1;
import scala.MatchError;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.StringContext;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SetLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.immutable.Set$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.StringBuilder;
import scala.math.Ordering$Double$;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.TypeTags;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.DoubleRef;
import scala.runtime.IntRef;
import scala.runtime.ObjectRef;
import scala.runtime.RichInt$;
import streaming.dsl.mmlib.algs.MetaConst$;
import streaming.dsl.mmlib.algs.SQLDicOrTableToArray;
import streaming.dsl.mmlib.algs.SQLStringIndex;
import streaming.dsl.mmlib.algs.SQLTfIdf;
import streaming.dsl.mmlib.algs.SQLTokenAnalysis;
import streaming.dsl.mmlib.algs.SQLTokenAnalysis$;
import streaming.dsl.mmlib.algs.SQLWord2Vec;
import streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions;
import streaming.dsl.mmlib.algs.meta.OutlierValueMeta;
import streaming.log.WowLog;

/* compiled from: StringFeature.scala */
/* loaded from: input_file:streaming/dsl/mmlib/algs/feature/StringFeature$.class */
public final class StringFeature$ implements BaseFeatureFunctions, WowLog {
    // Holder for the instance of this compiled Scala object (referenced as StringFeature$.MODULE$).
    // NOTE(review): the `= null` initializer on a final field looks like a decompiler artifact; presumably
    // the constructor (not visible in this section) is what assigns MODULE$ — confirm against the bytecode.
    public static final StringFeature$ MODULE$ = null;

    static {
        // Creates one instance at class-initialization time; the reference is not stored by this statement.
        new StringFeature$();
    }

    /**
     * Forwards to the {@code WowLog} trait implementation, passing this instance as receiver.
     *
     * @param str message handed to {@code WowLog.Cclass.format}
     * @param z   flag handed to {@code WowLog.Cclass.format} unchanged
     * @return whatever {@code WowLog.Cclass.format} returns
     */
    @Override // streaming.log.WowLog
    public String format(String str, boolean z) {
        final String formatted = WowLog.Cclass.format(this, str, z);
        return formatted;
    }

    /**
     * Forwards to {@code WowLog.Cclass.wow_format} with this instance as receiver.
     *
     * @param str message handed to the trait implementation
     * @return the trait implementation's result
     */
    @Override // streaming.log.WowLog
    public String wow_format(String str) {
        final String formatted = WowLog.Cclass.wow_format(this, str);
        return formatted;
    }

    /**
     * Forwards to {@code WowLog.Cclass.format_exception} with this instance as receiver.
     *
     * @param exc exception handed to the trait implementation
     * @return the trait implementation's result
     */
    @Override // streaming.log.WowLog
    public String format_exception(Exception exc) {
        final String rendered = WowLog.Cclass.format_exception(this, exc);
        return rendered;
    }

    /**
     * Forwards to {@code WowLog.Cclass.format_throwable} with this instance as receiver.
     *
     * @param th throwable handed to the trait implementation
     * @param z  flag handed to the trait implementation unchanged
     * @return the trait implementation's result
     */
    @Override // streaming.log.WowLog
    public String format_throwable(Throwable th, boolean z) {
        final String rendered = WowLog.Cclass.format_throwable(this, th, z);
        return rendered;
    }

    /**
     * Forwards to {@code WowLog.Cclass.format_cause} with this instance as receiver.
     *
     * @param exc exception handed to the trait implementation
     * @return the trait implementation's result
     */
    @Override // streaming.log.WowLog
    public String format_cause(Exception exc) {
        final String rendered = WowLog.Cclass.format_cause(this, exc);
        return rendered;
    }

    /**
     * Forwards to {@code WowLog.Cclass.format_full_exception} with this instance as receiver.
     *
     * @param arrayBuffer buffer handed to the trait implementation (presumably it is appended to — confirm in WowLog)
     * @param exc         exception handed to the trait implementation
     * @param z           flag handed to the trait implementation unchanged
     */
    @Override // streaming.log.WowLog
    public void format_full_exception(ArrayBuffer<String> arrayBuffer, Exception exc, boolean z) {
        WowLog.Cclass.format_full_exception(this, arrayBuffer, exc, z);
    }

    /**
     * Supplies the default for the second parameter of {@link #format(String, boolean)} by asking
     * {@code WowLog.Cclass.format$default$2}.
     *
     * @return the default value reported by the trait implementation
     */
    @Override // streaming.log.WowLog
    public boolean format$default$2() {
        final boolean defaultFlag = WowLog.Cclass.format$default$2(this);
        return defaultFlag;
    }

    /**
     * Supplies the default for the second parameter of {@link #format_throwable(Throwable, boolean)} by asking
     * {@code WowLog.Cclass.format_throwable$default$2}.
     *
     * @return the default value reported by the trait implementation
     */
    @Override // streaming.log.WowLog
    public boolean format_throwable$default$2() {
        final boolean defaultFlag = WowLog.Cclass.format_throwable$default$2(this);
        return defaultFlag;
    }

    /**
     * Supplies the default for the third parameter of {@code format_full_exception} by asking
     * {@code WowLog.Cclass.format_full_exception$default$3}.
     *
     * @return the default value reported by the trait implementation
     */
    @Override // streaming.log.WowLog
    public boolean format_full_exception$default$3() {
        final boolean defaultFlag = WowLog.Cclass.format_full_exception$default$3(this);
        return defaultFlag;
    }

    /**
     * Forwards to {@code BaseFeatureFunctions.Cclass.replaceColumn} with this instance as receiver.
     *
     * @param dataset             dataset handed to the trait implementation
     * @param str                 column name handed to the trait implementation
     * @param userDefinedFunction UDF handed to the trait implementation
     * @return the dataset returned by the trait implementation
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public Dataset<Row> replaceColumn(Dataset<Row> dataset, String str, UserDefinedFunction userDefinedFunction) {
        final Dataset<Row> replaced = BaseFeatureFunctions.Cclass.replaceColumn(this, dataset, str, userDefinedFunction);
        return replaced;
    }

    /**
     * Forwards to {@code BaseFeatureFunctions.Cclass.killSingleColumnOutlierValue} with this instance as receiver.
     *
     * @param dataset dataset handed to the trait implementation
     * @param str     column name handed to the trait implementation
     * @return the (dataset, metadata) pair returned by the trait implementation
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public Tuple2<Dataset<Row>, OutlierValueMeta> killSingleColumnOutlierValue(Dataset<Row> dataset, String str) {
        final Tuple2<Dataset<Row>, OutlierValueMeta> outcome = BaseFeatureFunctions.Cclass.killSingleColumnOutlierValue(this, dataset, str);
        return outcome;
    }

    /**
     * Forwards to {@code BaseFeatureFunctions.Cclass.asBreeze} with this instance as receiver.
     *
     * @param vector Spark ML vector handed to the trait implementation
     * @return the Breeze dense vector returned by the trait implementation
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public DenseVector<Object> asBreeze(Vector vector) {
        final DenseVector<Object> breezeVector = BaseFeatureFunctions.Cclass.asBreeze(this, vector);
        return breezeVector;
    }

    /**
     * Forwards to {@code BaseFeatureFunctions.Cclass.getTempCol} with this instance as receiver.
     *
     * @return the column name produced by the trait implementation
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public String getTempCol() {
        final String tempColumn = BaseFeatureFunctions.Cclass.getTempCol(this);
        return tempColumn;
    }

    /**
     * Forwards to {@code BaseFeatureFunctions.Cclass.getFieldGroupName} with this instance as receiver.
     *
     * @param seq field names handed to the trait implementation
     * @return the name produced by the trait implementation
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public String getFieldGroupName(Seq<String> seq) {
        final String groupName = BaseFeatureFunctions.Cclass.getFieldGroupName(this, seq);
        return groupName;
    }

    /**
     * Forwards to {@code BaseFeatureFunctions.Cclass.expandColumnsFromVector} with this instance as receiver.
     *
     * @param dataset dataset handed to the trait implementation
     * @param seq     column names handed to the trait implementation
     * @param str     column name handed to the trait implementation
     * @return the dataset returned by the trait implementation
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public Dataset<Row> expandColumnsFromVector(Dataset<Row> dataset, Seq<String> seq, String str) {
        final Dataset<Row> expanded = BaseFeatureFunctions.Cclass.expandColumnsFromVector(this, dataset, seq, str);
        return expanded;
    }

    /**
     * Loads the stop-word set from a dictionary path.
     *
     * @param dataset dataset handed to {@code SQLDicOrTableToArray.internal_train}
     * @param str     dictionary path passed as {@code "dic.paths"}; when {@code null} or empty no dictionary is read
     * @return an empty set when {@code str} is null/empty; otherwise the {@code "stopwords"} entry of the
     *         collected dictionary result, converted to a set
     */
    public Set<String> loadStopwords(Dataset<Row> dataset, String str) {
        // Guard clause: nothing to load without a path.
        if (str == null || str.isEmpty()) {
            return Predef$.MODULE$.Set().apply(Nil$.MODULE$);
        }

        SQLDicOrTableToArray dictionaryLoader = new SQLDicOrTableToArray();
        Map trainParams = (Map) Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("dic.paths"), str),
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("dic.names"), "stopwords")}));

        // Collect the trained result to the driver and turn each row into a pair via the compiled closure.
        Object[] collectedRows = (Object[]) dictionaryLoader.internal_train(dataset, trainParams).collect();
        Object[] namedEntries = (Object[]) Predef$.MODULE$.refArrayOps(collectedRows).map(new StringFeature$$anonfun$2(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));

        // Look up the entry registered under the dictionary name used above.
        Object stopwordEntry = Predef$.MODULE$.refArrayOps(namedEntries).toMap(Predef$.MODULE$.$conforms()).apply("stopwords");
        return ((TraversableOnce) stopwordEntry).toSet();
    }

    /**
     * Loads the priority-word dictionary, converts the words with {@code function1}, and pairs the
     * resulting set with a vector-to-vector function built around a broadcast of that set.
     *
     * @param dataset   dataset handed to {@code SQLDicOrTableToArray.internal_train}; its Spark context performs the broadcast
     * @param str       dictionary path passed as {@code "dic.paths"}; when {@code null} or empty the word set starts empty
     * @param d         value forwarded to the returned function's constructor (presumably the priority weight — TODO confirm)
     * @param function1 conversion applied to every loaded word through {@code StringFeature$$anonfun$4}
     * @return pair of (converted and filtered set, function constructed from {@code d} and the broadcast set)
     */
    public Tuple2<Set<Object>, Function1<Vector, Vector>> loadPriorityWords(Dataset<Row> dataset, String str, double d, Function1<String, Object> function1) {
        final boolean hasDictionaryPath = str != null && !str.isEmpty();

        Set rawWords;
        if (hasDictionaryPath) {
            SQLDicOrTableToArray dictionaryLoader = new SQLDicOrTableToArray();
            Map trainParams = (Map) Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{
                    Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("dic.paths"), str),
                    Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("dic.names"), "prioritywords")}));
            Object[] collectedRows = (Object[]) dictionaryLoader.internal_train(dataset, trainParams).collect();
            Object[] namedEntries = (Object[]) Predef$.MODULE$.refArrayOps(collectedRows).map(new StringFeature$$anonfun$3(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
            Object priorityEntry = Predef$.MODULE$.refArrayOps(namedEntries).toMap(Predef$.MODULE$.$conforms()).apply("prioritywords");
            rawWords = ((TraversableOnce) priorityEntry).toSet();
        } else {
            rawWords = Predef$.MODULE$.Set().apply(Nil$.MODULE$);
        }

        // Convert each word, then keep only what the compiled predicate accepts.
        Object convertedWords = ((SetLike) rawWords).map(new StringFeature$$anonfun$4(function1), Set$.MODULE$.canBuildFrom());
        Set set = (Set) ((TraversableLike) convertedWords).filter(new StringFeature$$anonfun$1());

        // The returned function captures the set as a broadcast variable rather than a plain reference.
        Broadcast<Set> sharedWords = dataset.sparkSession().sparkContext().broadcast(set, ClassTag$.MODULE$.apply(Set.class));
        return new Tuple2<>(set, new StringFeature$$anonfun$5(d, sharedWords));
    }

    /**
     * Tokenizes a text column, builds a word index over the distinct tokens, and replaces the column
     * with the output of the index's {@code "wow_array"} function.
     *
     * @param dataset   input rows; also supplies the SparkSession used throughout
     * @param str       first argument to {@code MetaConst$.WORD_INDEX_PATH}, which yields the word-index storage path
     * @param str2      passed to {@code SQLTokenAnalysis} as {@code "dic.paths"}
     * @param str3      name of the column being analysed ({@code "inputCol"}); also the second argument to {@code WORD_INDEX_PATH}
     * @param broadcast broadcast string set captured by the first column-replacing UDF (callers in this file pass the stop-word set)
     * @param seq       elements mapped through {@code StringFeature$$anonfun$7} (presumably n-gram sizes — TODO confirm)
     * @param z         when true, the word/index pairs are sorted, rendered one per line and passed to {@code format}
     * @param str4      passed to {@code SQLTokenAnalysis} as {@code "split"}
     * @param strArr    joined with {@code ","} and passed to {@code SQLTokenAnalysis} as {@code "wordsArray"}
     * @return triple of (transformed dataset, function map from {@code SQLStringIndex.internal_predict},
     *         number of distinct words as a boxed long)
     */
    public Tuple3<Dataset<Row>, Map<String, Function1<String, Object>>, Object> analysisWords(Dataset<Row> dataset, String str, String str2, String str3, Broadcast<Set<String>> broadcast, Seq<Object> seq, boolean z, String str4, String[] strArr) {
        // Step 1: run token analysis; the result is kept in a mutable holder because later closures reassign it.
        ObjectRef create = ObjectRef.create(new SQLTokenAnalysis().internal_train(dataset, (Map) Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("dic.paths"), str2), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("inputCol"), str3), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("ignoreNature"), "true"), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("wordsArray"), Predef$.MODULE$.refArrayOps(strArr).mkString(",")), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("split"), str4)}))));
        // Step 2: replace the column with a UDF that captures the broadcast set (both type tags describe Seq[String]).
        create.elem = replaceColumn((Dataset) create.elem, str3, functions$.MODULE$.udf(new StringFeature$$anonfun$6(broadcast), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator1$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator2$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        })));
        // Step 3: map every element of seq through anonfun$7, then hand each result to anonfun$analysisWords$1,
        // which receives the column name, the dataset holder and a two-argument UDF built from anonfun$8.
        // NOTE(review): presumably this merges extra (n-gram) terms into the column — confirm in the closures.
        ((Seq) seq.map(new StringFeature$$anonfun$7(str3, create), Seq$.MODULE$.canBuildFrom())).foreach(new StringFeature$$anonfun$analysisWords$1(str3, create, functions$.MODULE$.udf(new StringFeature$$anonfun$8(), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator3$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator4$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator5$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        }))));
        // Step 4: flat-map the analysed column into strings, de-duplicate them, and wrap them as a
        // single-column ("words", string) DataFrame.
        Dataset<Row> createDataFrame = dataset.sparkSession().createDataFrame(((Dataset) create.elem).rdd().flatMap(new StringFeature$$anonfun$9(((Dataset) create.elem).schema().fieldIndex(str3)), ClassTag$.MODULE$.apply(String.class)).distinct().map(new StringFeature$$anonfun$10(), ClassTag$.MODULE$.apply(Row.class)), StructType$.MODULE$.apply(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new StructField[]{new StructField("words", StringType$.MODULE$, StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4())}))));
        // Number of distinct words; this is the third element of the returned triple.
        long count = createDataFrame.count();
        // Builds the message through format(); the returned String is not used by this statement.
        format(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"TFIDF: total words in corpus: ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToLong(count)})), format$default$2());
        // Step 5: train a string index over the "words" column under WORD_INDEX_PATH(str, str3), then load it back.
        SQLStringIndex sQLStringIndex = new SQLStringIndex();
        sQLStringIndex.train(createDataFrame, MetaConst$.MODULE$.WORD_INDEX_PATH(str, str3), (Map) Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("inputCol"), "words")})));
        Object load = sQLStringIndex.load(dataset.sparkSession(), MetaConst$.MODULE$.WORD_INDEX_PATH(str, str3), (Map) Predef$.MODULE$.Map().apply(Nil$.MODULE$));
        OpenHashMap<String, Object> wordToIndex = HSQLStringIndex$.MODULE$.wordToIndex(dataset.sparkSession(), load);
        // As above, the formatted message is built and then discarded.
        format(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"TFIDF: wordToIndex: ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToInteger(wordToIndex.size())})), format$default$2());
        SparkSession sparkSession = dataset.sparkSession();
        // Step 6: write the (word, index) pairs as parquet with SaveMode.Overwrite.
        // NOTE(review): the target is the same WORD_INDEX_PATH(str, str3) that train() was given above, so
        // whatever train() stored there is replaced — confirm this is intended.
        sparkSession.createDataFrame(sparkSession.sparkContext().parallelize(wordToIndex.toSeq(), sparkSession.sparkContext().parallelize$default$2(), ClassTag$.MODULE$.apply(Tuple2.class)).map(new StringFeature$$anonfun$analysisWords$2(), ClassTag$.MODULE$.apply(Row.class)), StructType$.MODULE$.apply(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new StructField[]{new StructField("word", StringType$.MODULE$, StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4()), new StructField("index", DoubleType$.MODULE$, StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4())})))).write().mode(SaveMode.Overwrite).parquet(MetaConst$.MODULE$.WORD_INDEX_PATH(str, str3));
        if (z) {
            // Sort the pairs (Double ordering on the key chosen by anonfun$11), render each with anonfun$12,
            // join with newlines and pass to format(); the result is discarded here as well.
            format(((TraversableOnce) ((TraversableLike) wordToIndex.toSeq().sortBy(new StringFeature$$anonfun$11(), Ordering$Double$.MODULE$)).map(new StringFeature$$anonfun$12(), Seq$.MODULE$.canBuildFrom())).mkString("\n"), format$default$2());
        } else {
            // Compiler-generated unit value for the empty else branch; has no effect.
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
        }
        // Step 7: obtain the index's functions registered under the name "wow" and apply the "wow_array"
        // one as a UDF (declared as Seq[String] => Array[Int]) to the analysed column.
        Map<String, Function1<String, Object>> internal_predict = sQLStringIndex.internal_predict(dataset.sparkSession(), load, "wow");
        Function1 function1 = (Function1) internal_predict.apply("wow_array");
        create.elem = replaceColumn((Dataset) create.elem, str3, functions$.MODULE$.udf(function1, package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator6$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Array"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("scala.Int").asType().toTypeConstructor()})));
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator7$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        })));
        return new Tuple3<>((Dataset) create.elem, internal_predict, BoxesRunTime.boxToLong(count));
    }

    /**
     * Default value for the ninth parameter ({@code strArr}) of {@code analysisWords}: a String array
     * built by Scala's {@code Array.apply} from an empty list.
     *
     * @return the array produced for the empty element list
     */
    public String[] analysisWords$default$9() {
        final Object emptyWords = Array$.MODULE$.apply(Nil$.MODULE$, ClassTag$.MODULE$.apply(String.class));
        return (String[]) emptyWords;
    }

    /**
     * Turns a text column into TF-IDF vectors and then applies the priority-word function to them.
     *
     * @param dataset input rows; also supplies the SparkSession / SparkContext
     * @param str     first argument to {@code MetaConst$.TF_IDF_PATH} (and, via {@code analysisWords}, to the word-index path)
     * @param str2    forwarded to {@code analysisWords} as its {@code str2} ({@code "dic.paths"})
     * @param str3    name of the column being transformed
     * @param str4    stop-word dictionary path handed to {@code loadStopwords}
     * @param str5    priority-word dictionary path handed to {@code loadPriorityWords}
     * @param d       forwarded to {@code loadPriorityWords} (presumably the priority weight — TODO confirm)
     * @param seq     forwarded to {@code analysisWords} as its {@code seq}
     * @param str6    forwarded to {@code analysisWords} as its {@code str4} ({@code "split"})
     * @param z       forwarded to {@code analysisWords} as its {@code z}
     * @return the dataset with column {@code str3} replaced first by the TF-IDF function's output and then by
     *         the priority-word function's output
     * @throws MatchError if {@code analysisWords} or {@code loadPriorityWords} returns {@code null}
     */
    public Dataset<Row> tfidf(Dataset<Row> dataset, String str, String str2, String str3, String str4, String str5, double d, Seq<Object> seq, String str6, boolean z) {
        // Tokenize and index the column; the stop-word set is broadcast, and the last argument is the
        // default empty words array.
        Tuple3<Dataset<Row>, Map<String, Function1<String, Object>>, Object> analysisWords = analysisWords(dataset, str, str2, str3, dataset.sparkSession().sparkContext().broadcast(loadStopwords(dataset, str4), ClassTag$.MODULE$.apply(Set.class)), seq, z, str6, analysisWords$default$9());
        if (analysisWords == null) {
            throw new MatchError(analysisWords);
        }
        // Compiler-generated destructuring of the triple: (indexed dataset, index functions, distinct-word count).
        Tuple3 tuple3 = new Tuple3((Dataset) analysisWords._1(), (Map) analysisWords._2(), BoxesRunTime.boxToLong(BoxesRunTime.unboxToLong(analysisWords._3())));
        Dataset<Row> dataset2 = (Dataset) tuple3._1();
        Map map = (Map) tuple3._2();
        long unboxToLong = BoxesRunTime.unboxToLong(tuple3._3());
        // Result is unused here (likely an unused local in the original source).
        dataset.sparkSession();
        // Train TF-IDF under TF_IDF_PATH(str, str3); "numFeatures" is the distinct-word count from
        // analysisWords and "binary" is "true".
        SQLTfIdf sQLTfIdf = new SQLTfIdf();
        sQLTfIdf.train(dataset2, MetaConst$.MODULE$.TF_IDF_PATH(str, str3), (Map) Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("inputCol"), str3), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("numFeatures"), BoxesRunTime.boxToLong(unboxToLong).toString()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("binary"), "true")})));
        // Load the trained model back and take its "wow" function.
        Function1 function1 = (Function1) sQLTfIdf.internal_predict(dataset.sparkSession(), sQLTfIdf.load(dataset.sparkSession(), MetaConst$.MODULE$.TF_IDF_PATH(str, str3), (Map) Predef$.MODULE$.Map().apply(Nil$.MODULE$)), "wow").apply("wow");
        // First replacement: UDF declared as Seq[Int] => ml.linalg.Vector.
        Dataset<Row> replaceColumn = replaceColumn(dataset2, str3, functions$.MODULE$.udf(function1, package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator8$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator9$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("scala.Int").asType().toTypeConstructor()})));
            }
        })));
        // Priority words are converted with the word index's "wow" function obtained from analysisWords.
        Tuple2<Set<Object>, Function1<Vector, Vector>> loadPriorityWords = loadPriorityWords(replaceColumn, str5, d, (Function1) map.apply("wow"));
        if (loadPriorityWords == null) {
            throw new MatchError(loadPriorityWords);
        }
        // Compiler-generated destructuring; only the function (second element) is used below.
        Tuple2 tuple2 = new Tuple2((Set) loadPriorityWords._1(), (Function1) loadPriorityWords._2());
        Function1 function12 = (Function1) tuple2._2();
        // Second replacement: UDF declared as ml.linalg.Vector => ml.linalg.Vector.
        return replaceColumn(replaceColumn, str3, functions$.MODULE$.udf(function12, package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator10$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator11$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        })));
    }

    /**
     * Default value for the tenth parameter ({@code z}) of {@code tfidf}.
     *
     * @return always {@code false}
     */
    public boolean tfidf$default$10() {
        final boolean defaultFlag = false;
        return defaultFlag;
    }

    public Dataset<Row> word2vec(Dataset<Row> dataset, String str, String str2, String str3, String str4, String str5, String str6, String str7, int i, int i2, int i3) {
        Broadcast<Set<String>> broadcast = dataset.sparkSession().sparkContext().broadcast(loadStopwords(dataset, str5), ClassTag$.MODULE$.apply(Set.class));
        SparkSession sparkSession = dataset.sparkSession();
        String[] loadDicsFromWordvec = loadDicsFromWordvec(sparkSession, str3);
        Map<String, double[]> loadWordvecs = loadWordvecs(sparkSession, str3);
        if (loadWordvecs.size() > 0) {
            return replaceColumn(replaceColumn(new SQLTokenAnalysis().internal_train(dataset, (Map) Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("dic.paths"), str2), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("inputCol"), str4), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("ignoreNature"), "true"), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("wordsArray"), Predef$.MODULE$.refArrayOps(loadDicsFromWordvec).mkString(","))}))), str4, functions$.MODULE$.udf(new StringFeature$$anonfun$13(broadcast), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator1$2
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    Universe universe = mirror.universe();
                    return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
                }
            }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator2$2
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    Universe universe = mirror.universe();
                    return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
                }
            }))), str4, functions$.MODULE$.udf(new StringFeature$$anonfun$14(i, i2, loadWordvecs), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator3$2
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    Universe universe = mirror.universe();
                    return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("scala.Double").asType().toTypeConstructor()})))})));
                }
            }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator4$3
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    Universe universe = mirror.universe();
                    return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
                }
            })));
        }
        Tuple3<Dataset<Row>, Map<String, Function1<String, Object>>, Object> analysisWords = analysisWords(dataset, str, str2, str4, broadcast, (Seq) Seq$.MODULE$.apply(Nil$.MODULE$), false, str7, loadDicsFromWordvec);
        if (analysisWords == null) {
            throw new MatchError(analysisWords);
        }
        Tuple3 tuple3 = new Tuple3((Dataset) analysisWords._1(), (Map) analysisWords._2(), BoxesRunTime.boxToLong(BoxesRunTime.unboxToLong(analysisWords._3())));
        Dataset<Row> dataset2 = (Dataset) tuple3._1();
        BoxesRunTime.unboxToLong(tuple3._3());
        SQLWord2Vec sQLWord2Vec = new SQLWord2Vec();
        sQLWord2Vec.train(replaceColumn(dataset2, str4, functions$.MODULE$.udf(new StringFeature$$anonfun$word2vec$1(), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator5$3
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("java.lang.String").asType().toTypeConstructor()})));
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator6$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("scala.Int").asType().toTypeConstructor()})));
            }
        }))), MetaConst$.MODULE$.WORD2VEC_PATH(str, str4), (Map) Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("inputCol"), str4), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("vectorSize"), new StringBuilder().append(i).append("").toString()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("minCount"), BoxesRunTime.boxToInteger(i3).toString())})));
        Map<Object, Object> load = sQLWord2Vec.load(dataset.sparkSession(), MetaConst$.MODULE$.WORD2VEC_PATH(str, str4), (Map<String, String>) Predef$.MODULE$.Map().apply(Nil$.MODULE$));
        if (!str6.equals("index")) {
            Function1 function1 = (Function1) sQLWord2Vec.internal_predict(dataset.sparkSession(), load, "wow").apply("wow_array");
            dataset2 = replaceColumn(dataset2, str4, functions$.MODULE$.udf(function1, package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator7$2
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    Universe universe = mirror.universe();
                    return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("scala.Double").asType().toTypeConstructor()})))})));
                }
            }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator8$2
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    Universe universe = mirror.universe();
                    return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
                }
            })));
        }
        return dataset2;
    }

    /**
     * Returns the constant 100.
     *
     * <p>The name follows the Scala compiler's {@code method$default$N} scheme, so this is
     * presumably the default for the 9th parameter of {@code word2vec}.
     * NOTE(review): the {@code word2vec} signature is not visible in this chunk; confirm which
     * parameter this value feeds.
     */
    public int word2vec$default$9() {
        return 100;
    }

    /**
     * Returns the constant 100.
     *
     * <p>The name follows the Scala compiler's {@code method$default$N} scheme, so this is
     * presumably the default for the 10th parameter of {@code word2vec}.
     * NOTE(review): the {@code word2vec} signature is not visible in this chunk; confirm which
     * parameter this value feeds.
     */
    public int word2vec$default$10() {
        return 100;
    }

    /**
     * Returns the constant 1.
     *
     * <p>The name follows the Scala compiler's {@code method$default$N} scheme, so this is
     * presumably the default for the 11th parameter of {@code word2vec}.
     * NOTE(review): the {@code word2vec} signature is not visible in this chunk; confirm which
     * parameter this value feeds.
     */
    public int word2vec$default$11() {
        return 1;
    }

    /**
     * Builds a flat array of strings from a comma-separated list of entries.
     *
     * <p>{@code str} is split on {@code ","}; the pieces are filtered by
     * {@code StringFeature$$anonfun$loadDicsFromWordvec$1} and then flat-mapped through
     * {@code StringFeature$$anonfun$loadDicsFromWordvec$2}, which receives the Spark session.
     * Both closures are defined outside this chunk; presumably the first drops unusable entries
     * and the second reads the words stored under each path (TODO confirm).
     *
     * @param sparkSession session handed to the flat-map closure
     * @param str comma-separated entries
     * @return the flat-mapped strings, appended to an initially empty array
     */
    public String[] loadDicsFromWordvec(SparkSession sparkSession, String str) {
        // Empty seed array the loaded words are appended to.
        String[] seed = (String[]) Array$.MODULE$.apply(Nil$.MODULE$, ClassTag$.MODULE$.apply(String.class));
        Object[] keptEntries = (Object[]) Predef$.MODULE$.refArrayOps(str.split(",")).filter(new StringFeature$$anonfun$loadDicsFromWordvec$1());
        Object[] loadedWords = (Object[]) Predef$.MODULE$.refArrayOps(keptEntries).flatMap(new StringFeature$$anonfun$loadDicsFromWordvec$2(sparkSession), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        return (String[]) Predef$.MODULE$.refArrayOps(seed).$plus$plus(Predef$.MODULE$.refArrayOps(loadedWords), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
    }

    /**
     * Builds a map from a comma-separated list of entries.
     *
     * <p>{@code str} is split on {@code ","}, filtered by
     * {@code StringFeature$$anonfun$loadWordvecs$1}, and flat-mapped through
     * {@code StringFeature$$anonfun$loadWordvecs$2} (which receives the Spark session) into an
     * array of {@code Tuple2}. Each tuple is then passed to
     * {@code StringFeature$$anonfun$loadWordvecs$3}, which holds the result reference;
     * presumably it adds the pair to the map (closure body not visible, TODO confirm).
     *
     * @param sparkSession session handed to the flat-map closure
     * @param str comma-separated entries
     * @return the content of the result reference after all tuples have been visited
     */
    public Map<String, double[]> loadWordvecs(SparkSession sparkSession, String str) {
        // Mutable holder, initialised with an empty immutable map, shared with the foreach closure.
        ObjectRef result = ObjectRef.create(Predef$.MODULE$.Map().apply(Nil$.MODULE$));
        Tuple2[] seed = (Tuple2[]) Array$.MODULE$.apply(Nil$.MODULE$, ClassTag$.MODULE$.apply(Tuple2.class));
        Object[] keptEntries = (Object[]) Predef$.MODULE$.refArrayOps(str.split(",")).filter(new StringFeature$$anonfun$loadWordvecs$1());
        Object[] loadedPairs = (Object[]) Predef$.MODULE$.refArrayOps(keptEntries).flatMap(new StringFeature$$anonfun$loadWordvecs$2(sparkSession), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
        Tuple2[] allPairs = (Tuple2[]) Predef$.MODULE$.refArrayOps(seed).$plus$plus(Predef$.MODULE$.refArrayOps(loadedPairs), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
        Predef$.MODULE$.refArrayOps(allPairs).foreach(new StringFeature$$anonfun$loadWordvecs$3(result));
        return (Map) result.elem;
    }

    /**
     * Rewrites column {@code str2} of {@code dataset} in three chained steps and returns the result.
     *
     * <ol>
     *   <li>A new DataFrame is created from {@code dataset.rdd().mapPartitions(...)} using closure
     *       {@code StringFeature$$anonfun$18(str2, strArr)}. Its schema is the original schema
     *       minus the fields rejected by {@code StringFeature$$anonfun$21(str2)}, plus a field
     *       named {@code str2} of type array-of-string.</li>
     *   <li>Column {@code str2} is replaced through a UDF built from
     *       {@code StringFeature$$anonfun$22}, which receives the broadcast stop-word set
     *       (type tags: {@code Seq[String]} result, {@code Seq[String]} argument).</li>
     *   <li>Column {@code str2} is replaced again through a UDF built from
     *       {@code StringFeature$$anonfun$23(i, i2, create)} (type tags: {@code Seq[Seq[Double]]}
     *       result, {@code Seq[String]} argument).</li>
     * </ol>
     *
     * <p>All closures, {@code loadStopwords} and {@code replaceColumn} are defined outside this
     * chunk, so their exact behaviour cannot be confirmed here.
     *
     * @param dataset input data
     * @param str comma-separated entries; split, filtered and flat-mapped into {@code Tuple2}s
     * @param str2 name of the column that is rebuilt and replaced
     * @param str3 argument passed to {@code loadStopwords}
     * @param i first int handed to the final UDF closure (meaning not visible here)
     * @param i2 second int handed to the final UDF closure (meaning not visible here)
     * @return the dataset after both {@code replaceColumn} calls
     */
    public Dataset<Row> wordvec(Dataset<Row> dataset, String str, String str2, String str3, int i, int i2) {
        SparkSession sparkSession = dataset.sparkSession();
        // Stop words are broadcast so the UDF closure can read them on executors.
        Broadcast broadcast = dataset.sparkSession().sparkContext().broadcast(loadStopwords(dataset, str3), ClassTag$.MODULE$.apply(Set.class));
        // Holder for a map that starts empty; it is handed to the foreach closure below and later to the final UDF.
        ObjectRef create = ObjectRef.create(Predef$.MODULE$.Map().apply(Nil$.MODULE$));
        Tuple2[] tuple2Arr = (Tuple2[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(str.split(",")).filter(new StringFeature$$anonfun$15())).flatMap(new StringFeature$$anonfun$16(sparkSession), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
        // One String is derived from every tuple; presumably the word part of each pair (TODO confirm).
        String[] strArr = (String[]) Predef$.MODULE$.refArrayOps(tuple2Arr).map(new StringFeature$$anonfun$17(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        // Presumably folds every tuple into the map held by `create` (closure body not visible).
        Predef$.MODULE$.refArrayOps(tuple2Arr).foreach(new StringFeature$$anonfun$wordvec$1(create));
        return replaceColumn(replaceColumn(sparkSession.createDataFrame(dataset.rdd().mapPartitions(new StringFeature$$anonfun$18(str2, strArr), dataset.rdd().mapPartitions$default$2(), ClassTag$.MODULE$.apply(Row.class)), StructType$.MODULE$.apply((Seq) ((TraversableLike) dataset.schema().filterNot(new StringFeature$$anonfun$21(str2))).$plus$plus(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new StructField[]{new StructField(str2, ArrayType$.MODULE$.apply(StringType$.MODULE$), StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4())})), Seq$.MODULE$.canBuildFrom()))), str2, functions$.MODULE$.udf(new StringFeature$$anonfun$22(broadcast), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator1$3
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator2$3
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        }))), str2, functions$.MODULE$.udf(new StringFeature$$anonfun$23(i, i2, create), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator3$3
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("scala.Double").asType().toTypeConstructor()})))})));
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator4$4
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        })));
    }

    /**
     * Trains a {@code SQLStringIndex} model on column {@code str2}, reloads it, and replaces the
     * column with the output of the model's prediction function.
     *
     * <p>The model is stored under {@code <str without a trailing "/">/wordIndex/<str2>}. When
     * {@code z} is true, the word-to-index mapping obtained from
     * {@code HSQLStringIndex.wordToIndex} is sorted (via {@code StringFeature$$anonfun$24} with
     * double ordering), rendered by {@code StringFeature$$anonfun$25}, and printed one entry per
     * line to standard output.
     *
     * @param dataset input data; also used for training
     * @param str base path for the model
     * @param str2 name of the column to index and replace
     * @param z whether to print the learned mapping
     * @return a pair of the dataset with {@code str2} replaced and the map of prediction
     *         functions returned by {@code internal_predict}
     */
    public Tuple2<Dataset<Row>, Map<String, Function1<String, Object>>> strToInt(Dataset<Row> dataset, String str, String str2, boolean z) {
        // Model location: base path without trailing slash + "/wordIndex/" + column name.
        String basePath = new StringOps(Predef$.MODULE$.augmentString(str)).stripSuffix("/");
        String indexSuffix = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"/wordIndex/", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str2}));
        String modelPath = new StringBuilder().append(basePath).append(indexSuffix).toString();

        SQLStringIndex indexer = new SQLStringIndex();
        Map trainParams = (Map) Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("inputCol"), str2)}));
        indexer.train(dataset, modelPath, trainParams);
        Object model = indexer.load(dataset.sparkSession(), modelPath, (Map) Predef$.MODULE$.Map().apply(Nil$.MODULE$));

        if (z) {
            // Debug output of the learned mapping, one rendered entry per line.
            Object sortedEntries = HSQLStringIndex$.MODULE$.wordToIndex(dataset.sparkSession(), model).toSeq().sortBy(new StringFeature$$anonfun$24(), Ordering$Double$.MODULE$);
            Object renderedEntries = ((TraversableLike) sortedEntries).map(new StringFeature$$anonfun$25(), Seq$.MODULE$.canBuildFrom());
            Predef$.MODULE$.println(((TraversableOnce) renderedEntries).mkString("\n"));
        }

        Map<String, Function1<String, Object>> predictors = indexer.internal_predict(dataset.sparkSession(), model, "wow");
        Function1 wordToIndex = (Function1) predictors.apply("wow");
        // UDF type tags: Int result, String argument.
        TypeTags.TypeTag resultTag = package$.MODULE$.universe().TypeTag().Int();
        TypeTags.TypeTag inputTag = package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator5$4
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$);
            }
        });
        Dataset<Row> indexed = replaceColumn(dataset, str2, functions$.MODULE$.udf(wordToIndex, resultTag, inputTag));
        return new Tuple2<>(indexed, predictors);
    }

    /**
     * Combines a sequence of vectors into a single vector of length {@code i}.
     *
     * <p>A zero-filled {@code double[i]} is handed to
     * {@code StringFeature$$anonfun$mergeFunc$1}, which is invoked once per input vector;
     * presumably it accumulates each vector into the array (closure body not visible, TODO
     * confirm whether this is a sum, average or other merge).
     *
     * @param seq vectors to merge
     * @param i length of the merged vector
     * @return an empty sequence when {@code seq} has no elements, otherwise the merged array as
     *         a sequence
     */
    public Seq<Object> mergeFunc(Seq<Seq<Object>> seq, int i) {
        if (seq.size() != 0) {
            double[] merged = new double[i];
            seq.foreach(new StringFeature$$anonfun$mergeFunc$1(merged));
            return Predef$.MODULE$.doubleArrayOps(merged).toSeq();
        }
        // Nothing to merge.
        return Seq$.MODULE$.apply(Nil$.MODULE$);
    }

    /**
     * Replaces column {@code str} with the output of a tokenising UDF.
     *
     * <p>Dictionary words are loaded through {@code SQLTokenAnalysis.loadDics} with option
     * {@code "dic.paths" -> str4} and extended with {@code strArr}. An analyzer is created from
     * those words up front and its result is discarded; presumably this validates or warms up
     * the analyzer on the driver (TODO confirm). The UDF itself is built from
     * {@code StringFeature$$anonfun$26(str2, str3, words)} with type tags
     * {@code Seq[Seq[String]]} (result) and {@code String} (argument).
     *
     * @param dataset input data
     * @param str name of the column to replace
     * @param str2 first string handed to the UDF closure (meaning not visible here)
     * @param str3 second string handed to the UDF closure (meaning not visible here)
     * @param str4 value of the {@code "dic.paths"} option
     * @param strArr extra dictionary words appended to the loaded ones
     * @return the dataset with column {@code str} replaced
     */
    public Dataset<Row> analysisRaw(Dataset<Row> dataset, String str, String str2, String str3, String str4, String[] strArr) {
        Map dicOptions = (Map) Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("dic.paths"), str4)}));
        String[] dictionaryWords = (String[]) Predef$.MODULE$.refArrayOps(SQLTokenAnalysis$.MODULE$.loadDics(dataset.sparkSession(), dicOptions)).$plus$plus(Predef$.MODULE$.refArrayOps(strArr), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));

        // Return value intentionally unused, exactly as in the original call.
        SQLTokenAnalysis$.MODULE$.createAnalyzer(dictionaryWords, (Map) Predef$.MODULE$.Map().apply(Nil$.MODULE$));

        StringFeature$$anonfun$26 tokenizer = new StringFeature$$anonfun$26(str2, str3, dictionaryWords);
        TypeTags.TypeTag resultTag = package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator9$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("java.lang.String").asType().toTypeConstructor()})))})));
            }
        });
        TypeTags.TypeTag inputTag = package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator10$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$);
            }
        });
        return replaceColumn(dataset, str, functions$.MODULE$.udf(tokenizer, resultTag, inputTag));
    }

    /**
     * Returns a new empty {@code String} array.
     *
     * <p>By the Scala compiler's {@code method$default$N} naming scheme this is the default for
     * the 6th parameter of {@code analysisRaw} (the extra dictionary words, {@code strArr}).
     */
    public String[] analysisRaw$default$6() {
        return (String[]) Array$.MODULE$.apply(Nil$.MODULE$, ClassTag$.MODULE$.apply(String.class));
    }

    /**
     * Replaces column {@code str} of {@code dataset} using previously persisted metadata.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Resolve the meta path from {@code str3} and read the parquet file at
     *       {@code PARAMS_PATH(metaPath, "params")} into a string-to-string map.</li>
     *   <li>Read {@code inputCol}, {@code dicPaths}, {@code wordvecPaths}, {@code split} and
     *       {@code vectorSize} from that map; the fallbacks come from closures
     *       {@code StringFeature$$anonfun$28} to {@code $32}, which are not visible here.</li>
     *   <li>Broadcast the result of {@code loadWordvecs} and load dictionary words with
     *       {@code loadDicsFromWordvec}, both from {@code wordvecPaths}.</li>
     *   <li>Load the {@code SQLWord2Vec} model stored at {@code WORD2VEC_PATH(metaPath, inputCol)}
     *       and take its {@code "wow_array"} prediction function.</li>
     *   <li>Read the parquet file at {@code WORD_INDEX_PATH(metaPath, inputCol)} into a
     *       string-to-double map and broadcast it.</li>
     *   <li>Tokenise column {@code str} with {@code analysisRaw}, then replace it through a UDF
     *       built from {@code StringFeature$$anonfun$34} (type tags: {@code Seq[Seq[Double]]}
     *       result, {@code Seq[Seq[String]]} argument).</li>
     * </ol>
     *
     * @param dataset input data
     * @param str name of the column to replace
     * @param str2 forwarded to {@code analysisRaw} as its third argument
     * @param str3 path from which the meta path is derived
     * @return the dataset with column {@code str} replaced
     */
    public Dataset<Row> raw2vec(Dataset<Row> dataset, String str, String str2, String str3) {
        SparkSession sparkSession = dataset.sparkSession();
        String metaPath = MetaConst$.MODULE$.getMetaPath(str3);
        Predef$ predef$ = Predef$.MODULE$;
        // Persisted parameters are collected to the driver as (String, String) pairs.
        Dataset parquet = sparkSession.read().parquet(MetaConst$.MODULE$.PARAMS_PATH(metaPath, "params"));
        StringFeature$$anonfun$27 stringFeature$$anonfun$27 = new StringFeature$$anonfun$27();
        SparkSession$implicits$ implicits = sparkSession.implicits();
        TypeTags universe = package$.MODULE$.universe();
        Map map = predef$.refArrayOps((Object[]) parquet.map(stringFeature$$anonfun$27, implicits.newProductEncoder(universe.TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator4$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe2 = mirror.universe();
                return universe2.internal().reificationSupport().TypeRef(universe2.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Tuple2"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe2.internal().reificationSupport().TypeRef(universe2.internal().reificationSupport().SingleType(universe2.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe2.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$), universe2.internal().reificationSupport().TypeRef(universe2.internal().reificationSupport().SingleType(universe2.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe2.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        }))).collect()).toMap(Predef$.MODULE$.$conforms());
        // Individual settings; each getOrElse fallback is supplied by a closure defined elsewhere.
        String str4 = (String) map.getOrElse("inputCol", new StringFeature$$anonfun$28());
        String str5 = (String) map.getOrElse("dicPaths", new StringFeature$$anonfun$29());
        String str6 = (String) map.getOrElse("wordvecPaths", new StringFeature$$anonfun$30());
        String str7 = (String) map.getOrElse("split", new StringFeature$$anonfun$31());
        int i = new StringOps(Predef$.MODULE$.augmentString((String) map.getOrElse("vectorSize", new StringFeature$$anonfun$32()))).toInt();
        Broadcast broadcast = sparkSession.sparkContext().broadcast(loadWordvecs(sparkSession, str6), ClassTag$.MODULE$.apply(Map.class));
        String[] loadDicsFromWordvec = loadDicsFromWordvec(sparkSession, str6);
        SQLWord2Vec sQLWord2Vec = new SQLWord2Vec();
        // Prediction function registered under "wow_array" for the reloaded word2vec model.
        Function1 function1 = (Function1) sQLWord2Vec.internal_predict(dataset.sparkSession(), sQLWord2Vec.load(sparkSession, MetaConst$.MODULE$.WORD2VEC_PATH(metaPath, str4), (Map<String, String>) Predef$.MODULE$.Map().apply(Nil$.MODULE$)), "wow").apply("wow_array");
        SparkContext sparkContext = sparkSession.sparkContext();
        Predef$ predef$2 = Predef$.MODULE$;
        // Word index is collected to the driver as (String, Double) pairs and then broadcast.
        Dataset parquet2 = sparkSession.read().parquet(MetaConst$.MODULE$.WORD_INDEX_PATH(metaPath, str4));
        StringFeature$$anonfun$33 stringFeature$$anonfun$33 = new StringFeature$$anonfun$33();
        SparkSession$implicits$ implicits2 = sparkSession.implicits();
        TypeTags universe2 = package$.MODULE$.universe();
        Broadcast broadcast2 = sparkContext.broadcast(predef$2.refArrayOps((Object[]) parquet2.map(stringFeature$$anonfun$33, implicits2.newProductEncoder(universe2.TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator5$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe3 = mirror.universe();
                return universe3.internal().reificationSupport().TypeRef(universe3.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Tuple2"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe3.internal().reificationSupport().TypeRef(universe3.internal().reificationSupport().SingleType(universe3.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe3.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$), mirror.staticClass("scala.Double").asType().toTypeConstructor()})));
            }
        }))).collect()).toMap(Predef$.MODULE$.$conforms()), ClassTag$.MODULE$.apply(Map.class));
        Dataset<Row> analysisRaw = analysisRaw(dataset, str, str2, str7, str5, loadDicsFromWordvec);
        functions$ functions_ = functions$.MODULE$;
        // The UDF closure receives the vector size, both broadcasts and the word2vec prediction function.
        StringFeature$$anonfun$34 stringFeature$$anonfun$34 = new StringFeature$$anonfun$34(i, broadcast, function1, broadcast2);
        TypeTags universe3 = package$.MODULE$.universe();
        TypeTags.TypeTag apply = universe3.TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator11$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe4 = mirror.universe();
                return universe4.internal().reificationSupport().TypeRef(universe4.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe4.internal().reificationSupport().TypeRef(universe4.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("scala.Double").asType().toTypeConstructor()})))})));
            }
        });
        TypeTags universe4 = package$.MODULE$.universe();
        return replaceColumn(analysisRaw, str, functions_.udf(stringFeature$$anonfun$34, apply, universe4.TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.StringFeature$$typecreator12$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe5 = mirror.universe();
                return universe5.internal().reificationSupport().TypeRef(universe5.internal().reificationSupport().SingleType(universe5.internal().reificationSupport().SingleType(universe5.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe5.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe5.internal().reificationSupport().TypeRef(universe5.internal().reificationSupport().SingleType(universe5.internal().reificationSupport().SingleType(universe5.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe5.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe5.internal().reificationSupport().TypeRef(universe5.internal().reificationSupport().SingleType(universe5.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe5.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})))})));
            }
        })));
    }

    /**
     * Computes a similarity score for two equally long, non-empty vectors.
     *
     * <p>Three accumulators are filled by {@code StringFeature$$anonfun$cosineSimilarity$1},
     * which is invoked for every index in {@code [0, seq.length())}. The result is the first
     * accumulator divided by the product of the square roots of the other two. Presumably these
     * are the dot product and the two squared norms (closure body not visible, TODO confirm).
     *
     * @param seq first vector
     * @param seq2 second vector
     * @return {@code 0.0} when either vector is empty, the lengths differ, or either square
     *         root is zero; otherwise the quotient described above
     */
    public double cosineSimilarity(Seq<Object> seq, Seq<Object> seq2) {
        boolean comparable = seq.length() != 0 && seq2.length() != 0 && seq.length() == seq2.length();
        if (!comparable) {
            return 0.0d;
        }

        DoubleRef numerator = DoubleRef.create(0.0d);
        DoubleRef leftSquares = DoubleRef.create(0.0d);
        DoubleRef rightSquares = DoubleRef.create(0.0d);
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), seq.length()).foreach$mVc$sp(new StringFeature$$anonfun$cosineSimilarity$1(seq, seq2, numerator, leftSquares, rightSquares));

        double leftNorm = Math.sqrt(leftSquares.elem);
        double rightNorm = Math.sqrt(rightSquares.elem);
        // A zero norm would make the quotient undefined; report no similarity instead.
        return (leftNorm == 0.0d || rightNorm == 0.0d) ? 0.0d : numerator.elem / (leftNorm * rightNorm);
    }

    /**
     * Scores how much two sequences of vectors overlap.
     *
     * <p>The shorter of the two inputs is iterated (when the lengths are equal, {@code seq2} is
     * iterated and {@code seq} is the comparison side). For every element,
     * {@code StringFeature$$anonfun$rawSimilar$1} is invoked with the threshold, the other
     * sequence and a shared counter; presumably it increments the counter when the element is
     * similar enough to something in the other sequence (closure body not visible, TODO confirm).
     * The result is the counter divided by the length of the iterated sequence.
     *
     * <p>The counter is held in a named local so it can be read back after the traversal; the
     * previous code created it inline and then read an undeclared variable.
     *
     * @param seq first sequence of vectors
     * @param seq2 second sequence of vectors
     * @param d threshold forwarded to the closure
     * @return {@code 0.0} when either input is empty, otherwise counter / shorter length
     */
    public double rawSimilar(Seq<Seq<Object>> seq, Seq<Seq<Object>> seq2, double d) {
        if (seq.length() <= 0 || seq2.length() <= 0) {
            return 0.0d;
        }
        boolean firstIsShorter = seq.length() < seq2.length();
        Seq<Seq<Object>> shorter = firstIsShorter ? seq : seq2;
        Seq<Seq<Object>> longer = firstIsShorter ? seq2 : seq;
        double shorterLength = shorter.length();
        // Shared mutable counter updated by the closure during the traversal.
        IntRef matches = IntRef.create(0);
        shorter.foreach(new StringFeature$$anonfun$rawSimilar$1(d, longer, matches));
        return matches.elem / shorterLength;
    }

    /**
     * Returns the constant 0.8.
     *
     * <p>By the Scala compiler's {@code method$default$N} naming scheme this is the default for
     * the 3rd parameter of {@code rawSimilar} (the threshold {@code d}).
     */
    public double rawSimilar$default$3() {
        return 0.8d;
    }

    /**
     * Private constructor of the singleton: publishes this instance as {@code MODULE$} and then
     * runs the initialisers of the {@code BaseFeatureFunctions} and {@code WowLog} traits.
     */
    private StringFeature$() {
        // MODULE$ is assigned before the trait initialisers run.
        MODULE$ = this;
        BaseFeatureFunctions.Cclass.$init$(this);
        WowLog.Cclass.$init$(this);
    }
}
