package com.datastax.data.prepare.spark.dataset;

import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.Operator;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.api.java.UDF2;
import org.apache.spark.sql.types.DataTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.Seq;

/**
 * Insight components that expose stock-account similarity computations.
 *
 * <p>Both entry points validate their arguments and then delegate the actual computation to the
 * package-local helper {@code r}; this class contains no similarity logic of its own.
 */
public class StockSimilrityOperator implements Operator {
    private static final Logger logger = LoggerFactory.getLogger(StockSimilrityOperator.class);

    /** Method name that is mapped to helper code {@code 0}; every other name maps to {@code 1}. */
    private static final String MAXIMUM_BEST_MATCH = "maximum_best_match";

    /** The second method name offered by the component annotation. */
    private static final String AVERAGE_BEST_MATCH = "average_best_match";

    /**
     * Computes the "dynamic" similarity component by delegating to the helper {@code r}.
     *
     * <p>Parameter meanings below are taken from the component annotations.
     *
     * @param dataset input dataset; must not be {@code null}
     * @param str name of the shareholder-account column
     * @param str2 name of the stock column
     * @param str3 name of the date column
     * @param str4 name of the trade-direction column
     * @param z whether same-direction trading is considered
     * @param str5 similarity method; {@code maximum_best_match} is passed to the helper as
     *     {@code 0}, anything else as {@code 1}
     * @param d minimum valid similarity, in the closed range 0..100
     * @param i minimum number of days threshold; must not be negative
     * @param str6 name of the market-type column; must not be blank
     * @param str7 market type ({@code 0} or {@code 1} per the annotation); must not be blank
     * @param str8 path for intermediate files; must not be blank
     * @param str9 batch size as a decimal integer string
     * @param <T> element type the caller wants the result typed as
     * @return whatever dataset the helper produces, cast to {@code Dataset<T>}
     * @throws IllegalArgumentException if any of the validations above fails
     * @throws NumberFormatException if {@code str9} is {@code null} or not a valid integer
     */
    @InsightComponent(name = "动态相似度(stock)", description = "相似度计算(stock)")
    public static <T> Dataset<T> computeSim(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "数据集") Dataset<Row> dataset,
            @InsightComponentArg(name = "股东帐号列", description = "股东帐号") String str,
            @InsightComponentArg(name = "股票列", description = "股票列") String str2,
            @InsightComponentArg(name = "日期列", description = "日期列") String str3,
            @InsightComponentArg(name = "交易方向列", description = "交易方向列") String str4,
            @InsightComponentArg(name = "是否同向交易", description = "是否同向交易", defaultValue = "true", items = "true;false") boolean z,
            @InsightComponentArg(name = "相似度方法", description = "相似度方法", defaultValue = "maximum_best_match", items = "maximum_best_match;average_best_match") String str5,
            @InsightComponentArg(name = "最小有效相似度", description = "用于过滤掉两股东账户之间相似度小于该值的数据，0到100之间") double d,
            @InsightComponentArg(name = "最小天数阈值", description = "用于计算相似度的最小天数阈值") int i,
            @InsightComponentArg(name = "市场类型列", description = "市场类型列名") String str6,
            @InsightComponentArg(name = "市场类型", description = "市场类型, 0代表沪市，1代表深市", defaultValue = "1", items = "0;1") String str7,
            @InsightComponentArg(name = "中间文件路径", description = "中间文件路径", defaultValue = "${MISC_FOLDER}") String str8,
            @InsightComponentArg(name = "批处理数量", description = "批处理数量", defaultValue = "5000") String str9) {
        if (dataset == null) {
            throw new IllegalArgumentException("数据集为空");
        }
        if (str5 == null || str5.length() == 0) {
            throw new IllegalArgumentException("set_similarity_method为空");
        }
        // Written as a negated "in range" test so that NaN is rejected as well:
        // NaN fails every comparison and would slip through "d > 100 || d < 0".
        if (!(d >= 0.0d && d <= 100.0d)) {
            throw new IllegalArgumentException("min_similarity_flt不在0到100之间");
        }
        if (i < 0) {
            throw new IllegalArgumentException("trade_date_threshold小于0");
        }
        if (isBlank(str6)) {
            throw new IllegalArgumentException("mkt_type的列名为空");
        }
        if (isBlank(str7)) {
            throw new IllegalArgumentException("mkt_type为空");
        }
        if (isBlank(str8)) {
            throw new IllegalArgumentException("tempPath为空");
        }

        // Parse before touching Spark so a malformed batch size fails fast.
        final int batchSize = parseBatchSize(str9);

        final int methodCode = MAXIMUM_BEST_MATCH.equals(str5) ? 0 : 1;
        if (methodCode == 1 && !AVERAGE_BEST_MATCH.equals(str5)) {
            // The mapping is kept for compatibility, but make the silent fallback visible.
            logger.warn("Unrecognised similarity method '{}'; it is handled the same as '{}'",
                    str5, AVERAGE_BEST_MATCH);
        }

        // The helper's result is re-typed to the caller's T; the element type is not checked here.
        @SuppressWarnings("unchecked")
        Dataset<T> result = (Dataset<T>) r.a(SparkContextBuilder.getSession(), dataset.toDF(), str, str2, str3,
                str4, z, methodCode, d, i, str6, str7, str8, batchSize);
        return result;
    }

    /**
     * Computes the "static" similarity component by delegating to the helper {@code r}.
     *
     * <p>Registers a two-argument UDF named {@code staticSimUDF} (returning a double) on the
     * session before delegating. Parameter meanings below are taken from the component
     * annotations.
     *
     * @param dataset input dataset; must not be {@code null}
     * @param str name of the securities-account code column
     * @param str2 name of the mobile phone number column
     * @param str3 name of the fixed or backup contact phone column
     * @param str4 name of the contact address column
     * @param str5 name of the e-mail column
     * @param str6 name of the identity document number column
     * @param str7 name of the account-opening agency code column
     * @param str8 name of the account-opening branch code column
     * @param str9 name of the account-opening date column
     * @param d minimum similarity; passed through to the helper unchanged
     * @param <T> element type of the input and of the returned dataset
     * @return whatever dataset the helper produces, cast to {@code Dataset<T>}
     * @throws IllegalArgumentException if {@code dataset} is {@code null}
     */
    @InsightComponent(name = "静态相似度(stock)", description = "计算静态的相似度")
    public static <T> Dataset<T> computeStaticSim(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "数据集") Dataset<T> dataset,
            @InsightComponentArg(name = "证券账户代码列", description = "证券账户代码列") String str,
            @InsightComponentArg(name = "手机号码列", description = "手机号码列") String str2,
            @InsightComponentArg(name = "固定或备用联系电话列", description = "固定或备用联系电话列") String str3,
            @InsightComponentArg(name = "联系地址列", description = "联系地址列") String str4,
            @InsightComponentArg(name = "电子邮箱列", description = "电子邮箱列") String str5,
            @InsightComponentArg(name = "身份证明文件号码列", description = "身份证明文件号码列") String str6,
            @InsightComponentArg(name = "开户代理机构代码列", description = "开户代理机构代码列") String str7,
            @InsightComponentArg(name = "开户代理网点代码列", description = "开户代理网点代码列") String str8,
            @InsightComponentArg(name = "开户日期列", description = "开户日期列") String str9,
            @InsightComponentArg(name = "最小相似度", description = "用于过滤掉数据集中相似度小于该值的数据") double d) {
        // Same guard and message as computeSim; avoids registering the UDF and then
        // failing with a bare NullPointerException on dataset.toDF().
        if (dataset == null) {
            throw new IllegalArgumentException("数据集为空");
        }

        SparkSession session = SparkContextBuilder.getSession();
        session.udf().register("staticSimUDF", new UDF2<Seq<String>, Seq<String>, Double>() {
            @Override
            public Double call(Seq<String> seq, Seq<String> seq2) throws Exception {
                return Double.valueOf(r.a(seq, seq2));
            }
        }, DataTypes.DoubleType);

        // The helper's result is re-typed to the caller's T; the element type is not checked here.
        @SuppressWarnings("unchecked")
        Dataset<T> result = (Dataset<T>) r.a(session, dataset.toDF(), str, str2, str3, str4, str5, str6, str7,
                str8, str9, d);
        return result;
    }

    /** Returns {@code true} when the value is {@code null} or contains only whitespace. */
    private static boolean isBlank(String value) {
        return value == null || value.trim().isEmpty();
    }

    /**
     * Parses the batch-size argument, tolerating surrounding whitespace.
     *
     * <p>Still throws {@link NumberFormatException} (as a direct {@code Integer.parseInt} call
     * would), but with a message that names the offending parameter.
     */
    private static int parseBatchSize(String raw) {
        if (raw == null) {
            throw new NumberFormatException("batch_size为空");
        }
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            NumberFormatException described = new NumberFormatException("batch_size不是有效整数: " + raw);
            described.initCause(e);
            throw described;
        }
    }
}
