package com.datastax.data.prepare.spark.dataset;

import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.insight.spec.Operator;
import java.util.HashMap;
import org.apache.spark.ml.feature.StringIndexer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.types.DataTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/datastax/data/prepare/spark/dataset/MultiStringIndexerOperator.class */
/**
 * Insight operator that applies Spark ML's {@link StringIndexer} to several columns of a
 * dataset in a single call.
 */
public class MultiStringIndexerOperator implements Operator {
    private static final Logger logger = LoggerFactory.getLogger(MultiStringIndexerOperator.class);

    /**
     * Indexes each requested string column into a new output column.
     *
     * <p>{@code str} and {@code str2} are semicolon-separated lists that are paired by position:
     * the i-th input column is indexed into the i-th output column. Entries are trimmed before
     * use. An input entry that is blank, or that names a column not found in the dataset, is
     * logged and skipped (its paired output name is then not validated).
     *
     * @param dataset the dataset to transform; when {@code null} the method logs and returns
     *                {@code null}
     * @param str     semicolon-separated names of the columns to index
     * @param str2    semicolon-separated names of the generated index columns; each must be
     *                non-blank and must not clash with a column that is already present
     *                (including output columns generated earlier in the same call)
     * @param <T>     element type of the incoming dataset
     * @return the dataset converted with {@code toDF()} and extended with one index column per
     *         processed input column, or {@code null} when {@code dataset} is {@code null}
     * @throws NullPointerException if {@code str} is {@code null} or empty, or if {@code str2}
     *                              is {@code null}
     * @throws com.datastax.data.prepare.util.a if the two lists differ in length, if an output
     *                              name is blank, or if an output name clashes with an existing
     *                              column
     */
    // Raw HashMap is kept because the parameter type of the helper util.c.a is not visible here.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static <T> Dataset<T> multiStringIndexer(@InsightComponentArg(externalInput = true, name = "数据集", description = "数据集") Dataset<T> dataset, @InsightComponentArg(name = "列名", description = "需要转换的列名，多个列名用分号隔开") String str, @InsightComponentArg(name = "转换后的列名", description = "转换生成的索引列的列名，不能与现有列名重复") String str2) {
        if (dataset == null) {
            logger.info("数据集为空");
            return null;
        }
        if (str == null || str.length() == 0) {
            throw new NullPointerException("StringIndexer组件的参数为空");
        }
        // Previously a null str2 failed inside split() with a message-less NPE; fail with the
        // same exception type but say which argument is missing.
        if (str2 == null) {
            throw new NullPointerException("StringIndexer组件的转换后的列名参数为空");
        }

        // Presumably the helper fills the map with one entry per schema field, keyed by column
        // name - TODO confirm against util.c.a. Only key presence is consulted below.
        HashMap existingColumns = new HashMap();
        com.datastax.data.prepare.util.c.a(dataset.schema().fields(), existingColumns);

        String[] inputNames = str.split(";");
        String[] outputNames = str2.split(";");
        if (inputNames.length != outputNames.length) {
            throw new com.datastax.data.prepare.util.a("StringIdexer组件的列名和转换后的列名的数量不等");
        }

        // toDF() and transform() produce row-typed frames, so the working frame is tracked with
        // a wildcard type and cast back to the declared return type once, at the end.
        Dataset<?> current = dataset.toDF();
        for (int i = 0; i < inputNames.length; i++) {
            String inputCol = inputNames[i].trim();
            String outputCol = outputNames[i].trim();

            if (inputCol.length() == 0) {
                logger.info("列名参数的第{}个参数去掉前后空格后为空，跳过", i + 1);
                continue;
            }
            if (!existingColumns.containsKey(inputCol)) {
                logger.info("数据集中找不到{}列，跳过", inputCol);
                continue;
            }
            if (outputCol.length() == 0) {
                throw new com.datastax.data.prepare.util.a("转换后的列名参数的第" + (i + 1) + "个参数去掉前后空格后为空");
            }
            if (existingColumns.containsKey(outputCol)) {
                throw new com.datastax.data.prepare.util.a("转换后生成的列名" + outputCol + "和现有列名冲突");
            }

            StringIndexer indexer = new StringIndexer().setInputCol(inputCol).setOutputCol(outputCol);
            current = indexer.fit(current).transform(current);

            // Register the generated column so later entries cannot reuse its name (and may use
            // it as an input). The stored value is not read anywhere in this method.
            existingColumns.put(outputCol, new Object[]{Integer.valueOf(existingColumns.size() + 1), DataTypes.IntegerType});
        }

        // Unchecked by necessity: the declared return type is Dataset<T> although the frame is
        // row-typed after toDF(); generics are erased at runtime, so this matches the original
        // behaviour.
        Dataset<T> result = (Dataset<T>) current;
        return result;
    }
}
