package com.datastax.data.prepare.spark.dataset;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.datastax.insight.spec.Operator;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.data.prepare.spark.dataset.params.ColumnReduceParam;
import com.datastax.data.prepare.util.CustomException;
import org.apache.spark.sql.Dataset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.datastax.insight.annonation.InsightComponentArg;

/**
 * Insight operator that exposes the column-merge ("列合并") step as a component.
 *
 * <p>It converts the JSON configuration passed by the caller into
 * {@link ColumnReduceParam} objects and delegates the actual merge to
 * {@code ColumnReduce.merge}.
 */
public class ColumnReduceOperater implements Operator {
    private static final Logger logger = LoggerFactory.getLogger(ColumnReduceOperater.class);

    /**
     * Merges columns of {@code data} according to the entries of {@code jsonArray}.
     *
     * <p>Each array entry is a JSON object read with these keys:
     * <ul>
     *   <li>{@code "selector"} - the columns to merge (required, non-empty)</li>
     *   <li>{@code "selectorValue"} - the connect symbol (passed through as-is, may be null)</li>
     *   <li>{@code "method"} - the name of the new column (required, non-empty)</li>
     *   <li>{@code "methodValue"} - the data type (passed through as-is, may be null)</li>
     * </ul>
     *
     * @param data      the input dataset; when {@code null}, {@code null} is returned
     * @param jsonArray the merge configuration; when {@code null} or empty, {@code data}
     *                  is returned unchanged
     * @param <T>       element type of the dataset
     * @return the result of {@code ColumnReduce.merge}, or {@code data} / {@code null}
     *         as described above
     * @throws CustomException if an entry is null, or its {@code "selector"} or
     *                         {@code "method"} value is missing or empty
     */
    @InsightComponent(name = "列合并", description = "将多个列合并")
    public static <T> Dataset<T> columnReduce(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "数据集") Dataset<T> data,
            @InsightComponentArg(name = "参数", description = "参数") JSONArray jsonArray) {
        // A missing parameter array is treated like an empty one instead of
        // failing with a NullPointerException on isEmpty().
        if (jsonArray == null || jsonArray.isEmpty()) {
            logger.info("列合并组件参数为空, 返回原数据集");
            return data;
        }
        if (data == null) {
            logger.info("列合并组件中的数据集为空, 返回空");
            return null;
        }

        final int count = jsonArray.size();
        ColumnReduceParam[] columnReduceParams = new ColumnReduceParam[count];
        for (int i = 0; i < count; i++) {
            columnReduceParams[i] = toParam(jsonArray.getJSONObject(i));
        }

        // The merge runs on the untyped toDF() view of the data; the result is cast
        // back to the caller's element type, exactly as the operator always did.
        @SuppressWarnings("unchecked")
        Dataset<T> merged = (Dataset<T>) ColumnReduce.merge(data.toDF(), columnReduceParams);
        return merged;
    }

    /** Validates one configuration entry and copies its values into a {@link ColumnReduceParam}. */
    private static ColumnReduceParam toParam(JSONObject jsonObject) {
        // A null entry carries no merge columns at all, so it is rejected with the
        // same error as a missing "selector" rather than a bare NullPointerException.
        if (jsonObject == null) {
            throw new CustomException("合并列的参数为空");
        }
        String mergeColumns = jsonObject.getString("selector");
        String connectSymbol = jsonObject.getString("selectorValue");
        String newColumnName = jsonObject.getString("method");
        String dataType = jsonObject.getString("methodValue");
        if (mergeColumns == null || mergeColumns.isEmpty()) {
            throw new CustomException("合并列的参数为空");
        }
        if (newColumnName == null || newColumnName.isEmpty()) {
            throw new CustomException("合并列后生成的新列名参数为空");
        }

        ColumnReduceParam columnReduceParam = new ColumnReduceParam();
        columnReduceParam.setMergeColumns(mergeColumns);
        columnReduceParam.setConnectSymbol(connectSymbol);
        columnReduceParam.setNewColumnName(newColumnName);
        columnReduceParam.setDataType(dataType);
        return columnReduceParam;
    }
}
