package com.datastax.data.prepare.spark.dataset;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.Operator;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.data.prepare.spark.dataset.params.TypeTransform;
import com.datastax.data.prepare.util.Consts;
import com.datastax.data.prepare.util.SharedMethods;
import com.datastax.data.prepare.util.SharedUDFs;
import com.google.common.base.Strings;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;

/**
 * Operator that changes the data type of selected Dataset columns.
 *
 * <p>Each {@link TypeTransform} names a column selector and a transformation type
 * (one of the {@code Consts} constants checked in {@link #transform}). Columns are
 * picked through {@code SharedMethods.attributeFilter} and then rewritten in place
 * with {@code Dataset.withColumn}, either by calling a UDF registered through
 * {@code SharedUDFs} or by a plain {@code Column.cast}.
 */
public class TypeTransformOperator implements Operator {
    private static final Logger logger = LoggerFactory.getLogger(TypeTransformOperator.class);

    /**
     * Applies the given transform specs to {@code data}, one after another.
     *
     * <p>Note: every non-null spec has its invert-selection flag reset to {@code false}
     * as a side effect.
     *
     * @param data           the Dataset to transform; returned unchanged when {@code null}
     * @param typeTransforms the specs to apply; {@code null} entries are skipped
     * @return the transformed Dataset, or {@code data} itself when there is nothing to do
     */
    protected static <T> Dataset<T> typeTransform(Dataset<T> data, TypeTransform... typeTransforms) {
        if (data == null || typeTransforms == null || typeTransforms.length == 0) {
            logger.info("Detail parameter of Type-transformation is empty or Dataset is empty");
            return data;
        }
        for (TypeTransform t : typeTransforms) {
            if (t == null) {
                continue;
            }
            // Inverted selection is not used by type transformation for now, so it is forced off.
            t.setInvertSelection(false);
            StructField[] fields = SharedMethods.attributeFilter(data, t.getAttributeSelector(), t.isInvertSelection(),
                    t.getAttribute(), t.getRegularExpression(), t.getValueType());
            data = transform(data, fields, t);
        }
        return data;
    }

    /**
     * List-based convenience overload of {@link #typeTransform(Dataset, TypeTransform...)}.
     *
     * @param data           the Dataset to transform
     * @param typeTransforms the specs to apply; a {@code null} list is treated as empty
     * @return the transformed Dataset
     */
    protected static <T> Dataset<T> typeTransform(Dataset<T> data, List<TypeTransform> typeTransforms) {
        if (typeTransforms == null) {
            return typeTransform(data, new TypeTransform[0]);
        }
        return typeTransform(data, typeTransforms.toArray(new TypeTransform[0]));
    }

    /**
     * Parses {@code json} as a JSON array and delegates to
     * {@link #typeTransform(Dataset, JSONArray)}.
     *
     * @param data the Dataset to transform
     * @param json JSON array text describing the transforms; {@code null} or empty means "no-op"
     * @return the transformed Dataset
     */
    protected static <T> Dataset<T> typeTransform(Dataset<T> data, String json) {
        if (Strings.isNullOrEmpty(json)) {
            return data;
        }
        JSONArray array = JSON.parseArray(json);
        return typeTransform(data, array);
    }

    /**
     * Component entry point: builds one {@link TypeTransform} per JSON object in {@code array}
     * and applies them in order.
     *
     * <p>Keys read from each object: {@code selector}, {@code selectorValue}, {@code method},
     * and, only when the method equals {@code Consts.FORMAT_NUMBER}, {@code methodValue}
     * (number format) and {@code selectMethodValue} (rounding mode).
     *
     * @param data  the Dataset to transform
     * @param array the transform descriptions; {@code null} or empty means "no-op"
     * @return the transformed Dataset
     */
    @InsightComponent(name = "数据类型处理", type = "com.datastax.insight.dataprprocess.typeTransform", description = "数据类型处理", order = 500213)
    public static <T> Dataset<T> typeTransform(
            @InsightComponentArg(externalInput = true, name = "data", description = "待数据类型处理的数据集") Dataset<T> data,
            @InsightComponentArg(name = "参数", description = "数据类型处理的详细参数") JSONArray array) {
        if (array == null || array.isEmpty()) {
            return data;
        }
        for (int i = 0; i < array.size(); i++) {
            JSONObject object = array.getJSONObject(i);
            if (object == null) {
                continue;
            }
            // A fresh spec per entry, so settings from one entry never leak into the next.
            TypeTransform spec = new TypeTransform();
            String selector = object.getString("selector");
            String selectorValue = object.getString("selectorValue");
            spec.setAttributeSelector(selector);
            if (Consts.ATTRIBUTE_NAME.equals(selector)) {
                spec.setAttribute(selectorValue);
            }
            if (Consts.REGULAR_EXPRESSION.equals(selector)) {
                spec.setRegularExpression(selectorValue);
            }
            if (Consts.VALUE_TYPE.equals(selector)) {
                spec.setValueType(selectorValue);
            }
            String method = object.getString("method");
            spec.setType(method);
            if (Consts.FORMAT_NUMBER.equals(method)) {
                spec.setNumberFormat(object.getString("methodValue"));
                spec.setRoundingMode(object.getString("selectMethodValue"));
            }
            data = typeTransform(data, spec);
        }
        return data;
    }

    /**
     * Applies the transformation named by {@code t.getType()} to every column in {@code fields}
     * whose current data type is the source type of that transformation. Columns of any other
     * type are left untouched.
     *
     * @param data   the Dataset to transform
     * @param fields the selected columns; {@code null} means nothing was selected
     * @param t      the transform spec
     * @return the transformed Dataset, or {@code data} when the type is empty or no fields were selected
     */
    // withColumn yields a Dataset of rows regardless of T; the cast back to Dataset<T> mirrors
    // the unchecked conversion this method has always performed for its callers.
    @SuppressWarnings("unchecked")
    private static <T> Dataset<T> transform(Dataset<?> data, StructField[] fields, TypeTransform t) {
        String type = t.getType();
        if (Strings.isNullOrEmpty(type) || fields == null) {
            logger.info("None of attributes is selected or Type is empty");
            return (Dataset<T>) data;
        }
        SparkSession session = SparkContextBuilder.getSession();
        Dataset<?> result = data;
        if (Consts.NUM_TO_DATE.equals(type)) { // TODO(qianzy)
            SharedUDFs.num2date(session.udf());
            for (StructField field : fields) {
                if (hasType(field, DataTypes.LongType)) {
                    result = callUdf(result, field, "num2date");
                }
            }
        }
        if (Consts.DATE_TO_NUM.equals(type)) { // TODO(qianzy)
            SharedUDFs.timestamp2num(session.udf());
            SharedUDFs.date2num(session.udf());
            for (StructField field : fields) {
                if (hasType(field, DataTypes.TimestampType)) {
                    result = callUdf(result, field, "timestamp2num");
                }
                if (hasType(field, DataTypes.DateType)) {
                    result = callUdf(result, field, "date2num");
                }
            }
        }
        if (Consts.FORMAT_NUMBER.equals(type)) {
            if (Strings.isNullOrEmpty(t.getNumberFormat())) {
                return (Dataset<T>) result;
            }
            SharedUDFs.rounding(session.udf(), t.getNumberFormat(), t.getRoundingMode());
            for (StructField field : fields) {
                if (isNumeric(field)) {
                    result = callUdf(result, field, "rounding");
                }
            }
        }
        // The Chinese log messages below say: "rows of <column> that do not match the
        // <target> format will be converted to null".
        if (Consts.STRING_TO_DATE.equals(type)) {
            // Converts all-digit strings to dates. A possible extension is to let the user supply
            // a regex-like pattern that the whole column must match; the same holds for timestamps.
            result = parseStringColumns(result, fields, session, DataTypes.DateType,
                    "{}不符合Date格式的行将被转换为空");
        }
        if (Consts.STRING_TO_TIMESTAMP.equals(type)) {
            result = parseStringColumns(result, fields, session, DataTypes.TimestampType,
                    "{}不符合Timestamp格式的行将被转换为空");
        }
        if (Consts.STRING_TO_LONG.equals(type)) {
            result = castColumns(result, fields, DataTypes.StringType, DataTypes.LongType,
                    "{}不符合Long格式的行将被转换为空");
        }
        if (Consts.STRING_TO_DOUBLE.equals(type)) {
            // No up-front check that the whole column matches the Double format is performed;
            // the column is simply cast.
            result = castColumns(result, fields, DataTypes.StringType, DataTypes.DoubleType,
                    "{}不符合Double格式的行将被转换为空");
        }
        if (Consts.INT_TO_DOUBLE.equals(type)) {
            result = castColumns(result, fields, DataTypes.IntegerType, DataTypes.DoubleType, null);
        }
        if (Consts.LONG_TO_DOUBLE.equals(type)) {
            result = castColumns(result, fields, DataTypes.LongType, DataTypes.DoubleType, null);
        }
        return (Dataset<T>) result;
    }

    /** Returns whether the column's declared data type is {@code type}. */
    private static boolean hasType(StructField field, DataType type) {
        return type.equals(field.dataType());
    }

    /** Returns whether the column is one of the numeric types handled by number formatting. */
    private static boolean isNumeric(StructField field) {
        return hasType(field, DataTypes.IntegerType)
                || hasType(field, DataTypes.ByteType)
                || hasType(field, DataTypes.LongType)
                || hasType(field, DataTypes.FloatType)
                || hasType(field, DataTypes.DoubleType)
                || hasType(field, DataTypes.ShortType);
    }

    /** Replaces the column with the result of calling the named, already registered UDF on it. */
    private static Dataset<?> callUdf(Dataset<?> data, StructField field, String udfName) {
        return data.withColumn(field.name(), functions.callUDF(udfName, data.col(field.name())));
    }

    /** Replaces the column with itself cast to {@code targetType}. */
    private static Dataset<?> castTo(Dataset<?> data, StructField field, DataType targetType) {
        return data.withColumn(field.name(), data.col(field.name()).cast(targetType));
    }

    /**
     * Casts every column of {@code sourceType} to {@code targetType}; when {@code warningFormat}
     * is non-null it is logged once per converted column with the column name as its argument.
     */
    private static Dataset<?> castColumns(Dataset<?> data, StructField[] fields, DataType sourceType,
                                          DataType targetType, String warningFormat) {
        Dataset<?> result = data;
        for (StructField field : fields) {
            if (hasType(field, sourceType)) {
                if (warningFormat != null) {
                    logger.info(warningFormat, field.name());
                }
                result = castTo(result, field, targetType);
            }
        }
        return result;
    }

    /**
     * Runs every string column through the "string2dateString" UDF and then casts it to
     * {@code targetType}, logging {@code warningFormat} once per converted column.
     */
    private static Dataset<?> parseStringColumns(Dataset<?> data, StructField[] fields, SparkSession session,
                                                 DataType targetType, String warningFormat) {
        // Registered once up front rather than once per column.
        // NOTE(review): presumably this registers the UDF under the name "string2dateString" - confirm.
        SharedUDFs.string2date(session.udf());
        Dataset<?> result = data;
        for (StructField field : fields) {
            if (hasType(field, DataTypes.StringType)) {
                logger.info(warningFormat, field.name());
                result = callUdf(result, field, "string2dateString");
                result = castTo(result, field, targetType);
            }
        }
        return result;
    }
}
