package com.datastax.data.prepare.spark.dataset;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.datastax.data.prepare.util.Consts;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.insight.spec.Operator;
import java.util.HashMap;
import java.util.List;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/datastax/data/prepare/spark/dataset/FillDataOperator.class */
/**
 * Operator that handles missing values in a Spark {@link Dataset}, either by dropping the
 * affected rows or by filling them with a replacement value.
 *
 * <p>Each rule (a {@code params.f} instance) names a column selector and a fill method. The
 * fill methods recognised here are the {@link Consts} values {@code DROPEMPTY},
 * {@code MINIMUM}, {@code MAXIMUM}, {@code ZERO}, {@code VALUE} and {@code AVERAGE}.
 *
 * <p>All row-level work is delegated to {@code Dataset.na()}, which yields a
 * {@code Dataset<Row>}; results are cast back to the caller's {@code Dataset<T>} unchecked.
 */
public class FillDataOperator implements Operator {
    private static final Logger logger = LoggerFactory.getLogger(FillDataOperator.class);

    /**
     * Applies the given fill rules to the dataset one after another.
     *
     * @param dataset the dataset to process
     * @param fVarArr the fill rules, applied in order; each rule sees the output of the previous one
     * @param <T> the element type of the dataset
     * @return the processed dataset, or {@code dataset} itself when there are no rules or no rows
     */
    static <T> Dataset<T> a(Dataset<T> dataset, com.datastax.data.prepare.spark.dataset.params.f... fVarArr) {
        // takeAsList(1) fetches at most one row, whereas count() has to scan the whole dataset
        // just to learn whether it is empty.
        if (fVarArr.length == 0 || dataset.takeAsList(1).isEmpty()) {
            logger.info("Detail parameter of Vacancy is empty or Dataset is empty");
            return dataset;
        }
        Dataset<T> result = dataset;
        for (com.datastax.data.prepare.spark.dataset.params.f rule : fVarArr) {
            // NOTE(review): c.a(...) presumably resolves the selector to the matching schema
            // fields and may return null when nothing matches - confirm against the helper.
            StructField[] targets = com.datastax.data.prepare.util.c.a(result, rule.getAttributeSelector(), rule.b(), rule.getAttribute(), rule.getRegularExpression(), rule.getValueType());
            result = a(result, rule, targets);
        }
        return result;
    }

    /**
     * List-based convenience overload of {@link #a(Dataset, com.datastax.data.prepare.spark.dataset.params.f...)}.
     *
     * @param dataset the dataset to process
     * @param list the fill rules, applied in list order
     * @param <T> the element type of the dataset
     * @return the processed dataset
     */
    protected static <T> Dataset<T> a(Dataset<T> dataset, List<com.datastax.data.prepare.spark.dataset.params.f> list) {
        return a(dataset, list.toArray(new com.datastax.data.prepare.spark.dataset.params.f[0]));
    }

    /**
     * Parses a JSON array of fill rules and applies it via
     * {@link #imputeMissingValues(Dataset, JSONArray)}.
     *
     * @param dataset the dataset to process
     * @param str the JSON text; {@code null} or the empty string leaves the dataset untouched
     * @param <T> the element type of the dataset
     * @return the processed dataset
     */
    protected static <T> Dataset<T> b(Dataset<T> dataset, String str) {
        if (str == null || "".equals(str)) {
            return dataset;
        }
        return imputeMissingValues(dataset, JSON.parseArray(str));
    }

    /**
     * Applies the fill rules described by a JSON array to the dataset.
     *
     * <p>Each array element is an object with the keys {@code selector}, {@code selectorValue},
     * {@code method} and {@code methodValue}. {@code selectorValue} is stored as the attribute
     * name, regular expression or value type depending on {@code selector}; {@code methodValue}
     * is only read when {@code method} equals {@link Consts#VALUE}.
     *
     * @param dataset the dataset to process
     * @param jSONArray the rule descriptions; {@code null} or empty leaves the dataset untouched
     * @param <T> the element type of the dataset
     * @return the processed dataset
     */
    @InsightComponent(name = "空缺值处理", type = "com.datastax.insight.dataprprocess.imputeMissingValues", description = "空缺值处理", order = 500701)
    public static <T> Dataset<T> imputeMissingValues(@InsightComponentArg(externalInput = true, name = "data", description = "数据集") Dataset<T> dataset, @InsightComponentArg(name = "参数", description = "填充空缺值详细参数") JSONArray jSONArray) {
        if (jSONArray == null || jSONArray.isEmpty()) {
            return dataset;
        }
        Dataset<T> result = dataset;
        for (int i = 0; i < jSONArray.size(); i++) {
            JSONObject ruleJson = jSONArray.getJSONObject(i);
            // A fresh parameter object per rule: reusing one instance would leak the attribute,
            // regex, value type or fill value of an earlier rule into later ones.
            com.datastax.data.prepare.spark.dataset.params.f rule = new com.datastax.data.prepare.spark.dataset.params.f();

            String selector = ruleJson.getString("selector");
            rule.setAttributeSelector(selector);
            if (Consts.ATTRIBUTE_NAME.equals(selector)) {
                rule.setAttribute(ruleJson.getString("selectorValue"));
            } else if (Consts.REGULAR_EXPRESSION.equals(selector)) {
                rule.setRegularExpression(ruleJson.getString("selectorValue"));
            } else if (Consts.VALUE_TYPE.equals(selector)) {
                rule.setValueType(ruleJson.getString("selectorValue"));
            }

            String method = ruleJson.getString("method");
            rule.setFillData(method);
            if (Consts.VALUE.equals(method)) {
                rule.setFillDataValue(ruleJson.getString("methodValue"));
            }

            result = a(result, rule);
        }
        return result;
    }

    /**
     * Applies a single fill rule to the given columns.
     *
     * @param dataset the dataset to process
     * @param fVar the rule supplying the fill method and, for {@link Consts#VALUE}, the fill value
     * @param structFieldArr the columns the rule applies to; {@code null} leaves the dataset untouched
     * @param <T> the element type of the dataset
     * @return the processed dataset; {@code dataset} itself when no replacement value was determined
     */
    private static <T> Dataset<T> a(Dataset<T> dataset, com.datastax.data.prepare.spark.dataset.params.f fVar, StructField[] structFieldArr) {
        if (structFieldArr == null) {
            return dataset;
        }
        String fillMethod = fVar.getFillData();

        if (Consts.DROPEMPTY.equals(fillMethod)) {
            String[] columnNames = new String[structFieldArr.length];
            for (int i = 0; i < structFieldArr.length; i++) {
                columnNames[i] = structFieldArr[i].name();
            }
            // na() always produces Dataset<Row>; the cast restores the caller's static type.
            @SuppressWarnings("unchecked")
            Dataset<T> dropped = (Dataset<T>) dataset.na().drop(columnNames);
            return dropped;
        }

        // Values are kept as strings; the map is typed <String, Object> because that is the
        // parameter type of DataFrameNaFunctions.fill(Map).
        HashMap<String, Object> replacements = new HashMap<>();
        for (StructField field : structFieldArr) {
            String replacement = replacementFor(dataset, fVar, fillMethod, field);
            if (replacement != null) {
                replacements.put(field.name(), replacement);
            }
        }
        if (replacements.isEmpty()) {
            return dataset;
        }
        @SuppressWarnings("unchecked")
        Dataset<T> filled = (Dataset<T>) dataset.na().fill(replacements);
        return filled;
    }

    /**
     * Determines the replacement value for one column under the given fill method.
     *
     * @return the replacement as a string, or {@code null} when the column's type is excluded
     *     for this method, the aggregate is null, or the method is not recognised
     */
    private static String replacementFor(Dataset<?> dataset, com.datastax.data.prepare.spark.dataset.params.f fVar, String fillMethod, StructField field) {
        if (Consts.MINIMUM.equals(fillMethod)) {
            return isNullOrInterval(field) ? null : aggregateAsString(dataset, functions.min(dataset.col(field.name())));
        }
        if (Consts.MAXIMUM.equals(fillMethod)) {
            return isNullOrInterval(field) ? null : aggregateAsString(dataset, functions.max(dataset.col(field.name())));
        }
        if (Consts.ZERO.equals(fillMethod)) {
            return isNullOrInterval(field) ? null : "0";
        }
        if (Consts.VALUE.equals(fillMethod)) {
            String fillValue = fVar.getFillDataValue();
            boolean usable = fillValue != null && !"".equals(fillValue);
            return (usable && !DataTypes.NullType.equals(field.dataType())) ? fillValue : null;
        }
        if (Consts.AVERAGE.equals(fillMethod)) {
            return isExcludedFromAverage(field) ? null : aggregateAsString(dataset, functions.avg(dataset.col(field.name())));
        }
        return null;
    }

    /** Returns whether the column is of null type or calendar-interval type. */
    private static boolean isNullOrInterval(StructField field) {
        return DataTypes.NullType.equals(field.dataType())
                || DataTypes.CalendarIntervalType.equals(field.dataType());
    }

    /**
     * Returns whether the column's type is on the exclusion list for averaging.
     *
     * <p>NOTE(review): complex types (array, map, struct) are not on this list, so they are
     * still passed to {@code avg}; confirm whether the selector can ever match such columns.
     */
    private static boolean isExcludedFromAverage(StructField field) {
        return isNullOrInterval(field)
                || DataTypes.DateType.equals(field.dataType())
                || DataTypes.TimestampType.equals(field.dataType())
                || DataTypes.BooleanType.equals(field.dataType())
                || DataTypes.StringType.equals(field.dataType())
                || DataTypes.BinaryType.equals(field.dataType());
    }

    /**
     * Runs a single ungrouped aggregate over the dataset and returns its value as a string.
     *
     * @return the string form of the aggregate, or {@code null} when the aggregate is null
     */
    private static String aggregateAsString(Dataset<?> dataset, Column aggregate) {
        Row[] rows = (Row[]) dataset.agg(aggregate, new Column[0]).collect();
        Object value = rows[0].get(0);
        return value == null ? null : value.toString();
    }
}
