/*
 * Decompiled with CFR 0.152.
 */
package com.datastax.data.prepare.spark.dataset;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.datastax.data.prepare.spark.dataset.params.d;
import com.datastax.data.prepare.util.c;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.Operator;
import com.google.common.base.Strings;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Locale;
import org.apache.spark.ml.feature.Bucketizer;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DataBinningOperator
implements Operator {
    /** SLF4J logger shared by all binning routines in this class. */
    private static final Logger logger = LoggerFactory.getLogger(DataBinningOperator.class);

    /**
     * Applies every binning specification, in the given order, to {@code data}.
     *
     * <p>For each non-null specification the target columns are resolved through
     * {@code c.a(data, ...)}; a specification that resolves to {@code null} is skipped.
     * The resolved columns are passed through {@code c.a(recordFields, fields)} before being
     * binned (NOTE(review): presumably this tracks columns already handled by an earlier
     * specification; confirm against the helper).
     *
     * @param data         dataset to discretize
     * @param dataBinnings binning specifications; {@code null} entries are ignored
     * @return the dataset after all applicable binnings, or {@code data} itself when there is
     *         no specification or the dataset has no rows
     */
    static <T> Dataset<T> a(Dataset<T> data, d ... dataBinnings) {
        // Evaluate the free parameter check first: data.count() launches a Spark job.
        if (dataBinnings == null || dataBinnings.length == 0 || data.count() == 0L) {
            logger.info("Detail parameter of DataBinning is empty or Dataset is empty");
            return data;
        }
        StructField[] recordFields = new StructField[data.schema().fields().length];
        for (d dataBinning : dataBinnings) {
            if (dataBinning == null) {
                continue;
            }
            StructField[] fields = c.a(data, dataBinning.getAttributeSelector(), dataBinning.b(), dataBinning.getAttribute(), dataBinning.getRegularExpression(), dataBinning.getValueType());
            if (fields == null) {
                continue;
            }
            data = DataBinningOperator.a(data, dataBinning, c.a(recordFields, fields));
        }
        return data;
    }

    /**
     * Parses a JSON array of binning parameters and dispatches to the matching public
     * discretization entry point, chosen by the {@code "method"} value of the FIRST element.
     *
     * <p>The method literals decode to: size discretization and frequency discretization
     * (both handled by {@link #basicDiscretize}), graded discretization
     * ({@link #binDiscretize}) and user-defined discretization
     * ({@link #userDefineDiscretize}).
     *
     * @param data dataset to discretize
     * @param json JSON text holding an array of parameter objects; may be null or empty
     * @return the discretized dataset, or {@code data} unchanged when the JSON is null/empty,
     *         contains no elements, or names an unknown method
     */
    static <T> Dataset<T> a(Dataset<T> data, String json) {
        if (json == null || "".equals(json)) {
            return data;
        }
        JSONArray array = JSON.parseArray(json);
        // parseArray yields null for a JSON "null" literal, and "[]" has no element 0.
        if (array == null || array.isEmpty()) {
            return data;
        }
        JSONObject object = array.getJSONObject(0);
        if (object == null) {
            return data;
        }
        String method = object.getString("method");
        if ("\u5927\u5c0f\u79bb\u6563\u5316".equals(method) || "\u9891\u7387\u79bb\u6563\u5316".equals(method)) {
            return DataBinningOperator.basicDiscretize(data, array);
        }
        if ("\u5206\u7ea7\u79bb\u6563\u5316".equals(method)) {
            return DataBinningOperator.binDiscretize(data, array);
        }
        if ("\u81ea\u5b9a\u4e49\u79bb\u6563\u5316".equals(method)) {
            return DataBinningOperator.userDefineDiscretize(data, array);
        }
        return data;
    }

    /**
     * Builds one binning specification per JSON object in {@code array} and applies them all.
     *
     * <p>Keys read from each object: {@code "selector"}, {@code "selectorValue"} (stored as the
     * attribute name, regular expression or value type depending on the selector),
     * {@code "method"} and {@code "methodValue"} (parsed as an integer when not null/empty).
     *
     * @param data  dataset to be binned
     * @param array JSON parameters, one object per specification
     * @return the binned dataset, or {@code data} unchanged when {@code array} is empty
     * @throws NumberFormatException if a non-empty {@code "methodValue"} is not an integer
     */
    @InsightComponent(name="\u57fa\u672c\u79bb\u6563\u5316", type="com.datastax.insight.dataprprocess.basicDiscretize", description="\u6570\u636e\u79bb\u6563\u5316", order=500901)
    public static <T> Dataset<T> basicDiscretize(
            @InsightComponentArg(externalInput=true, name="data", description="\u5f85\u5206\u7bb1\u7684\u6570\u636e\u96c6") Dataset<T> data,
            @InsightComponentArg(name="\u53c2\u6570", description="\u6570\u636e\u79bb\u6563\u7684json\u53c2\u6570") JSONArray array) {
        if (array.isEmpty()) {
            return data;
        }
        final int total = array.size();
        d[] specs = new d[total];
        for (int idx = 0; idx < total; ++idx) {
            JSONObject entry = array.getJSONObject(idx);
            d.a builder = new d.a();

            // The selector kind decides which builder slot receives "selectorValue".
            String selector = entry.getString("selector");
            builder.a(selector);
            if ("Attribute Name".equals(selector)) {
                builder.b(entry.getString("selectorValue"));
            } else if ("Regular Expression".equals(selector)) {
                builder.c(entry.getString("selectorValue"));
            } else if ("Value Type".equals(selector)) {
                builder.d(entry.getString("selectorValue"));
            }

            builder.e(entry.getString("method"));
            String methodValue = entry.getString("methodValue");
            if (!Strings.isNullOrEmpty(methodValue)) {
                builder.a(Integer.parseInt(methodValue));
            }
            specs[idx] = builder.a();
        }
        return DataBinningOperator.a(data, specs);
    }

    /**
     * Builds one graded-binning specification per JSON object in {@code array} and applies
     * them all.
     *
     * <p>Keys read from each object: {@code "selector"}, {@code "selectorValue"},
     * {@code "method"}, {@code "methodValue"} (integer, optional) and
     * {@code "selectMethodValue"}. When the latter is {@code "true"} the builder flag is set
     * and the optional {@code "minValue"} / {@code "maxValue"} of the {@code "optionValue"}
     * object are parsed as doubles.
     *
     * @param data  dataset to be binned
     * @param array JSON parameters, one object per specification
     * @return the binned dataset, or {@code data} unchanged when {@code array} is null or empty
     * @throws NumberFormatException if a present numeric value cannot be parsed
     */
    @InsightComponent(name="\u5206\u7ea7\u79bb\u6563\u5316", type="com.datastax.insight.dataprprocess.binDiscretize", description="\u6570\u636e\u79bb\u6563\u5316", order=500902)
    public static <T> Dataset<T> binDiscretize(@InsightComponentArg(externalInput=true, name="data", description="\u5f85\u5206\u7bb1\u7684\u6570\u636e\u96c6") Dataset<T> data, @InsightComponentArg(name="\u53c2\u6570", description="\u6570\u636e\u79bb\u6563\u7684json\u53c2\u6570") JSONArray array) {
        if (array == null || array.isEmpty()) {
            return data;
        }
        d[] dataBinnings = new d[array.size()];
        for (int i2 = 0; i2 < array.size(); ++i2) {
            JSONObject object = array.getJSONObject(i2);
            d.a builder = new d.a();
            String selector = object.getString("selector");
            builder.a(selector);
            if ("Attribute Name".equals(selector)) {
                builder.b(object.getString("selectorValue"));
            }
            if ("Regular Expression".equals(selector)) {
                builder.c(object.getString("selectorValue"));
            }
            if ("Value Type".equals(selector)) {
                builder.d(object.getString("selectorValue"));
            }
            builder.e(object.getString("method"));
            // getString returns null for an absent key instead of throwing, matching
            // how basicDiscretize reads the same key.
            String methodValue = object.getString("methodValue");
            if (!Strings.isNullOrEmpty(methodValue)) {
                builder.a(Integer.parseInt(methodValue));
            }
            if ("true".equals(object.getString("selectMethodValue"))) {
                builder.b(true);
                JSONObject bounds = object.getJSONObject("optionValue");
                // The bounds object itself is optional; treat a missing one like empty bounds.
                if (bounds != null) {
                    String minValue = bounds.getString("minValue");
                    if (!Strings.isNullOrEmpty(minValue)) {
                        builder.a(Double.parseDouble(minValue));
                    }
                    String maxValue = bounds.getString("maxValue");
                    if (!Strings.isNullOrEmpty(maxValue)) {
                        builder.b(Double.parseDouble(maxValue));
                    }
                }
            }
            dataBinnings[i2] = builder.a();
        }
        return DataBinningOperator.a(data, dataBinnings);
    }

    /**
     * Builds one user-defined binning specification per JSON object in {@code array} and
     * applies them all.
     *
     * <p>Keys read from each object: {@code "selector"}, {@code "selectorValue"},
     * {@code "method"} and the {@code "optionValue"} array, whose elements supply
     * {@code "className"} and {@code "upperLimit"} strings handed to the builder in array
     * order.
     *
     * @param data  dataset to be binned
     * @param array JSON parameters, one object per specification
     * @return the binned dataset, or {@code data} unchanged when {@code array} is null or empty
     */
    @InsightComponent(name="\u81ea\u5b9a\u4e49\u79bb\u6563\u5316", type="com.datastax.insight.dataprprocess.userDefineDiscretize", description="\u6570\u636e\u79bb\u6563\u5316", order=500903)
    public static <T> Dataset<T> userDefineDiscretize(@InsightComponentArg(externalInput=true, name="data", description="\u5f85\u5206\u7bb1\u7684\u6570\u636e\u96c6") Dataset<T> data, @InsightComponentArg(name="\u53c2\u6570", description="\u6570\u636e\u79bb\u6563\u7684json\u53c2\u6570") JSONArray array) {
        if (array == null || array.isEmpty()) {
            return data;
        }
        d[] dataBinnings = new d[array.size()];
        for (int i2 = 0; i2 < array.size(); ++i2) {
            JSONObject object = array.getJSONObject(i2);
            d.a builder = new d.a();
            String selector = object.getString("selector");
            builder.a(selector);
            if ("Attribute Name".equals(selector)) {
                builder.b(object.getString("selectorValue"));
            }
            if ("Regular Expression".equals(selector)) {
                builder.c(object.getString("selectorValue"));
            }
            if ("Value Type".equals(selector)) {
                builder.d(object.getString("selectorValue"));
            }
            builder.e(object.getString("method"));
            JSONArray options = object.getJSONArray("optionValue");
            if (options == null) {
                // Previously a NullPointerException. The limits are left unset on the builder;
                // NOTE(review): this relies on the built spec then reporting null limits so the
                // binning step returns the data unchanged - confirm the builder defaults.
                logger.info("optionValue is missing for user-defined discretization entry " + i2);
            } else {
                String[] classNames = new String[options.size()];
                String[] upperLimits = new String[options.size()];
                for (int j2 = 0; j2 < options.size(); ++j2) {
                    JSONObject param = options.getJSONObject(j2);
                    classNames[j2] = param.getString("className");
                    upperLimits[j2] = param.getString("upperLimit");
                }
                builder.a(classNames, upperLimits);
            }
            dataBinnings[i2] = builder.a();
        }
        return DataBinningOperator.a(data, dataBinnings);
    }

    /**
     * Applies one binning specification to every selected field accepted by {@code c.b(field)},
     * choosing the algorithm from {@code dataBinning.getBinningType()}.
     *
     * <p>The type literals decode to: size, graded, frequency, user-defined and minimum-entropy
     * discretization. Size and frequency binning require
     * {@code 1 <= binSize < data.count()}; graded binning requires {@code binSize >= 1}.
     * When a requirement is not met the data is returned unchanged after logging.
     *
     * @param data        dataset to discretize
     * @param dataBinning specification describing the binning type and its parameters
     * @param fields      selected fields, or {@code null} when nothing was selected
     * @return the dataset with the matching fields binned, or {@code data} unchanged
     */
    private static <T> Dataset<T> a(Dataset<T> data, d dataBinning, StructField[] fields) {
        if (fields == null) {
            logger.info("\u6ca1\u6709\u5c5e\u6027\u88ab\u9009\u4e2d\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return data;
        }
        String binningType = dataBinning.getBinningType();
        if (binningType == null || "".equals(binningType)) {
            logger.info("\u6570\u636e\u79bb\u6563\u7c7b\u578b\u4e3a\u7a7a\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return data;
        }
        switch (binningType) {
            case "\u5927\u5c0f\u79bb\u6563\u5316": {
                // size discretization
                if (dataBinning.getBinSize() < 1 || (long)dataBinning.getBinSize() >= data.count()) {
                    logger.info("\u5927\u5c0f\u79bb\u6563\u5316\u7684binSize\u5c0f\u4e8e1\u6216\u8005\u5927\u4e8e\u6570\u636e\u96c6\u7684\u884c\u6570\uff0c\u8fd4\u56de\u539f\u6570\u636e");
                    return data;
                }
                for (StructField candidate : fields) {
                    if (c.b(candidate)) {
                        data = DataBinningOperator.a(data, candidate, dataBinning);
                    }
                }
                break;
            }
            case "\u5206\u7ea7\u79bb\u6563\u5316": {
                // graded discretization
                if (dataBinning.getBinSize() < 1) {
                    logger.info("\u5206\u7ea7\u79bb\u6563\u5316\u7684binSize\u5c0f\u4e8e1\uff0c\u8fd4\u56de\u539f\u6570\u636e");
                    return data;
                }
                for (StructField candidate : fields) {
                    if (c.b(candidate)) {
                        data = DataBinningOperator.b(data, candidate, dataBinning);
                    }
                }
                break;
            }
            case "\u9891\u7387\u79bb\u6563\u5316": {
                // frequency discretization
                if (dataBinning.getBinSize() < 1 || (long)dataBinning.getBinSize() >= data.count()) {
                    logger.info("\u9891\u7387\u79bb\u6563\u5316\u7684binSize\u5c0f\u4e8e1\u6216\u8005\u5927\u4e8e\u6570\u636e\u96c6\u7684\u884c\u6570,\u8fd4\u56de\u539f\u6570\u636e\u96c6");
                    return data;
                }
                for (StructField candidate : fields) {
                    if (c.b(candidate)) {
                        data = DataBinningOperator.c(data, candidate, dataBinning);
                    }
                }
                break;
            }
            case "\u81ea\u5b9a\u4e49\u79bb\u6563\u5316": {
                // user-defined discretization
                for (StructField candidate : fields) {
                    if (c.b(candidate)) {
                        data = DataBinningOperator.d(data, candidate, dataBinning);
                    }
                }
                break;
            }
            case "\u6700\u5c0f\u71b5\u79bb\u6563\u5316": {
                // minimum-entropy discretization
                for (StructField candidate : fields) {
                    if (c.b(candidate)) {
                        data = DataBinningOperator.a(data, candidate);
                    }
                }
                break;
            }
            default:
                break;
        }
        return data;
    }

    /**
     * Size binning: replaces {@code field} with bucket indices so that buckets hold roughly
     * {@code binSize} rows each.
     *
     * <p>The column is sorted, collected to the driver and converted to a {@code double[]}.
     * Candidate cut positions advance in steps of {@code binSize}; at each one the boundary is
     * the midpoint (rounded to three decimals) between the value following the position and
     * the nearest smaller value before it, so equal values are never split across a boundary.
     * A position whose whole step holds a single repeated value contributes no boundary.
     *
     * <p>NOTE(review): {@code mod} and the boundary array size are derived from
     * {@code data.count()}, which includes rows dropped from {@code columnData} as null -
     * confirm this is intended.
     *
     * @param data        dataset to discretize
     * @param field       numeric column to bin
     * @param dataBinning specification supplying {@code binSize} (callers ensure it is >= 1)
     * @return the dataset with {@code field} bucketized, or {@code data} unchanged when the
     *         column has no usable values or only one distinct value
     */
    private static <T> Dataset<T> a(Dataset<T> data, StructField field, d dataBinning) {
        int binSize = dataBinning.getBinSize();
        Row[] rows = (Row[])data.sort(field.name(), new String[0]).select(field.name(), new String[0]).collect();
        double[] columnData = DataBinningOperator.a(rows);
        if (columnData == null || columnData.length == 0) {
            logger.info(field.name() + "\u5217\u5168\u90e8\u4e3a\u7a7a\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return data;
        }
        // Skip the leading run of equal values. The bound check stops the scan at the end of
        // the array instead of overrunning it when every value is identical.
        int i2 = 0;
        while (i2 + 1 < columnData.length && columnData[i2] == columnData[i2 + 1]) {
            ++i2;
        }
        if (i2 + 1 >= columnData.length) {
            // Only one distinct value: no boundary can be placed.
            logger.info("\u6570\u636e\u96c6\u7684" + field.name() + "\u5217\u5168\u90e8\u76f8\u540c\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return data;
        }
        // Fixed symbols keep '.' as the decimal separator so the formatted text can always be
        // parsed back by Double.parseDouble, whatever the JVM default locale is.
        DecimalFormat format = new DecimalFormat("#.000", DecimalFormatSymbols.getInstance(Locale.ROOT));
        int rowCount = (int)data.count();
        int mod = rowCount % binSize;
        int size = rowCount / binSize + (mod == 0 ? 0 : 1) + 2;
        int position = mod == 0 ? binSize - 1 : mod - 1;
        int preposition = -1;
        int point = 0;
        double[] doubles = new double[size];
        doubles[point] = Double.NEGATIVE_INFINITY;
        if (i2 >= mod) {
            // The leading run already covers the first (remainder-sized) step: cut right after
            // it and realign the next cut position to the binSize grid.
            doubles[++point] = Double.parseDouble(format.format((columnData[i2] + columnData[i2 + 1]) / 2.0));
            position = i2 + binSize - (i2 + 1 - mod) % binSize;
        }
        while (position < columnData.length - 1) {
            // Walk back from the cut position to the last value that differs from the value
            // just after it, stopping at the previous cut position.
            i2 = position;
            while (columnData[i2] == columnData[position + 1] && --i2 != preposition) {
            }
            if (i2 == preposition) {
                preposition = position;
                position += binSize;
                continue;
            }
            double temp = Double.parseDouble(format.format((columnData[i2] + columnData[i2 + 1]) / 2.0));
            if (temp != doubles[point]) {
                doubles[++point] = temp;
            }
            preposition = position;
            position += binSize;
        }
        if (point == 0) {
            logger.info("\u6570\u636e\u96c6\u7684" + field.name() + "\u5217\u5168\u90e8\u76f8\u540c\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return data;
        }
        doubles[++point] = Double.POSITIVE_INFINITY;
        return DataBinningOperator.a(data, DataBinningOperator.a(doubles, point), field);
    }

    /**
     * Graded (equal-width) binning: splits the range {@code [min, max]} of {@code field} into
     * {@code binSize} intervals of equal width and replaces the column with bucket indices.
     *
     * <p>The range defaults to the column's minimum and maximum. When
     * {@code dataBinning.c()} is true the user-supplied bounds narrow it, after being
     * validated against the data range. The split array is
     * {@code [-Infinity, min, ..interior cuts.., max, +Infinity]}.
     *
     * @param data        dataset to discretize
     * @param field       numeric column to bin
     * @param dataBinning specification supplying {@code binSize} and the optional bounds
     * @return the dataset with {@code field} bucketized, or {@code data} unchanged when the
     *         column is empty or the user bounds are invalid
     */
    private static <T> Dataset<T> b(Dataset<T> data, StructField field, d dataBinning) {
        Row[] rows = (Row[])data.select(new Column[]{functions.min((String)field.name()), functions.max((String)field.name())}).collect();
        // Either aggregate being null means there is no value to parse below.
        if (rows[0].get(0) == null || rows[0].get(1) == null) {
            logger.info(field.name() + "\u5217\u7684\u6700\u5927\u503c\u6700\u5c0f\u503c\u90fd\u4e3a\u7a7a\uff0c\u8868\u793a\u8be5\u5217\u4e3a\u7a7a\u5217\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return data;
        }
        double min = Double.parseDouble(rows[0].get(0).toString());
        double max = Double.parseDouble(rows[0].get(1).toString());
        if (min == max) {
            logger.info(field.name() + "\u5217\u6700\u5927\u503c\u548c\u6700\u5c0f\u503c\u76f8\u7b49\uff0c\u8fd4\u56de\u4e00\u4e2abucket");
            return DataBinningOperator.a(data, new double[]{Double.NEGATIVE_INFINITY, min, Double.POSITIVE_INFINITY}, field);
        }
        if (dataBinning.c()) {
            if (dataBinning.getMinValue() >= dataBinning.getMaxValue()) {
                logger.info("\u5206\u7ea7\u79bb\u6563\u5316\u7528\u6237\u81ea\u5b9a\u7684\u8fb9\u754c\u7684\u6700\u5c0f\u503c\u5927\u4e8e\u6216\u7b49\u4e8e\u6700\u5927\u503c\uff0c\u8fb9\u754c\u5e94\u5728[" + min + ", " + max + "]\u8303\u56f4\u5185\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
                return data;
            }
            if (dataBinning.getMaxValue() < min) {
                logger.info("\u5206\u7ea7\u79bb\u6563\u5316\u7528\u6237\u81ea\u5b9a\u7684\u8fb9\u754c\u7684\u6700\u5927\u503c\u5c0f\u4e8e\u6570\u636e\u96c6\u6700\u5c0f\u503c\uff0c\u8fb9\u754c\u5e94\u5728[" + min + ", " + max + "]\u8303\u56f4\u5185\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
                return data;
            }
            if (dataBinning.getMinValue() > max) {
                logger.info("\u5206\u7ea7\u79bb\u6563\u5316\u7528\u6237\u81ea\u5b9a\u7684\u8fb9\u754c\u7684\u6700\u5c0f\u503c\u5927\u4e8e\u6570\u636e\u96c6\u6700\u5927\u503c\uff0c\u8fb9\u754c\u5e94\u5728[" + min + ", " + max + "]\u8303\u56f4\u5185\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
                return data;
            }
            if (dataBinning.getMinValue() > min) {
                min = dataBinning.getMinValue();
            }
            if (dataBinning.getMaxValue() < max) {
                max = dataBinning.getMaxValue();
            }
            if (min >= max) {
                // A user bound that coincides with the data min/max collapses the range to a
                // single point; the splits below would then repeat a value, which Bucketizer
                // rejects. Handle it like the min == max case above.
                logger.info("Clamped range of " + field.name() + " collapsed to the single value " + min + ", using one split");
                return DataBinningOperator.a(data, new double[]{Double.NEGATIVE_INFINITY, min, Double.POSITIVE_INFINITY}, field);
            }
        }
        int binSize = dataBinning.getBinSize();
        // -Infinity, min, (binSize - 1) interior cuts, max, +Infinity
        double[] doubles = new double[binSize + 3];
        int position = 0;
        doubles[position] = Double.NEGATIVE_INFINITY;
        doubles[++position] = min;
        if (binSize != 1) {
            double interval = (max - min) / (double)binSize;
            for (int i2 = 1; i2 < binSize; ++i2) {
                doubles[++position] = min + (double)i2 * interval;
            }
        }
        doubles[++position] = max;
        doubles[++position] = Double.POSITIVE_INFINITY;
        return DataBinningOperator.a(data, doubles, field);
    }

    /**
     * Frequency binning: cuts the sorted DISTINCT values of {@code field} into
     * {@code binSize} groups and replaces the column with bucket indices.
     *
     * <p>With {@code n} distinct values, the first group takes {@code n / binSize} values;
     * following groups take one extra value each until the remainder {@code n % binSize} is
     * used up. Each boundary is the midpoint of the two values around the cut. When
     * {@code binSize >= n} a single boundary above the largest value is used, so every row
     * falls in bucket 0.
     *
     * @param data        dataset to discretize
     * @param field       numeric column to bin
     * @param dataBinning specification supplying {@code binSize}
     * @return the dataset with {@code field} bucketized, or {@code data} unchanged when the
     *         column yields no values
     */
    private static <T> Dataset<T> c(Dataset<T> data, StructField field, d dataBinning) {
        String column = field.name();
        Row[] distinctRows = (Row[])data.dropDuplicates(column, new String[0]).sort(column, new String[0]).select(column, new String[0]).collect();
        double[] values = DataBinningOperator.a(distinctRows);
        if (values == null) {
            logger.info(column + "\u5217\u5168\u90e8\u4e3a\u7a7a\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return data;
        }
        final int bins = dataBinning.getBinSize();
        final int distinct = values.length;
        if (bins >= distinct) {
            logger.info("\u9891\u7387\u79bb\u6563\u5316\u7684binSize\u5927\u4e8e\u6216\u7b49\u4e8e" + column + "\u5217\u53bb\u91cd\u548c\u53bb\u7a7a\u4e4b\u540e\u7684\u957f\u5ea6,\u8fd4\u56de\u8be5\u5217\u5185\u5bb9\u4e3a0.0");
            double aboveMax = values[distinct - 1] + 1.0;
            return DataBinningOperator.a(data, new double[]{Double.NEGATIVE_INFINITY, aboveMax, Double.POSITIVE_INFINITY}, field);
        }
        final int base = distinct / bins;
        int extra = distinct % bins;
        double[] splits = new double[bins + 2];
        splits[0] = Double.NEGATIVE_INFINITY;
        int last = 0;
        // "cut" is the index of the first value of the next group.
        int cut = base;
        while (cut < distinct) {
            splits[++last] = (values[cut] + values[cut - 1]) / 2.0;
            if (extra != 0) {
                cut += base + 1;
                --extra;
            } else {
                cut += base;
            }
        }
        splits[++last] = Double.POSITIVE_INFINITY;
        return DataBinningOperator.a(data, DataBinningOperator.a(splits, last), field);
    }

    /**
     * User-defined binning: buckets {@code field} by the specification's upper limits and
     * labels the buckets with its class names.
     *
     * @param data        dataset to discretize
     * @param field       column to bin
     * @param dataBinning specification supplying the upper limits and class names
     * @return the labelled dataset, or {@code data} unchanged when either array is null
     */
    private static <T> Dataset<T> d(Dataset<T> data, StructField field, d dataBinning) {
        double[] limits = dataBinning.getUpperLimits();
        String[] labels = dataBinning.getClassNames();
        if (limits != null && labels != null) {
            return DataBinningOperator.a(data, limits, field, labels);
        }
        logger.info("\u7528\u6237\u81ea\u5b9a\u79bb\u6563\u5316\u53c2\u6570\u4e3a\u7a7a\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
        return data;
    }

    /**
     * No-op: returns {@code data} unchanged and ignores {@code field}.
     *
     * <p>This is the method invoked for the minimum-entropy binning type; no algorithm is
     * implemented here.
     *
     * @param data  dataset passed through
     * @param field unused
     * @return {@code data}, untouched
     */
    private static <T> Dataset<T> a(Dataset<T> data, StructField field) {
        return data;
    }

    /**
     * Converts the first cell of every row to a {@code double}, dropping null cells.
     *
     * <p>Values are parsed from the cell's {@code toString()} text, and row order is kept.
     *
     * @param rows single-column rows collected from Spark
     * @return the non-null values in row order, or {@code null} when {@code rows} is null,
     *         empty, or holds only null cells
     * @throws NumberFormatException if a cell's text is not a parsable double
     */
    private static double[] a(Row[] rows) {
        if (rows == null || rows.length == 0) {
            return null;
        }
        // Count every non-null cell (not just the ones after a leading null run) so the
        // result is sized exactly and the scan can never run past the end of the array.
        int nonNull = 0;
        for (Row row : rows) {
            if (row.get(0) != null) {
                ++nonNull;
            }
        }
        if (nonNull == 0) {
            return null;
        }
        double[] doubles = new double[nonNull];
        int next = 0;
        for (Row row : rows) {
            Object cell = row.get(0);
            if (cell == null) {
                continue;
            }
            doubles[next++] = Double.parseDouble(cell.toString());
        }
        return doubles;
    }

    /**
     * Copies the leading {@code realLength + 1} elements of {@code doubles} into a new array.
     *
     * @param doubles    source array
     * @param realLength index of the last element to keep (inclusive)
     * @return a fresh array holding elements {@code 0..realLength}
     */
    private static double[] a(double[] doubles, int realLength) {
        final int keep = realLength + 1;
        double[] trimmed = new double[keep];
        System.arraycopy(doubles, 0, trimmed, 0, keep);
        return trimmed;
    }

    /**
     * Replaces {@code field} with the bucket index computed by a Spark ML {@code Bucketizer}
     * using {@code doubles} as split points.
     *
     * <p>The bucket index is written to a temporary {@code "bucketed-" + name} column, copied
     * over the original column and the temporary column is dropped.
     *
     * @param data    dataset to transform
     * @param doubles split points handed to {@code Bucketizer.setSplits}
     * @param field   column to bucketize
     * @return the transformed dataset, or {@code data} unchanged when the splits are just
     *         {@code [-Infinity, +Infinity]}
     */
    @SuppressWarnings("unchecked")
    private static <T> Dataset<T> a(Dataset<T> data, double[] doubles, StructField field) {
        if (DataBinningOperator.a(doubles)) {
            logger.info("bucketizer\u8303\u56f4\u4e3a[-Infinity, Infinity]");
            return data;
        }
        String bucketedName = "bucketed-" + field.name();
        Bucketizer bucketizer = new Bucketizer().setInputCol(field.name()).setOutputCol(bucketedName).setSplits(doubles);
        Dataset<Row> bucketed = bucketizer.transform(data).withColumn(field.name(), functions.col(bucketedName)).drop(bucketedName);
        // transform/withColumn/drop yield Dataset<Row>; the declared return type needs an
        // explicit unchecked cast (generics are erased at runtime, so this is a no-op there).
        return (Dataset<T>)bucketed;
    }

    /**
     * Bucketizes {@code field} with {@code doubles} as split points and then replaces each
     * bucket index with a label from {@code strings}.
     *
     * <p>Bucket index {@code k} is labelled {@code strings[k + 1]}, the same offset the
     * previous implementation used. The mapping is a single conditional column expression
     * evaluated by Spark, so nothing is collected to the driver and rows are matched by value.
     * The earlier version joined on {@code monotonically_increasing_id}, whose values are not
     * consecutive across partitions, and read only the first character of the bucket index.
     * Rows whose bucket is null or has no label receive a null label.
     *
     * @param data    dataset to transform
     * @param doubles split points handed to the bucketizer
     * @param field   column to bucketize and label
     * @param strings labels; index {@code k + 1} names bucket {@code k}
     * @return the labelled dataset, or {@code data} unchanged when the splits are just
     *         {@code [-Infinity, +Infinity]} or no label is available
     */
    @SuppressWarnings("unchecked")
    private static <T> Dataset<T> a(Dataset<T> data, double[] doubles, StructField field, String[] strings) {
        if (DataBinningOperator.a(doubles)) {
            logger.info("bucketizer\u8303\u56f4\u4e3a[-Infinity, Infinity]");
            return data;
        }
        if (strings == null || strings.length < 2) {
            // strings[k + 1] is needed for bucket k, so fewer than two entries label nothing.
            logger.info("No class name available for the buckets of " + field.name() + ", returning the original dataset");
            return data;
        }
        data = DataBinningOperator.a(data, doubles, field);
        Column bucket = functions.col(field.name());
        Column label = null;
        int bucketCount = doubles.length - 1;
        for (int k = 0; k < bucketCount && k + 1 < strings.length; ++k) {
            // Bucketizer emits the bucket index as a double (0.0, 1.0, ...).
            Column isBucket = bucket.equalTo((double)k);
            label = label == null ? functions.when(isBucket, strings[k + 1]) : label.when(isBucket, strings[k + 1]);
        }
        return (Dataset<T>)data.withColumn(field.name(), label);
    }

    /**
     * Tells whether the split array starts with {@code -Infinity} immediately followed by
     * {@code +Infinity}, i.e. it defines no real boundary.
     *
     * @param doubles split points; must hold at least two elements
     * @return {@code true} when the first two elements are {@code -Infinity} and
     *         {@code +Infinity}
     */
    private static boolean a(double[] doubles) {
        if (doubles[0] != Double.NEGATIVE_INFINITY) {
            return false;
        }
        return doubles[1] == Double.POSITIVE_INFINITY;
    }
}

