package com.datastax.data.prepare.spark.dataset;

import com.alibaba.fastjson.JSONArray;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.Operator;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/datastax/data/prepare/spark/dataset/DataSampleOperator.class */
/**
 * Insight operator that draws a sample from a Spark {@link Dataset}.
 *
 * <p>The sampling behaviour is driven by a JSON array of objects, each carrying a
 * {@code selector} and a {@code selectorValue} key. Three sample types are supported:
 * <ul>
 *   <li>{@code absolute} - {@code selectorValue} is the number of rows to take;</li>
 *   <li>{@code relative} - {@code selectorValue} is a ratio multiplied by the row count;</li>
 *   <li>{@code probability} - {@code selectorValue} is the fraction passed to
 *       {@link Dataset#sample(boolean, double)} and must lie strictly between 0 and 1.</li>
 * </ul>
 *
 * <p>When the {@code selector} of the first entry equals {@code "balance"}, every following
 * entry is treated as one stratum: its {@code selector} is used as a filter expression on the
 * dataset and its {@code selectorValue} is applied to the filtered rows; the per-stratum
 * samples are then combined. Otherwise only the first entry's {@code selectorValue} is used
 * and the whole dataset is sampled.
 *
 * <p>Invalid configuration values are logged and the input dataset is returned unchanged.
 */
public class DataSampleOperator implements Operator {
    private static final Logger logger = LoggerFactory.getLogger(DataSampleOperator.class);

    /** Shared JSON parser, reused instead of being rebuilt on every call. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Selector value of the first configuration entry that enables per-filter sampling. */
    private static final String BALANCE = "balance";

    /** Returned by the count helpers when the configured value cannot be used. */
    private static final int INVALID_COUNT = -1;

    /**
     * Samples {@code dataset} according to {@code sampleType} and the JSON configuration.
     *
     * @param dataset         the data to sample; {@code null} yields {@code null}
     * @param withReplacement whether rows may be drawn more than once
     * @param sampleType      one of {@code absolute}, {@code relative}, {@code probability};
     *                        any other value (or {@code null}) returns {@code dataset} unchanged
     * @param editList        JSON array of {@code {"selector": ..., "selectorValue": ...}} objects;
     *                        {@code null}, blank or an empty array returns {@code dataset} unchanged
     * @return the sampled dataset, or {@code dataset} itself when the configuration is unusable
     * @throws IOException              if {@code editList} is not a valid JSON array
     * @throws IllegalArgumentException if an element of the JSON array is not a JSON object
     */
    @InsightComponent(name = "数据采样", description = "数据采样", order = 500304)
    public static <T> Dataset<T> dataSample(@InsightComponentArg(externalInput = true, name = "data", description = "数据集") Dataset<T> dataset, @InsightComponentArg(name = "withReplacement", description = "是否放回抽样") boolean withReplacement, @InsightComponentArg(name = "sampleType", description = "Sample类型", items = "absolute,relative,probability", defaultValue = "absolute") String sampleType, @InsightComponentArg(name = "editList", description = "配置列表") String editList) throws IOException {
        if (dataset == null) {
            logger.info("dataset is empty!");
            return null;
        }
        List<List<Object>> config = parseEditList(editList);
        // Every sampling mode reads the first entry, so an empty configuration cannot be applied.
        if (sampleType == null || config.isEmpty()) {
            logger.info("sampleType or editList is missing, dataset is returned unchanged!");
            return dataset;
        }
        switch (sampleType) {
            case "absolute":
                return absoluteSample(dataset, withReplacement, dataset.schema(), config);
            case "relative":
                return relativeSample(dataset, withReplacement, dataset.schema(), config);
            case "probability":
                return probabilitySample(dataset, withReplacement, config);
            default:
                return dataset;
        }
    }

    /**
     * Parses the JSON configuration into {@code [selector, selectorValue]} pairs, preserving order.
     * Missing keys are stored as {@code null}.
     */
    private static List<List<Object>> parseEditList(String editList) throws IOException {
        List<List<Object>> entries = new ArrayList<>();
        if (editList == null || editList.trim().isEmpty()) {
            return entries;
        }
        List<?> parsed = MAPPER.readValue(editList, List.class);
        if (parsed == null) {
            // The JSON literal "null" deserializes to a null list.
            return entries;
        }
        for (Object element : parsed) {
            if (!(element instanceof Map)) {
                throw new IllegalArgumentException("editList entries must be JSON objects: " + element);
            }
            Map<?, ?> item = (Map<?, ?>) element;
            List<Object> entry = new ArrayList<>(2);
            entry.add(item.get("selector"));
            entry.add(item.get("selectorValue"));
            entries.add(entry);
        }
        return entries;
    }

    /** Takes a fixed number of rows, either from the whole dataset or from each stratum. */
    private static <T> Dataset<T> absoluteSample(Dataset<T> dataset, boolean withReplacement, StructType schema, List<List<Object>> config) {
        if (!isBalanced(config)) {
            int count = absoluteCount(config.get(0));
            if (count == INVALID_COUNT) {
                return dataset;
            }
            return toDataset(dataset.javaRDD().takeSample(withReplacement, count), schema);
        }
        List<List<Object>> strata = config.subList(1, config.size());
        // Validate every stratum before running any Spark job so that a bad entry
        // does not throw away samples that were already collected.
        int[] counts = new int[strata.size()];
        for (int i = 0; i < counts.length; i++) {
            List<Object> stratum = strata.get(i);
            counts[i] = hasFilter(stratum) ? absoluteCount(stratum) : INVALID_COUNT;
            if (counts[i] == INVALID_COUNT) {
                return dataset;
            }
        }
        List<T> rows = new ArrayList<>();
        for (int i = 0; i < counts.length; i++) {
            Dataset<T> filtered = dataset.filter(filterOf(strata.get(i)));
            rows.addAll(filtered.javaRDD().takeSample(withReplacement, counts[i]));
        }
        return toDataset(rows, schema);
    }

    /** Takes {@code ratio * rowCount} rows, either from the whole dataset or from each stratum. */
    private static <T> Dataset<T> relativeSample(Dataset<T> dataset, boolean withReplacement, StructType schema, List<List<Object>> config) {
        if (!isBalanced(config)) {
            int count = relativeCount(dataset, numericValue(config.get(0)));
            if (count == INVALID_COUNT) {
                return dataset;
            }
            return toDataset(dataset.javaRDD().takeSample(withReplacement, count), schema);
        }
        List<T> rows = new ArrayList<>();
        for (List<Object> stratum : config.subList(1, config.size())) {
            if (!hasFilter(stratum)) {
                return dataset;
            }
            Dataset<T> filtered = dataset.filter(filterOf(stratum));
            int count = relativeCount(filtered, numericValue(stratum));
            if (count == INVALID_COUNT) {
                return dataset;
            }
            rows.addAll(filtered.javaRDD().takeSample(withReplacement, count));
        }
        return toDataset(rows, schema);
    }

    /** Samples with a fraction in (0, 1), either on the whole dataset or on each stratum. */
    private static <T> Dataset<T> probabilitySample(Dataset<T> dataset, boolean withReplacement, List<List<Object>> config) {
        if (!isBalanced(config)) {
            double probability = numericValue(config.get(0));
            if (!isProbability(probability)) {
                logger.info("probabilitySample probability is out of range!");
                return dataset;
            }
            return dataset.sample(withReplacement, probability);
        }
        List<List<Object>> strata = config.subList(1, config.size());
        double[] probabilities = new double[strata.size()];
        for (int i = 0; i < probabilities.length; i++) {
            List<Object> stratum = strata.get(i);
            if (!hasFilter(stratum)) {
                return dataset;
            }
            probabilities[i] = numericValue(stratum);
            if (!isProbability(probabilities[i])) {
                logger.info("probabilitySample probability is out of range!");
                return dataset;
            }
        }
        Dataset<T> combined = null;
        for (int i = 0; i < probabilities.length; i++) {
            Dataset<T> part = dataset.filter(filterOf(strata.get(i))).sample(withReplacement, probabilities[i]);
            combined = combined == null ? part : combined.union(part);
        }
        // With no strata the other sample types produce an empty dataset; do the same here
        // instead of handing a null back to the caller.
        return combined == null ? dataset.limit(0) : combined;
    }

    /** Tells whether the first configuration entry requests per-filter ("balance") sampling. */
    private static boolean isBalanced(List<List<Object>> config) {
        // Constant-first comparison: the selector is null when the key is absent from the JSON.
        return BALANCE.equals(config.get(0).get(0));
    }

    /** Checks that a stratum entry carries a filter expression, logging when it does not. */
    private static boolean hasFilter(List<Object> entry) {
        if (entry.get(0) == null) {
            logger.info("selector is missing!");
            return false;
        }
        return true;
    }

    /** Returns the filter expression of a stratum entry; call {@link #hasFilter} first. */
    private static String filterOf(List<Object> entry) {
        return entry.get(0).toString();
    }

    /**
     * Reads the entry's {@code selectorValue} as a double. Returns {@code NaN} when the value
     * is missing or not numeric, which every caller treats as an invalid setting.
     */
    private static double numericValue(List<Object> entry) {
        Object raw = entry.get(1);
        if (raw == null) {
            logger.info("selectorValue is missing!");
            return Double.NaN;
        }
        try {
            return Double.parseDouble(raw.toString());
        } catch (NumberFormatException e) {
            logger.info("selectorValue is not a number: {}", raw);
            return Double.NaN;
        }
    }

    /**
     * Converts the entry's value into a positive row count, or {@link #INVALID_COUNT}.
     * The count is derived from the parsed double so that integral values written with a
     * fraction or exponent (for example {@code 10.0}) are accepted.
     */
    private static int absoluteCount(List<Object> entry) {
        double value = numericValue(entry);
        // NaN and infinities also fail this test because their remainder is NaN.
        if (value % 1.0d != 0.0d) {
            logger.info("absoluteSample num must be int type!");
            return INVALID_COUNT;
        }
        if (value <= 0.0d) {
            logger.info("absoluteSample num is less than 0!");
            return INVALID_COUNT;
        }
        if (value > Integer.MAX_VALUE) {
            logger.info("absoluteSample num is out of int range!");
            return INVALID_COUNT;
        }
        return (int) value;
    }

    /**
     * Computes {@code (int) (ratio * rowCount)} for {@code source}, or {@link #INVALID_COUNT}
     * when the result is not positive. Rows are counted on the cluster rather than collected
     * to the driver.
     */
    private static int relativeCount(Dataset<?> source, double ratio) {
        // A non-positive or NaN ratio can never give a positive count; skip the count job.
        if (!(ratio > 0.0d)) {
            logger.info("relativeSample ratio is less than 0!");
            return INVALID_COUNT;
        }
        int count = (int) (ratio * source.count());
        if (count <= 0) {
            logger.info("relativeSample ratio is less than 0!");
            return INVALID_COUNT;
        }
        return count;
    }

    /** Tells whether {@code value} lies strictly between 0 and 1 ({@code NaN} does not). */
    private static boolean isProbability(double value) {
        return value > 0.0d && value < 1.0d;
    }

    /**
     * Builds a dataset from sampled rows through the shared Spark session.
     * The raw-typed call and unchecked cast mirror how the sampled elements were already
     * handed to {@code createDataFrame(rows, schema)} together with the source schema.
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static <T> Dataset<T> toDataset(List<T> rows, StructType schema) {
        return (Dataset<T>) SparkContextBuilder.getSession().createDataFrame((List) rows, schema);
    }

    /**
     * Variant of {@link #dataSample(Dataset, boolean, String, String)} that accepts the
     * configuration as a fastjson array; it is serialized and delegated to the string overload.
     * A {@code null} array is treated like a missing configuration.
     *
     * @throws IOException if the serialized configuration cannot be parsed
     */
    @InsightComponent(name = "数据采样", description = "数据采样", order = 500304)
    public static <T> Dataset<T> dataSample(@InsightComponentArg(externalInput = true, name = "data", description = "数据集") Dataset<T> dataset, @InsightComponentArg(name = "withReplacement", description = "是否放回抽样") boolean withReplacement, @InsightComponentArg(name = "sampleType", description = "Sample类型", items = "absolute,relative,probability", defaultValue = "absolute") String sampleType, @InsightComponentArg(name = "propertylist", description = "参数列表") JSONArray propertyList) throws IOException {
        String editList = propertyList == null ? null : propertyList.toString();
        return dataSample(dataset, withReplacement, sampleType, editList);
    }
}
