package ai.databand.parameters;

import ai.databand.DbndPropertyNames;
import ai.databand.log.HistogramRequest;
import ai.databand.schema.histograms.ColumnSummary;
import ai.databand.schema.histograms.NumericSummary;
import ai.databand.schema.histograms.Summary;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.BooleanType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.FractionalType;
import org.apache.spark.sql.types.IntegralType;
import org.apache.spark.sql.types.NumericType;
import org.apache.spark.sql.types.StringType;
import org.apache.spark.sql.types.StructField;
import scala.collection.Iterable;
import scala.collection.JavaConverters;
import scala.collection.Seq;

/* loaded from: input_file:ai/databand/parameters/Histogram.class */
/**
 * Collects per-column statistics and histograms for a Spark {@link Dataset}.
 *
 * <p>Results are accumulated in an internal map whose keys are prefixed with the data frame key
 * passed to the constructor: {@code <dfKey>.stats}, {@code <dfKey>.histograms} and
 * {@code <dfKey>.<column>.<metric>}. Only numeric, string and boolean columns that are not
 * excluded by the {@link HistogramRequest} are processed.
 */
public class Histogram {
    private static final int MAX_NUMERIC_BUCKETS_COUNT = 20;
    private static final int MAX_CATEGORICAL_BUCKETS_COUNT = 50;
    private final String dfKey;
    private final Dataset<?> dataset;
    private final HistogramRequest req;
    private final Map<String, Object> result = new HashMap<>();
    private final Map<String, Summary> summaries = new HashMap<>();

    /**
     * @param str              key used as the prefix of every reported metric name
     * @param dataset          dataset to profile; it is aliased internally before being queried
     * @param histogramRequest settings selecting which statistics/histograms are calculated and
     *                         which columns are excluded
     */
    public Histogram(String str, Dataset<?> dataset, HistogramRequest histogramRequest) {
        this.dfKey = str;
        this.dataset = dataset.alias(String.format("%s_%s", DbndPropertyNames.DBND_INTERNAL_ALIAS, "HISTOGRAM"));
        this.req = histogramRequest;
    }

    /**
     * Converts a Java list into a Scala {@link Seq} accepted by the Spark API.
     *
     * @param list elements to expose as a Scala sequence
     * @param <T>  element type
     * @return Scala sequence with the elements of {@code list}
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    protected <T> Seq<T> seq(List<T> list) {
        // The raw cast mirrors the erased return type of asScala(); the element type is preserved.
        return ((Iterable) JavaConverters.collectionAsScalaIterableConverter(list).asScala()).toSeq();
    }

    /**
     * Calculates the statistics and, when enabled by the request, the histograms.
     *
     * @return the internal result map: {@code <dfKey>.stats}, the flattened
     *         {@code <dfKey>.<column>.<metric>} entries and, if histograms are requested,
     *         {@code <dfKey>.histograms}
     */
    public Map<String, Object> metricValues() {
        this.result.put(String.format("%s.stats", this.dfKey), summary());
        if (this.req.isEnabled() && !this.req.isOnlyStats()) {
            Map<String, Object> histograms = new HashMap<>();
            if (this.req.isIncludeAllNumeric()) {
                histograms.putAll(numericHistograms());
            }
            if (this.req.isIncludeAllString()) {
                histograms.putAll(categoricalHistograms(StringType.class));
            }
            if (this.req.isIncludeAllBoolean()) {
                histograms.putAll(categoricalHistograms(BooleanType.class));
            }
            this.result.put(String.format("%s.histograms", this.dfKey), histograms);
        }
        return this.result;
    }

    /**
     * Calculates descriptive statistics for every supported, non-excluded column.
     *
     * <p>As a side effect each metric is also stored in the result map under
     * {@code <dfKey>.<column>.<metric>} and the {@link Summary} of each column is remembered for
     * {@link #getSummaries()}.
     *
     * @return column name mapped to that column's statistics; empty when no column qualifies
     */
    public Map<String, Map<String, Object>> summary() {
        StructField[] fields = this.dataset.schema().fields();

        // One aggregate query yields distinct / non-null / null counts of all profiled columns.
        List<String> countExprs = new ArrayList<>();
        for (StructField field : fields) {
            if (isProfiled(field)) {
                String name = field.name();
                Column col = functions.col(name);
                countExprs.add(String.format("count(DISTINCT `%s`) AS `%s_%s`", name, name, "distinct"));
                countExprs.add(functions.count(col).alias(suffixed(name, "non-null")).toString());
                countExprs.add(functions.count(functions.when(col.isNull(), 1)).alias(suffixed(name, "count_null")).toString());
            }
        }

        Map<String, Map<String, Object>> stats = new HashMap<>();
        if (countExprs.isEmpty()) {
            // Nothing to profile: selecting zero expressions would not produce an aggregate row.
            return stats;
        }

        Dataset<Row> described = this.dataset.summary();
        String[] describedColumns = described.columns();
        Map<String, Integer> describedIndex = new HashMap<>();
        for (int i = 0; i < describedColumns.length; i++) {
            describedIndex.put(describedColumns[i], i);
        }
        // The first cell of each summary row holds the statistic name ("min", "mean", "25%", ...).
        Map<String, Row> statRows = new HashMap<>();
        for (Row row : described.collectAsList()) {
            statRows.put(row.get(0).toString(), row);
        }

        Seq<String> countExprSeq = seq(countExprs);
        Dataset<Row> countsFrame = this.dataset.selectExpr(countExprSeq);
        Row countsRow = countsFrame.collectAsList().get(0);
        String[] countColumns = countsFrame.columns();
        Map<String, Object> counts = new HashMap<>();
        for (int i = 0; i < countColumns.length; i++) {
            counts.put(countColumns[i], countsRow.get(i));
        }

        for (StructField field : fields) {
            if (!isProfiled(field)) {
                continue;
            }
            String name = field.name();
            DataType type = field.dataType();
            long nonNull = Long.parseLong(counts.get(suffixed(name, "non-null")).toString());
            long nulls = Long.parseLong(counts.get(suffixed(name, "count_null")).toString());
            long distinct = Long.parseLong(counts.get(suffixed(name, "distinct")).toString());

            Summary columnSummary;
            if (type instanceof NumericType) {
                int idx = describedIndex.get(name);
                ColumnSummary base = new ColumnSummary(nonNull + nulls, distinct, nonNull, nulls,
                    type instanceof FractionalType ? "double" : "integer");
                columnSummary = new NumericSummary(
                    base,
                    statValue(statRows, "max", idx),
                    statValue(statRows, "mean", idx),
                    statValue(statRows, "min", idx),
                    statValue(statRows, "stddev", idx),
                    statValue(statRows, "25%", idx),
                    statValue(statRows, "50%", idx),
                    statValue(statRows, "75%", idx));
            } else if (type instanceof StringType || type instanceof BooleanType) {
                columnSummary = new ColumnSummary(nonNull + nulls, distinct, nonNull, nulls,
                    type instanceof StringType ? "string" : "boolean");
            } else {
                // Only reachable when a subclass widens isSimpleType(); no summary exists for it.
                continue;
            }

            Map<String, Object> columnStats = columnSummary.toMap();
            stats.put(name, columnStats);
            for (Map.Entry<String, Object> metric : columnStats.entrySet()) {
                this.result.put(resultKey(name, metric.getKey()), metric.getValue());
            }
            this.summaries.put(name, columnSummary);
        }
        return stats;
    }

    /**
     * @return the summaries gathered by {@link #summary()}, keyed by column name (the internal,
     *         mutable map)
     */
    public Map<String, Summary> getSummaries() {
        return this.summaries;
    }

    /**
     * @param dataType Spark column type
     * @return {@code true} for numeric, string and boolean types
     */
    protected boolean isSimpleType(DataType dataType) {
        return (dataType instanceof NumericType) || (dataType instanceof StringType) || (dataType instanceof BooleanType);
    }

    /**
     * Builds histograms for all non-excluded numeric columns.
     *
     * <p>Reads the {@code distinct}, {@code min} and {@code max} metrics that {@link #summary()}
     * stored in the result map, so {@code summary()} has to run first. The number of buckets is
     * the distinct count capped at {@value #MAX_NUMERIC_BUCKETS_COUNT}; every bucket is
     * half-open except the last one, which includes the maximum.
     *
     * @return column name mapped to {@code {counts, boundaries}}; columns without any distinct
     *         value are omitted
     */
    protected Map<String, Object[][]> numericHistograms() {
        List<Column> numericColumns = new ArrayList<>();
        List<String> bucketExprs = new ArrayList<>();
        Map<String, Object[]> boundariesByColumn = new HashMap<>();

        for (StructField field : this.dataset.schema().fields()) {
            if (!(field.dataType() instanceof NumericType) || this.req.isExcluded(field.name())) {
                continue;
            }
            String name = field.name();
            long distinct = (Long) this.result.get(resultKey(name, "distinct"));
            double min = (Double) this.result.get(resultKey(name, "min"));
            double max = (Double) this.result.get(resultKey(name, "max"));
            if (distinct <= 0) {
                // No values at all: there is nothing to bucket (and min/max are undefined).
                continue;
            }
            numericColumns.add(functions.col(name));

            int buckets = (int) Math.min(distinct, MAX_NUMERIC_BUCKETS_COUNT);
            // Integral columns get a whole-number width. Math.floor (rather than an int cast)
            // keeps the width correct when the value range exceeds Integer.MAX_VALUE.
            // NOTE(review): the width is 0 when the range is smaller than the bucket count, which
            // collapses every bucket but the last one - kept as is, confirm whether intended.
            double width = field.dataType() instanceof IntegralType
                ? Math.floor((max - min) / buckets)
                : (max - min) / buckets;

            Object[] boundaries = new Object[buckets + 1];
            for (int i = 0; i < buckets; i++) {
                boundaries[i] = Double.valueOf((i * width) + min);
            }
            boundaries[buckets] = Double.valueOf(max);
            boundariesByColumn.put(name, boundaries);

            Column col = functions.col(name);
            for (int i = 0; i < buckets; i++) {
                Column upperBound = (i == buckets - 1) ? col.leq(boundaries[i + 1]) : col.lt(boundaries[i + 1]);
                Column inBucket = col.geq(boundaries[i]).and(upperBound);
                bucketExprs.add(functions.count(functions.when(inBucket, 1)).alias(suffixed(name, i)).toString());
            }
        }

        Map<String, Object[][]> histograms = new HashMap<>();
        if (bucketExprs.isEmpty()) {
            // No numeric column to bucket: skip the query instead of reading a missing row.
            return histograms;
        }

        Seq<Column> columnSeq = seq(numericColumns);
        Seq<String> bucketExprSeq = seq(bucketExprs);
        Row countsRow = this.dataset.select(columnSeq).selectExpr(bucketExprSeq).collectAsList().get(0);

        for (Map.Entry<String, Object[]> entry : boundariesByColumn.entrySet()) {
            String name = entry.getKey();
            Object[] boundaries = entry.getValue();
            // NOTE(review): sized like the boundaries, so the last slot always stays null; kept
            // for compatibility with existing consumers - confirm whether that is expected.
            Object[] bucketCounts = new Object[boundaries.length];
            for (int i = 0; i < boundaries.length - 1; i++) {
                bucketCounts[i] = countsRow.getAs(suffixed(name, i));
            }
            histograms.put(name, new Object[][]{bucketCounts, boundaries});
        }
        return histograms;
    }

    /**
     * @param cls Spark data type class to look for
     * @return one single-column dataset per non-excluded column whose type is an instance of
     *         {@code cls}
     */
    List<Dataset<Row>> columnsOfType(Class<?> cls) {
        return Arrays.stream(this.dataset.schema().fields())
            .filter(field -> cls.isInstance(field.dataType()))
            .filter(field -> !this.req.isExcluded(field.name()))
            .map(field -> this.dataset.select(field.name()))
            .collect(Collectors.toList());
    }

    /**
     * Builds value-frequency histograms for all non-excluded columns of the given type.
     *
     * <p>At most {@code MAX_CATEGORICAL_BUCKETS_COUNT - 1} most frequent values are taken per
     * column and null values are left out. When {@link #summary()} reported at least
     * {@value #MAX_CATEGORICAL_BUCKETS_COUNT} distinct values for a column, a trailing
     * {@code "_others"} bucket holds the column's total count minus the listed counts.
     *
     * @param cls Spark data type class of the columns to process
     * @return column name mapped to a two-element list: the counts and the matching values
     */
    protected Map<String, List<List<Object>>> categoricalHistograms(Class<?> cls) {
        List<Dataset<Row>> columns = columnsOfType(cls);
        if (columns.isEmpty()) {
            return Collections.emptyMap();
        }

        // All per-column top-value queries are unioned so that a single job collects them.
        Dataset<Row> combined = null;
        for (Dataset<Row> column : columns) {
            String name = column.schema().names()[0];
            Dataset<Row> topValues = column.groupBy(name)
                .count()
                .orderBy(functions.desc("count"))
                .withColumn("column_name", functions.lit(name))
                .limit(MAX_CATEGORICAL_BUCKETS_COUNT - 1);
            combined = combined == null ? topValues : combined.union(topValues);
        }

        Map<String, List<List<Object>>> histograms = new HashMap<>();
        // Row layout: [0] value, [1] count, [2] name of the column the value belongs to.
        for (Row row : combined.collectAsList()) {
            if (row.get(0) == null) {
                continue;
            }
            String value = row.get(0).toString();
            long count = row.getLong(1);
            List<List<Object>> countsAndValues = histograms.computeIfAbsent(row.getString(2), key -> {
                List<List<Object>> fresh = new ArrayList<>(2);
                fresh.add(new ArrayList<>());
                fresh.add(new ArrayList<>());
                return fresh;
            });
            countsAndValues.get(0).add(Long.valueOf(count));
            countsAndValues.get(1).add(value);
        }

        for (Map.Entry<String, List<List<Object>>> entry : histograms.entrySet()) {
            Summary columnSummary = this.summaries.get(entry.getKey());
            // The summary is missing when summary() has not been run for this column.
            if (columnSummary == null || columnSummary.getDistinct() < MAX_CATEGORICAL_BUCKETS_COUNT) {
                continue;
            }
            List<Object> bucketCounts = entry.getValue().get(0);
            long listed = bucketCounts.stream().mapToLong(c -> (Long) c).sum();
            bucketCounts.add(Long.valueOf(columnSummary.getCount() - listed));
            entry.getValue().get(1).add("_others");
        }
        return histograms;
    }

    /** Tells whether the field has a supported type and is not excluded by the request. */
    private boolean isProfiled(StructField field) {
        return isSimpleType(field.dataType()) && !this.req.isExcluded(field.name());
    }

    /** Builds the {@code <name>_<suffix>} alias used for aggregate columns. */
    private static String suffixed(String name, Object suffix) {
        return String.format("%s_%s", name, suffix);
    }

    /** Builds the {@code <dfKey>.<column>.<metric>} key of the result map. */
    private String resultKey(String column, String metric) {
        return String.format("%s.%s.%s", this.dfKey, column, metric);
    }

    /**
     * Reads one statistic of one column from the collected summary rows. A cell may be null
     * (e.g. when the column has no non-null values); {@link Double#NaN} is returned then instead
     * of failing with a {@link NullPointerException}.
     */
    private static double statValue(Map<String, Row> statRows, String statistic, int columnIndex) {
        Row statRow = statRows.get(statistic);
        Object cell = statRow == null ? null : statRow.get(columnIndex);
        return cell == null ? Double.NaN : Double.parseDouble(cell.toString());
    }
}
