package ai.deepsense.deeplang.doperables.dataframe.report;

import ai.deepsense.commons.types.ColumnType$;
import ai.deepsense.deeplang.doperables.dataframe.report.distribution.DistributionCalculator$;
import ai.deepsense.deeplang.doperables.report.Report;
import ai.deepsense.reportlib.model.Distribution;
import ai.deepsense.reportlib.model.ReportContent;
import ai.deepsense.reportlib.model.ReportType$;
import ai.deepsense.reportlib.model.Table;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
import org.apache.spark.mllib.stat.Statistics$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.Enumeration;
import scala.None$;
import scala.Predef$;
import scala.Some;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;

/* compiled from: DataFrameReportGenerator.scala */
/* loaded from: input_file:ai/deepsense/deeplang/doperables/dataframe/report/DataFrameReportGenerator$.class */
/**
 * Singleton (compiled from a Scala {@code object}) that builds a {@link Report} describing a
 * Spark {@code Dataset<Row>}.
 *
 * <p>Two report flavours are produced, selected by the number of columns in the dataset schema:
 * a "full" report (data sample, size table and per-column distributions) and a "simplified"
 * report (size table and schema table) used once the column count reaches
 * {@link #ColumnNumberToGenerateSimplerReportThreshold()}.
 *
 * <p>Several transformations are delegated to compiled closure classes
 * ({@code DataFrameReportGenerator$$anonfun$N}) whose bodies are not visible in this file.
 */
public final class DataFrameReportGenerator$ {
    /**
     * The single instance. Initialised directly here: a {@code final} field cannot legally be
     * assigned {@code null} and then reassigned from the constructor.
     */
    public static final DataFrameReportGenerator$ MODULE$ = new DataFrameReportGenerator$();

    private final String ReportContentName;
    private final String DataSampleTableName;
    private final String DataSchemaTableName;
    private final String DataFrameSizeTableName;
    private final int MaxRowsNumberInReport;
    private final int ColumnNumberToGenerateSimplerReportThreshold;
    private final int StringPreviewMaxLength;

    /** @return the name given to the report content ("DataFrame Report"). */
    public String ReportContentName() {
        return this.ReportContentName;
    }

    /** @return the name of the data sample table ("Data Sample"). */
    public String DataSampleTableName() {
        return this.DataSampleTableName;
    }

    /** @return the name of the schema table ("Column Names and Types"). */
    public String DataSchemaTableName() {
        return this.DataSchemaTableName;
    }

    /** @return the name of the size table ("DataFrame Size"). */
    public String DataFrameSizeTableName() {
        return this.DataFrameSizeTableName;
    }

    /** @return the maximum number of rows fetched for the data sample table (20). */
    public int MaxRowsNumberInReport() {
        return this.MaxRowsNumberInReport;
    }

    /** @return the column count (20) at or above which the simplified report is generated. */
    public int ColumnNumberToGenerateSimplerReportThreshold() {
        return this.ColumnNumberToGenerateSimplerReportThreshold;
    }

    /**
     * @return the configured string preview length (300). Not read anywhere in this class;
     *     presumably consumed by the row-formatting closures - verify against their source.
     */
    public int StringPreviewMaxLength() {
        return this.StringPreviewMaxLength;
    }

    /**
     * Builds a report for the given dataset.
     *
     * @param dataset the dataset to describe
     * @return the simplified report when the schema has at least
     *     {@link #ColumnNumberToGenerateSimplerReportThreshold()} columns, the full report otherwise
     */
    public Report report(Dataset<Row> dataset) {
        if (dataset.schema().length() >= ColumnNumberToGenerateSimplerReportThreshold()) {
            return simplifiedReport(dataset);
        }
        return fullReport(dataset);
    }

    /**
     * Builds the full report: a data sample table, a size table (row count taken from the
     * multi-column statistics) and the distributions computed by {@code DistributionCalculator}.
     */
    private Report fullReport(Dataset<Row> dataset) {
        MultivariateStatisticalSummary multiColStats = calculateMultiColStats(dataset);
        return new Report(new ReportContent(ReportContentName(), ReportType$.MODULE$.DataFrameFull(), Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Table[]{sampleTable(dataset), sizeTable(dataset.schema(), multiColStats.count())})), DistributionCalculator$.MODULE$.distributionByColumn(dataset, multiColStats)));
    }

    /**
     * Computes MLlib column statistics over the dataset. Each row is first mapped to an MLlib
     * {@code Vector} by a compiled closure whose conversion rules are not visible here.
     */
    private MultivariateStatisticalSummary calculateMultiColStats(Dataset<Row> dataset) {
        return Statistics$.MODULE$.colStats(dataset.rdd().map(new DataFrameReportGenerator$$anonfun$1(), ClassTag$.MODULE$.apply(Vector.class)));
    }

    /**
     * Builds the simplified report: a size table (row count obtained via {@code dataset.count()})
     * and a schema table, with the distribution map produced by
     * {@link #noDistributionsForSimplifiedReport(StructType)}.
     */
    private Report simplifiedReport(Dataset<Row> dataset) {
        return new Report(new ReportContent(ReportContentName(), ReportType$.MODULE$.DataFrameSimplified(), Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Table[]{sizeTable(dataset.schema(), dataset.count()), schemaTable(dataset.schema())})), noDistributionsForSimplifiedReport(dataset.schema())));
    }

    /**
     * Builds the distribution map used by the simplified report: every schema field is mapped to
     * a key/value pair by a compiled closure and the pairs are collected into an immutable map.
     * NOTE(review): judging by the method name, each value is presumably a "no distribution"
     * placeholder - confirm against the closure source.
     */
    private Map<String, Distribution> noDistributionsForSimplifiedReport(StructType structType) {
        return Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(structType.fields()).map(new DataFrameReportGenerator$$anonfun$noDistributionsForSimplifiedReport$1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).toMap(Predef$.MODULE$.$conforms());
    }

    /**
     * Builds the schema table with the headers "Column index", "Column name" and "Column type"
     * (typed numeric, string, string). One row is produced per schema field by a compiled closure
     * applied to each (field, index) pair.
     */
    private Table schemaTable(StructType structType) {
        return new Table(DataSchemaTableName(), "Preview of columns and their types in dataset", new Some(List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"Column index", "Column name", "Column type"}))), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Enumeration.Value[]{ColumnType$.MODULE$.numeric(), ColumnType$.MODULE$.string(), ColumnType$.MODULE$.string()})), None$.MODULE$, Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(structType.fields()).zipWithIndex(Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).map(new DataFrameReportGenerator$$anonfun$2(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(List.class)))).toList());
    }

    /**
     * Builds the data sample table from at most {@link #MaxRowsNumberInReport()} rows of the
     * dataset. Column headers are the schema field names; cell values and column types are
     * produced by compiled closures not visible here.
     */
    private Table sampleTable(Dataset<Row> dataset) {
        List columnNames = Predef$.MODULE$.refArrayOps(dataset.schema().fieldNames()).toList();
        int columnCount = columnNames.size();
        // NOTE(review): the description below says "Randomly selected", but the rows are obtained
        // with Dataset.take - confirm whether the wording is intentional before changing it.
        Row[] sampledRows = (Row[]) dataset.take(MaxRowsNumberInReport());
        List rowValues = Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(sampledRows).map(new DataFrameReportGenerator$$anonfun$3(columnCount), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(List.class)))).toList();
        String description = DataSampleTableName() + ". " + "Randomly selected " + sampledRows.length + " rows";
        List columnTypes = ((TraversableOnce) dataset.schema().map(new DataFrameReportGenerator$$anonfun$4(), Seq$.MODULE$.canBuildFrom())).toList();
        return new Table(DataSampleTableName(), description, new Some(columnNames), columnTypes, None$.MODULE$, rowValues);
    }

    /**
     * Builds the one-row size table with the headers "Number of columns" and "Number of rows"
     * (both typed numeric).
     *
     * @param structType schema whose field count fills the first cell
     * @param j row count rendered into the second cell
     */
    private Table sizeTable(StructType structType, long j) {
        String description = DataFrameSizeTableName() + ". " + "Number of columns and number of rows in the DataFrame.";
        return new Table(DataFrameSizeTableName(), description, new Some(List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"Number of columns", "Number of rows"}))), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Enumeration.Value[]{ColumnType$.MODULE$.numeric(), ColumnType$.MODULE$.numeric()})), None$.MODULE$, List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new List[]{List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Some[]{new Some(Integer.toString(structType.length())), new Some(Long.toString(j))}))})));
    }

    /** Private: the only instance is {@link #MODULE$}. */
    private DataFrameReportGenerator$() {
        this.ReportContentName = "DataFrame Report";
        this.DataSampleTableName = "Data Sample";
        this.DataSchemaTableName = "Column Names and Types";
        this.DataFrameSizeTableName = "DataFrame Size";
        this.MaxRowsNumberInReport = 20;
        this.ColumnNumberToGenerateSimplerReportThreshold = 20;
        this.StringPreviewMaxLength = 300;
    }
}
