/*
 * Decompiled with CFR 0.152.
 */
package io.projectglow.transformers.blockvariantsandsamples;

import com.typesafe.scalalogging.LazyLogging;
import com.typesafe.scalalogging.Logger;
import io.projectglow.common.GlowLogging;
import io.projectglow.common.VariantSchemas$;
import io.projectglow.functions$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.expressions.Window$;
import org.apache.spark.sql.expressions.WindowSpec;
import org.apache.spark.sql.types.ArrayType$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.IntegerType$;
import org.apache.spark.sql.types.StringType$;
import scala.Predef$;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

public final class VariantSampleBlockMaker$
implements GlowLogging {
    // The singleton instance; assigned by the private constructor.
    public static VariantSampleBlockMaker$ MODULE$;
    // Cached logger; written once by logger$lzycompute() and read through logger().
    private Logger logger;
    // Set to true after `logger` has been assigned; volatile so that logger()
    // can test it without taking the lock.
    private volatile boolean bitmap$0;

    // Creates the singleton when the class is initialized. The instance is not
    // stored here: the private constructor assigns it to MODULE$.
    static {
        new VariantSampleBlockMaker$();
    }

    /**
     * Slow path of {@link #logger()}: assigns the cached logger while holding this
     * instance's monitor, unless the initialization flag is already set.
     *
     * @return the cached logger
     */
    private Logger logger$lzycompute() {
        synchronized (this) {
            // Re-check under the lock so only the first caller performs the assignment.
            boolean alreadyInitialized = this.bitmap$0;
            if (!alreadyInitialized) {
                this.logger = LazyLogging.logger$((LazyLogging)this);
                this.bitmap$0 = true;
            }
        }
        return this.logger;
    }

    /**
     * Returns the logger for this object, initializing it on first use.
     *
     * @return the cached logger if the initialization flag is set, otherwise the
     *         result of {@code logger$lzycompute()}
     */
    public Logger logger() {
        if (this.bitmap$0) {
            return this.logger;
        }
        return this.logger$lzycompute();
    }

    /**
     * Adds a check that every row's {@code values} array has the same size as the
     * first row's.
     *
     * <p>The expected size is read eagerly from the first row returned by
     * {@code take(1)}. The returned Dataset filters on
     * {@code isnull(assert_true_or_error(size(values) = expected, message))}, so the
     * per-row comparison only happens when that Dataset is evaluated.
     * NOTE(review): {@code assert_true_or_error} is a SQL function not defined in this
     * file; presumably it fails the query with the given message when the condition is
     * false and yields null otherwise -- confirm against its definition.
     *
     * <p>An empty input has no first row to take the expected size from; it is
     * returned unchanged instead of failing with an array index error.
     *
     * @param df input rows; must expose a {@code values} column usable with {@code size()}
     * @return {@code df} with the size assertion applied as a filter, or {@code df}
     *         itself when it contains no rows
     */
    public Dataset<Row> validateNumValues(Dataset<Row> df) {
        Row[] firstRow = (Row[])df.selectExpr((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"size(values) as numValues"})).take(1);
        if (firstRow.length == 0) {
            // No rows means no reference size and nothing that could be inconsistent.
            return df;
        }
        int expectedNumValues = BoxesRunTime.unboxToInt((Object)firstRow[0].getAs("numValues"));
        String errMsg = "At least one row has an inconsistent number of values (expected " + expectedNumValues + "). "
                + "Please verify that each row contains the same number of values.";
        String assertion = "isnull(assert_true_or_error(size(values) = " + expectedNumValues + ", '" + errMsg + "'))";
        return df.filter(org.apache.spark.sql.functions$.MODULE$.expr(assertion));
    }

    /**
     * Keeps only the rows whose values column contains more than one distinct
     * element, logging an info message first when info logging is enabled.
     *
     * @param df input rows containing the values column named by {@code VariantSchemas}
     * @return {@code df} filtered to rows where {@code size(array_distinct(values)) > 1}
     */
    public Dataset<Row> filterOneDistinctValue(Dataset<Row> df) {
        if (this.logger().underlying().isInfoEnabled()) {
            this.logger().underlying().info("Filtering variants whose values are all the same.");
        }
        org.apache.spark.sql.functions$ sql = org.apache.spark.sql.functions$.MODULE$;
        Column valuesCol = sql.col(VariantSchemas$.MODULE$.valuesField().name());
        Column distinctCount = sql.size(sql.array_distinct(valuesCol));
        return df.filter(distinctCount.$greater((Object)BoxesRunTime.boxToInteger((int)1)));
    }

    /**
     * Splits each row's values array into {@code sampleBlockCount} consecutive slices,
     * producing one output row per (input row, sample block).
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>add {@code fractionalSampleBlockSize} = size(values) / sampleBlockCount;</li>
     *   <li>explode the sequence 1..sampleBlockCount, cast to an array of strings, into
     *       the sample block ID column;</li>
     *   <li>replace the values column with the slice that starts at
     *       {@code round((id - 1) * fractionalSampleBlockSize) + 1} and whose length is
     *       the difference between the rounded end offsets of block {@code id} and
     *       block {@code id - 1}.</li>
     * </ol>
     * The helper column {@code fractionalSampleBlockSize} is still present in the result.
     *
     * @param df input rows containing the values column named by {@code VariantSchemas}
     * @param sampleBlockCount number of sample blocks to cut each values array into
     * @return the exploded Dataset with per-block values
     */
    public Dataset<Row> makeSampleBlocks(Dataset<Row> df, int sampleBlockCount) {
        org.apache.spark.sql.functions$ sql = org.apache.spark.sql.functions$.MODULE$;
        String valuesName = VariantSchemas$.MODULE$.valuesField().name();
        String blockIdName = VariantSchemas$.MODULE$.sampleBlockIdField().name();

        Column fractionalSize = sql.size(sql.col(valuesName)).$div((Object)BoxesRunTime.boxToInteger((int)sampleBlockCount));

        Column blockIdSequence = sql.sequence(sql.lit((Object)BoxesRunTime.boxToInteger((int)1)), sql.lit((Object)BoxesRunTime.boxToInteger((int)sampleBlockCount)));
        Column blockIds = sql.explode(blockIdSequence.cast((DataType)ArrayType$.MODULE$.apply((DataType)StringType$.MODULE$)));

        // Margin-formatted SQL; stripMargin removes everything up to and including '|' on each line.
        String sliceTemplate = "slice(\n             |   " + valuesName
                + ",\n             |   round((" + blockIdName + " - 1) * fractionalSampleBlockSize) + 1,\n             |   round(" + blockIdName
                + " * fractionalSampleBlockSize) - round((" + blockIdName + " - 1) * fractionalSampleBlockSize)\n             |)";
        String sliceSql = new StringOps(Predef$.MODULE$.augmentString(sliceTemplate)).stripMargin();

        return df
                .withColumn("fractionalSampleBlockSize", fractionalSize)
                .withColumn(blockIdName, blockIds)
                .withColumn(valuesName, sql.expr(sliceSql));
    }

    /**
     * Validates and filters the variant rows, annotates them, cuts them into sample
     * blocks and assigns each row a header block ID.
     *
     * <p>Pipeline:
     * <ol>
     *   <li>{@code validateNumValues} then {@code filterOneDistinctValue};</li>
     *   <li>add the sort key (start cast to integer), the header
     *       ({@code contig:start:ref:alt}) and the mean / stdDev taken from
     *       {@code array_summary_stats} of the values column;</li>
     *   <li>{@code makeSampleBlocks} with {@code sampleBlockCount};</li>
     *   <li>add the size of the per-block values array and the header block ID
     *       {@code chr_<contig>_block_<n>}, where {@code n} is
     *       {@code (row_number - 1) / variantsPerBlock} cast to integer, with row numbers
     *       taken over a window partitioned by contig and sample block ID and ordered by
     *       start, reference allele and alternate alleles;</li>
     *   <li>select header, size, values, header block ID, sample block ID, sort key,
     *       mean and stdDev, in that order.</li>
     * </ol>
     *
     * @param variantDf input variant rows
     * @param variantsPerBlock number of consecutive variants (per window partition) that
     *        share one header block index
     * @param sampleBlockCount number of sample blocks passed to {@code makeSampleBlocks}
     * @return the blocked Dataset restricted to the eight columns listed above
     */
    public Dataset<Row> makeVariantAndSampleBlocks(Dataset<Row> variantDf, int variantsPerBlock, int sampleBlockCount) {
        org.apache.spark.sql.functions$ sql = org.apache.spark.sql.functions$.MODULE$;
        VariantSchemas$ schema = VariantSchemas$.MODULE$;

        // Column names used throughout the pipeline.
        String contigName = schema.contigNameField().name();
        String start = schema.startField().name();
        String refAllele = schema.refAlleleField().name();
        String altAlleles = schema.alternateAllelesField().name();
        String values = schema.valuesField().name();
        String sampleBlockId = schema.sampleBlockIdField().name();
        String headerBlockId = schema.headerBlockIdField().name();
        String header = schema.headerField().name();
        String sortKey = schema.sortKeyField().name();
        String size = schema.sizeField().name();
        String mean = schema.meanField().name();
        String stdDev = schema.stdDevField().name();

        WindowSpec blockWindow = Window$.MODULE$
                .partitionBy(contigName, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{sampleBlockId}))
                .orderBy(start, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{refAllele, altAlleles}));

        Column headerCol = sql.concat_ws(":", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{sql.col(contigName), sql.col(start), sql.col(refAllele), sql.col(altAlleles)}));
        Column statsCol = functions$.MODULE$.subset_struct(functions$.MODULE$.array_summary_stats(sql.col(values)), (Seq<String>)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"mean", "stdDev"}));

        Dataset<Row> validated = this.validateNumValues(variantDf);
        Dataset baseDf = this.filterOneDistinctValue(validated)
                .withColumn(sortKey, sql.col(start).cast((DataType)IntegerType$.MODULE$))
                .withColumn(header, headerCol)
                .withColumn("stats", statsCol)
                .withColumn(mean, sql.col("stats.mean"))
                .withColumn(stdDev, sql.col("stats.stdDev"));

        // Zero-based index of the variant block within its window partition.
        Column blockIndex = sql.row_number().over(blockWindow)
                .$minus((Object)BoxesRunTime.boxToInteger((int)1))
                .$div((Object)BoxesRunTime.boxToInteger((int)variantsPerBlock))
                .cast((DataType)IntegerType$.MODULE$);
        Column headerBlockIdCol = sql.concat_ws("_", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{sql.lit((Object)"chr"), sql.col(contigName), sql.lit((Object)"block"), blockIndex}));

        Column[] outputColumns = new Column[]{sql.col(header), sql.col(size), sql.col(values), sql.col(headerBlockId), sql.col(sampleBlockId), sql.col(sortKey), sql.col(mean), sql.col(stdDev)};

        return this.makeSampleBlocks((Dataset<Row>)baseDf, sampleBlockCount)
                .withColumn(size, sql.size(sql.col(values)))
                .withColumn(headerBlockId, headerBlockIdCol)
                .select((Seq)Predef$.MODULE$.wrapRefArray((Object[])outputColumns));
    }

    /**
     * Private constructor, invoked from the static initializer. Stores the new
     * instance in {@code MODULE$} and then runs the {@code LazyLogging} trait
     * initializer on it.
     */
    private VariantSampleBlockMaker$() {
        MODULE$ = this;
        LazyLogging.$init$((LazyLogging)this);
    }
}

