package com.databricks.labs.automl.sanitize;

import com.databricks.labs.automl.exceptions.BooleanFieldFillException;
import com.databricks.labs.automl.exceptions.BooleanFieldFillException$;
import com.databricks.labs.automl.inference.NaFillConfig;
import com.databricks.labs.automl.inference.NaFillPayload;
import com.databricks.labs.automl.model.tools.split.PerformanceSettings$;
import com.databricks.labs.automl.utils.DataValidation;
import com.databricks.labs.automl.utils.SchemaUtils$;
import com.databricks.labs.automl.utils.SparkSessionWrapper;
import com.databricks.labs.automl.utils.ValidatedCategoricalFields;
import com.databricks.labs.automl.utils.structures.FeatureEngineeringAllowables$;
import com.databricks.labs.automl.utils.structures.FeatureEngineeringEnums;
import com.databricks.labs.automl.utils.structures.FeatureEngineeringEnums$;
import com.databricks.labs.automl.utils.structures.FieldTypes;
import org.apache.log4j.Logger;
import org.apache.spark.SparkContext;
import org.apache.spark.ml.feature.OneHotEncoder;
import org.apache.spark.ml.feature.StringIndexer;
import org.apache.spark.ml.feature.VectorAssembler;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.BinaryType$;
import org.apache.spark.sql.types.BooleanType$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import scala.Array$;
import scala.MatchError;
import scala.None$;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Iterable$;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Map$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayBuffer$;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.RichInt$;

/* compiled from: DataSanitizer.scala */
@ScalaSignature(bytes = "\u0006\u0001\u00115a\u0001\u00022d\u00019D\u0001B \u0001\u0003\u0002\u0003\u0006Ia \u0005\b\u0003g\u0001A\u0011AA\u001b\u0011%\ti\u0004\u0001a\u0001\n\u0013\ty\u0004C\u0005\u0002R\u0001\u0001\r\u0011\"\u0003\u0002T!A\u0011q\f\u0001!B\u0013\t\t\u0005C\u0005\u0002b\u0001\u0001\r\u0011\"\u0003\u0002@!I\u00111\r\u0001A\u0002\u0013%\u0011Q\r\u0005\t\u0003S\u0002\u0001\u0015)\u0003\u0002B!I\u00111\u000e\u0001A\u0002\u0013%\u0011q\b\u0005\n\u0003[\u0002\u0001\u0019!C\u0005\u0003_B\u0001\"a\u001d\u0001A\u0003&\u0011\u0011\t\u0005\n\u0003k\u0002\u0001\u0019!C\u0005\u0003\u007fA\u0011\"a\u001e\u0001\u0001\u0004%I!!\u001f\t\u0011\u0005u\u0004\u0001)Q\u0005\u0003\u0003B\u0011\"a \u0001\u0001\u0004%I!!!\t\u0013\u0005%\u0005\u00011A\u0005\n\u0005-\u0005\u0002CAH\u0001\u0001\u0006K!a!\t\u0013\u0005E\u0005\u00011A\u0005\n\u0005M\u0005\"CAU\u0001\u0001\u0007I\u0011BAV\u0011!\ty\u000b\u0001Q!\n\u0005U\u0005\"CAY\u0001\u0001\u0007I\u0011BAZ\u0011%\tY\f\u0001a\u0001\n\u0013\ti\f\u0003\u0005\u0002B\u0002\u0001\u000b\u0015BA[\u0011%\t\u0019\r\u0001a\u0001\n\u0013\t\t\tC\u0005\u0002F\u0002\u0001\r\u0011\"\u0003\u0002H\"A\u00111\u001a\u0001!B\u0013\t\u0019\tC\u0005\u0002N\u0002\u0001\r\u0011\"\u0003\u0002P\"I\u0011q\u001b\u0001A\u0002\u0013%\u0011\u0011\u001c\u0005\t\u0003;\u0004\u0001\u0015)\u0003\u0002R\"I\u0011q\u001c\u0001A\u0002\u0013%\u0011\u0011\u001d\u0005\n\u0003W\u0004\u0001\u0019!C\u0005\u0003[D\u0001\"!=\u0001A\u0003&\u00111\u001d\u0005\n\u0003g\u0004\u0001\u0019!C\u0005\u0003kD\u0011\"a>\u0001\u0001\u0004%I!!?\t\u0011\u0005u\b\u0001)Q\u0005\u00037C\u0011\"a@\u0001\u0001\u0004%I!a-\t\u0013\t\u0005\u0001\u00011A\u0005\n\t\r\u0001\u0002\u0003B\u0004\u0001\u0001\u0006K!!.\t\u0013\t%\u0001\u00011A\u0005\n\u0005U\b\"\u0003B\u0006\u0001\u0001\u0007I\u0011\u0002B\u0007\u0011!\u0011\t\u0002\u0001Q!\n\u0005m\u0005\"\u0003B\n\u0001\t\u0007IQ\u0002B\u000b\u0011!\u00119\u0003\u0001Q\u0001\u000e\t]\u0001b\u0002B\u0015\u0001\u0011\u0005!1\u0006\u0005\b\u0005g\u0001A\
u0011\u0001B\u001b\u0011\u001d\u0011I\u0004\u0001C\u0001\u0005wAqAa\u0010\u0001\t\u0003\u0011\t\u0005C\u0004\u0003F\u0001!\tAa\u0012\t\u000f\t-\u0003\u0001\"\u0001\u0003N!9!\u0011\u000b\u0001\u0005\u0002\tM\u0003b\u0002B,\u0001\u0011\u0005!\u0011\f\u0005\b\u0005;\u0002A\u0011\u0001B0\u0011\u001d\u0011\u0019\u0007\u0001C\u0001\u0005KBqA!\u001b\u0001\t\u0003\u0011Y\u0007C\u0004\u0003p\u0001!\tA!\u001d\t\u000f\tU\u0004\u0001\"\u0001\u0003x!9!1\u0012\u0001\u0005\u0002\u0005U\bb\u0002BG\u0001\u0011\u0005\u0011Q\u001f\u0005\b\u0005\u001f\u0003A\u0011AA{\u0011\u001d\u0011\t\n\u0001C\u0001\u0003kDqAa%\u0001\t\u0003\t\t\tC\u0004\u0003\u0016\u0002!\t!a%\t\u000f\t]\u0005\u0001\"\u0001\u0002\u0002\"9!\u0011\u0014\u0001\u0005\u0002\u0005M\u0006b\u0002BN\u0001\u0011\u0005\u0011q\u001a\u0005\b\u0005;\u0003A\u0011AAq\u0011\u001d\u0011y\n\u0001C\u0001\u0003kDqA!)\u0001\t\u0003\t\u0019\fC\u0004\u0003$\u0002!\t!!>\t\u0013\t\u0015\u0006\u00011A\u0005\n\t\u001d\u0006\"\u0003BX\u0001\u0001\u0007I\u0011\u0002BY\u0011!\u0011)\f\u0001Q!\n\t%\u0006b\u0002B\\\u0001\u0011\u0005!\u0011\u0018\u0005\b\u0005w\u0003A\u0011\u0002B_\u0011\u001d\u0011\u0019\r\u0001C\u0001\u0005\u000bDqA!7\u0001\t\u0013\u0011Y\u000eC\u0004\u0003d\u0002!IA!:\t\u000f\t-\b\u0001\"\u0003\u0003n\"9!Q\u001f\u0001\u0005\n\t]\bbBB\u0002\u0001\u0011%1Q\u0001\u0005\b\u0007?\u0001A\u0011BB\u0011\u0011\u001d\u0019y\u0005\u0001C\u0005\u0007#Bqaa\u001c\u0001\t\u0013\u0019\t\bC\u0004\u0004\u0002\u0002!Iaa!\t\u000f\r-\u0005\u0001\"\u0003\u0004\u000e\"91\u0011\u0013\u0001\u0005\n\rM\u0005bBBO\u0001\u0011%1q\u0014\u0005\b\u0007G\u0003A\u0011BBS\u0011\u001d\u0019I\u000b\u0001C\u0005\u0007WCqaa,\u0001\t\u0013\u0019\t\fC\u0004\u0004D\u0002!Ia!2\t\u000f\r%\u0007\u0001\"\u0003\u0004L\"91q\u001a\u0001\u0005\u0002\rE\u0007bBBj\u0001\u0011\u00051Q\u001b\u0005\n\u0007S\u0004\u0011\u0013!C\u0001\u0007WD\u0011\u0002\"\u0001\u0001#\u0003%\t\u0001b\u0001\t\u0013\u0011\u001d\u0001!%A\u0005\u0002\u0011%!!\u0004#bi\u0006\u001c\u0016M\\5uSj,'O\u0003\u00
02eK\u0006A1/\u00198ji&TXM\u0003\u0002gO\u00061\u0011-\u001e;p[2T!\u0001[5\u0002\t1\f'm\u001d\u0006\u0003U.\f!\u0002Z1uC\n\u0014\u0018nY6t\u0015\u0005a\u0017aA2p[\u000e\u00011\u0003\u0002\u0001pkn\u0004\"\u0001]:\u000e\u0003ET\u0011A]\u0001\u0006g\u000e\fG.Y\u0005\u0003iF\u0014a!\u00118z%\u00164\u0007C\u0001<z\u001b\u00059(B\u0001=f\u0003\u0015)H/\u001b7t\u0013\tQxO\u0001\bECR\fg+\u00197jI\u0006$\u0018n\u001c8\u0011\u0005Yd\u0018BA?x\u0005M\u0019\u0006/\u0019:l'\u0016\u001c8/[8o/J\f\u0007\u000f]3s\u0003\u0011!\u0017\r^1\u0011\t\u0005\u0005\u0011Q\u0006\b\u0005\u0003\u0007\t9C\u0004\u0003\u0002\u0006\u0005\u0005b\u0002BA\u0004\u00037qA!!\u0003\u0002\u00169!\u00111BA\t\u001b\t\tiAC\u0002\u0002\u00105\fa\u0001\u0010:p_Rt\u0014BAA\n\u0003\ry'oZ\u0005\u0005\u0003/\tI\"\u0001\u0004ba\u0006\u001c\u0007.\u001a\u0006\u0003\u0003'IA!!\b\u0002 \u0005)1\u000f]1sW*!\u0011qCA\r\u0013\u0011\t\u0019#!\n\u0002\u0007M\fHN\u0003\u0003\u0002\u001e\u0005}\u0011\u0002BA\u0015\u0003W\tq\u0001]1dW\u0006<WM\u0003\u0003\u0002$\u0005\u0015\u0012\u0002BA\u0018\u0003c\u0011\u0011\u0002R1uC\u001a\u0013\u0018-\\3\u000b\t\u0005%\u00121F\u0001\u0007y%t\u0017\u000e\u001e 
\u0015\t\u0005]\u00121\b\t\u0004\u0003s\u0001Q\"A2\t\u000by\u0014\u0001\u0019A@\u0002\u0013}c\u0017MY3m\u0007>dWCAA!!\u0011\t\u0019%!\u0014\u000e\u0005\u0005\u0015#\u0002BA$\u0003\u0013\nA\u0001\\1oO*\u0011\u00111J\u0001\u0005U\u00064\u0018-\u0003\u0003\u0002P\u0005\u0015#AB*ue&tw-A\u0007`Y\u0006\u0014W\r\\\"pY~#S-\u001d\u000b\u0005\u0003+\nY\u0006E\u0002q\u0003/J1!!\u0017r\u0005\u0011)f.\u001b;\t\u0013\u0005uC!!AA\u0002\u0005\u0005\u0013a\u0001=%c\u0005Qq\f\\1cK2\u001cu\u000e\u001c\u0011\u0002\u0017}3W-\u0019;ve\u0016\u001cu\u000e\\\u0001\u0010?\u001a,\u0017\r^;sK\u000e{Gn\u0018\u0013fcR!\u0011QKA4\u0011%\tifBA\u0001\u0002\u0004\t\t%\u0001\u0007`M\u0016\fG/\u001e:f\u0007>d\u0007%\u0001\t`]VlWM]5d\r&dGn\u0015;bi\u0006!rL\\;nKJL7MR5mYN#\u0018\r^0%KF$B!!\u0016\u0002r!I\u0011Q\f\u0006\u0002\u0002\u0003\u0007\u0011\u0011I\u0001\u0012?:,X.\u001a:jG\u001aKG\u000e\\*uCR\u0004\u0013AE0dQ\u0006\u0014\u0018m\u0019;fe\u001aKG\u000e\\*uCR\facX2iCJ\f7\r^3s\r&dGn\u0015;bi~#S-\u001d\u000b\u0005\u0003+\nY\bC\u0005\u0002^5\t\t\u00111\u0001\u0002B\u0005\u0019rl\u00195be\u0006\u001cG/\u001a:GS2d7\u000b^1uA\u0005\u0001s,\\8eK2\u001cV\r\\3di&|g\u000eR5ti&t7\r\u001e+ie\u0016\u001c\bn\u001c7e+\t\t\u0019\tE\u0002q\u0003\u000bK1!a\"r\u0005\rIe\u000e^\u0001%?6|G-\u001a7TK2,7\r^5p]\u0012K7\u000f^5oGR$\u0006N]3tQ>dGm\u0018\u0013fcR!\u0011QKAG\u0011%\ti\u0006EA\u0001\u0002\u0004\t\u0019)A\u0011`[>$W\r\\*fY\u0016\u001cG/[8o\t&\u001cH/\u001b8diRC'/Z:i_2$\u0007%A\f`M&,G\u000eZ:U_&;gn\u001c:f\u0013:4Vm\u0019;peV\u0011\u0011Q\u0013\t\u0006a\u0006]\u00151T\u0005\u0004\u00033\u000b(!B!se\u0006L\b\u0003BAO\u0003KsA!a(\u0002\"B\u0019\u00111B9\n\u0007\u0005\r\u0016/\u0001\u0004Qe\u0016$WMZ\u0005\u0005\u0003\u001f\n9KC\u0002\u0002$F\f1d\u00184jK2$7\u000fV8JO:|'/Z%o-\u0016\u001cGo\u001c:`I\u0015\fH\u0003BA+\u0003[C\u0011\"!\u0018\u0014\u0003\u0003\u0005\r!!&\u00021}3\u0017.\u001a7egR{\u0017j\u001a8pe\u0016LeNV3di>\u0014\b%\u0001\t`M&dG/\u001a:Qe\u0016\u001c\u0017n]5p]V\u0011\u0011Q\u0017\t\u0004a\u0006]\u0
016bAA]c\n1Ai\\;cY\u0016\fAc\u00184jYR,'\u000f\u0015:fG&\u001c\u0018n\u001c8`I\u0015\fH\u0003BA+\u0003\u007fC\u0011\"!\u0018\u0017\u0003\u0003\u0005\r!!.\u0002#}3\u0017\u000e\u001c;feB\u0013XmY5tS>t\u0007%\u0001\u0007`a\u0006\u0014\u0018\r\u001c7fY&\u001cX.\u0001\t`a\u0006\u0014\u0018\r\u001c7fY&\u001cXn\u0018\u0013fcR!\u0011QKAe\u0011%\ti&GA\u0001\u0002\u0004\t\u0019)A\u0007`a\u0006\u0014\u0018\r\u001c7fY&\u001cX\u000eI\u0001\u0016?\u000e\fG/Z4pe&\u001c\u0017\r\u001c(B\r&dG.T1q+\t\t\t\u000e\u0005\u0005\u0002\u001e\u0006M\u00171TAN\u0013\u0011\t).a*\u0003\u00075\u000b\u0007/A\r`G\u0006$XmZ8sS\u000e\fGNT!GS2dW*\u00199`I\u0015\fH\u0003BA+\u00037D\u0011\"!\u0018\u001d\u0003\u0003\u0005\r!!5\u0002-}\u001b\u0017\r^3h_JL7-\u00197O\u0003\u001aKG\u000e\\'ba\u0002\n\u0011c\u00188v[\u0016\u0014\u0018n\u0019(B\r&dG.T1q+\t\t\u0019\u000f\u0005\u0005\u0002\u001e\u0006M\u00171TAs!\r\u0001\u0018q]\u0005\u0004\u0003S\f(AB!osZ\u000bG.A\u000b`]VlWM]5d\u001d\u00063\u0015\u000e\u001c7NCB|F%Z9\u0015\t\u0005U\u0013q\u001e\u0005\n\u0003;z\u0012\u0011!a\u0001\u0003G\f!c\u00188v[\u0016\u0014\u0018n\u0019(B\r&dG.T1qA\u00059rl\u00195be\u0006\u001cG/\u001a:O\u0003\nc\u0017M\\6fi\u001aKG\u000e\\\u000b\u0003\u00037\u000b1dX2iCJ\f7\r^3s\u001d\u0006\u0013E.\u00198lKR4\u0015\u000e\u001c7`I\u0015\fH\u0003BA+\u0003wD\u0011\"!\u0018#\u0003\u0003\u0005\r!a'\u00021}\u001b\u0007.\u0019:bGR,'OT!CY\u0006t7.\u001a;GS2d\u0007%A\u000b`]VlWM]5d\u001d\u0006\u0013E.\u00198lKR4\u0015\u000e\u001c7\u00023}sW/\\3sS\u000et\u0015I\u00117b].,GOR5mY~#S-\u001d\u000b\u0005\u0003+\u0012)\u0001C\u0005\u0002^\u0015\n\t\u00111\u0001\u00026\u00061rL\\;nKJL7MT!CY\u0006t7.\u001a;GS2d\u0007%A\u0006`]\u00064\u0015\u000e\u001c7N_\u0012,\u0017aD0oC\u001aKG\u000e\\'pI\u0016|F%Z9\u0015\t\u0005U#q\u0002\u0005\n\u0003;B\u0013\u0011!a\u0001\u00037\u000bAb\u00188b\r&dG.T8eK\u0002\nQcX1mY><\u0018M\u00197f\u001d\u00063\u0015\u000e\u001c7N_\u0012,7/\u0006\u0002\u0003\u0018A1!\u0011\u0004B\u0011\u00037sAAa\u0007\u0003 
9!\u00111\u0002B\u000f\u0013\u0005\u0011\u0018bAA\u0015c&!!1\u0005B\u0013\u0005\u0011a\u0015n\u001d;\u000b\u0007\u0005%\u0012/\u0001\f`C2dwn^1cY\u0016t\u0015IR5mY6{G-Z:!\u0003-\u0019X\r\u001e'bE\u0016d7i\u001c7\u0015\t\t5\"qF\u0007\u0002\u0001!9!\u0011\u0007\u0017A\u0002\u0005m\u0015!\u0002<bYV,\u0017!D:fi\u001a+\u0017\r^;sK\u000e{G\u000e\u0006\u0003\u0003.\t]\u0002b\u0002B\u0019[\u0001\u0007\u00111T\u0001\u0013g\u0016$h*^7fe&\u001cg)\u001b7m'R\fG\u000f\u0006\u0003\u0003.\tu\u0002b\u0002B\u0019]\u0001\u0007\u00111T\u0001\u0015g\u0016$8\t[1sC\u000e$XM\u001d$jY2\u001cF/\u0019;\u0015\t\t5\"1\t\u0005\b\u0005cy\u0003\u0019AAN\u0003\t\u001aX\r^'pI\u0016d7+\u001a7fGRLwN\u001c#jgRLgn\u0019;UQJ,7\u000f[8mIR!!Q\u0006B%\u0011\u001d\u0011\t\u0004\ra\u0001\u0003\u0007\u000b\u0011d]3u\r&,G\u000eZ:U_&;gn\u001c:f\u0013:4Vm\u0019;peR!!Q\u0006B(\u0011\u001d\u0011\t$\ra\u0001\u0003+\u000bab]3u!\u0006\u0014\u0018\r\u001c7fY&\u001cX\u000e\u0006\u0003\u0003.\tU\u0003b\u0002B\u0019e\u0001\u0007\u00111Q\u0001\u0013g\u0016$h)\u001b7uKJ\u0004&/Z2jg&|g\u000e\u0006\u0003\u0003.\tm\u0003b\u0002B\u0019g\u0001\u0007\u0011QW\u0001\u0018g\u0016$8)\u0019;fO>\u0014\u0018nY1m\u001d\u00063\u0015\u000e\u001c7NCB$BA!\f\u0003b!9!\u0011\u0007\u001bA\u0002\u0005E\u0017aE:fi:+X.\u001a:jG:\u000be)\u001b7m\u001b\u0006\u0004H\u0003\u0002B\u0017\u0005OBqA!\r6\u0001\u0004\t\u0019/\u0001\u0010tKR\u001c\u0005.\u0019:bGR,'OT!CY\u0006t7.\u001a;GS2dg+\u00197vKR!!Q\u0006B7\u0011\u001d\u0011\tD\u000ea\u0001\u00037\u000bAd]3u\u001dVlWM]5d\u001d\u0006\u0013E.\u00198lKR4\u0015\u000e\u001c7WC2,X\r\u0006\u0003\u0003.\tM\u0004b\u0002B\u0019o\u0001\u0007\u0011QW\u0001\u000eg\u0016$h*\u0011$jY2lu\u000eZ3\u0015\t\t5\"\u0011\u0010\u0005\b\u0005cA\u0004\u0019AANQ\u0015A$Q\u0010BE!\u0015\u0001(q\u0010BB\u0013\r\u0011\t)\u001d\u0002\u0007i\"\u0014xn^:\u0011\t\te!QQ\u0005\u0005\u0005\u000f\u0013)C\u0001\rJY2,w-\u00197Be\u001e,X.\u001a8u\u000bb\u001cW\r\u001d;j_:\u001c#Aa!\u0002\u0011\u001d,G\u000fT1cK2\fQbZ3u\r\u0016\fG/\u001e:f\u000
7>d\u0017AE4fi:+X.\u001a:jG\u001aKG\u000e\\*uCR\fAcZ3u\u0007\"\f'/Y2uKJ4\u0015\u000e\u001c7Ti\u0006$\u0018AI4fi6{G-\u001a7TK2,7\r^5p]\u0012K7\u000f^5oGR$\u0006N]3tQ>dG-A\rhKR4\u0015.\u001a7egR{\u0017j\u001a8pe\u0016LeNV3di>\u0014\u0018AD4fiB\u000b'/\u00197mK2L7/\\\u0001\u0013O\u0016$h)\u001b7uKJ\u0004&/Z2jg&|g.A\fhKR\u001c\u0015\r^3h_JL7-\u00197O\u0003\u001aKG\u000e\\'ba\u0006\u0019r-\u001a;Ok6,'/[2O\u0003\u001aKG\u000e\\'ba\u0006qr-\u001a;DQ\u0006\u0014\u0018m\u0019;fe:\u000b%\t\\1oW\u0016$h)\u001b7m-\u0006dW/Z\u0001\u001dO\u0016$h*^7fe&\u001cg*\u0011\"mC:\\W\r\u001e$jY24\u0016\r\\;f\u000359W\r\u001e(b\r&dG.T8eK\u0006\u0001r\f\\1cK24\u0016\r\\5eCRLwN\\\u000b\u0003\u0005S\u00032\u0001\u001dBV\u0013\r\u0011i+\u001d\u0002\b\u0005>|G.Z1o\u0003QyF.\u00192fYZ\u000bG.\u001b3bi&|gn\u0018\u0013fcR!\u0011Q\u000bBZ\u0011%\tifRA\u0001\u0002\u0004\u0011I+A\t`Y\u0006\u0014W\r\u001c,bY&$\u0017\r^5p]\u0002\n\u0011\u0003\\1cK24\u0016\r\\5eCRLwN\\(o)\t\u0011i#\u0001\u0007d_:4XM\u001d;MC\n,G\u000eF\u0002��\u0005\u007fCaA!1K\u0001\u0004y\u0018A\u00013g\u0003=9W\r\u001e'bE\u0016d\u0017J\u001c3fq\u0016\u0014H\u0003\u0002Bd\u0005/\u0004BA!3\u0003T6\u0011!1\u001a\u0006\u0005\u0005\u001b\u0014y-A\u0004gK\u0006$XO]3\u000b\t\tE\u0017QE\u0001\u0003[2LAA!6\u0003L\ni1\u000b\u001e:j]\u001eLe\u000eZ3yKJDaA!1L\u0001\u0004y\u0018!\u0004:fM\u0006\u001cGo\u001c:MC\n,G\u000eF\u0003��\u0005;\u0014y\u000e\u0003\u0004\u0003B2\u0003\ra \u0005\b\u0005Cd\u0005\u0019AAN\u0003-a\u0017MY3m\u0007>dW/\u001c8\u0002!5,GO]5d\u0007>tg/\u001a:tS>tG\u0003BAN\u0005ODqA!;N\u0001\u0004\tY*\u0001\u0004nKR\u0014\u0018nY\u0001\u000bO\u0016$()\u0019;dQ\u0016\u001cH\u0003\u0002Bx\u0005c\u0004R\u0001]AL\u0005/AqAa=O\u0001\u0004\u00119\"A\u0003ji\u0016l7/\u0001\u000bhKR4\u0015.\u001a7eg\u0006sGMR5mY\u0006\u0014G.\u001a\u000b\b\u007f\ne(1 
B��\u0011\u0019\u0011\tm\u0014a\u0001\u007f\"9!Q`(A\u0002\t]\u0011AC2pYVlg\u000eT5ti\"91\u0011A(A\u0002\u0005m\u0015AC:uCRL7\u000f^5dg\u0006y\u0011m]:f[\ndW\rU1zY>\fG\r\u0006\u0005\u0004\b\rU1qCB\u000e!\u0015\u0001\u0018qSB\u0005!\u001d\u000181BAN\u0007\u001fI1a!\u0004r\u0005\u0019!V\u000f\u001d7feA\u0019\u0001o!\u0005\n\u0007\rM\u0011OA\u0002B]fDaA!1Q\u0001\u0004y\bbBB\r!\u0002\u0007!qC\u0001\nM&,G\u000e\u001a'jgRDqa!\bQ\u0001\u0004\tY*A\bgS2$XM]\"p]\u0012LG/[8o\u0003Y9W\r^\"bi\u0016<wN]5dC24\u0015\u000e\u001c7UsB,G\u0003BB\u0012\u0007\u001b\u0002Ba!\n\u0004H9!1qEB!\u001d\u0011\u0019Ic!\u0010\u000f\t\r-21\b\b\u0005\u0007[\u0019ID\u0004\u0003\u00040\r]b\u0002BB\u0019\u0007kqA!a\u0003\u00044%\tA.\u0003\u0002kW&\u0011\u0001.[\u0005\u0003M\u001eL!\u0001_3\n\u0007\r}r/\u0001\u0006tiJ,8\r^;sKNLAaa\u0011\u0004F\u00059b)Z1ukJ,WI\\4j]\u0016,'/\u001b8h\u000b:,Xn\u001d\u0006\u0004\u0007\u007f9\u0018\u0002BB%\u0007\u0017\u0012qCR3biV\u0014X-\u00128hS:,WM]5oO\u0016sW/\\:\u000b\t\r\r3Q\t\u0005\b\u0005c\t\u0006\u0019AAN\u000399W\r\u001e\"p_2,\u0017M\u001c$jY2$\u0002ba\u0015\u0004X\re31\f\t\u0006a\u0006]5Q\u000b\t\ba\u000e-\u00111\u0014BU\u0011\u0019\u0011\tM\u0015a\u0001\u007f\"91\u0011\u0004*A\u0002\t]\u0001bBB\u000f%\u0002\u0007\u00111\u0014\u0015\u0006%\u000e}3Q\u000e\t\u0006a\n}4\u0011\r\t\u0005\u0007G\u001aI'\u0004\u0002\u0004f)\u00191qM3\u0002\u0015\u0015D8-\u001a9uS>t7/\u0003\u0003\u0004l\r\u0015$!\u0007\"p_2,\u0017M\u001c$jK2$g)\u001b7m\u000bb\u001cW\r\u001d;j_:\u001c#a!\u0019\u0002#A\f\u0017\u0010\\8bI\u0016CHO]1di&|g\u000e\u0006\u0003\u0004t\r}\u0004\u0003BB;\u0007wj!aa\u001e\u000b\u0007\reT-A\u0005j]\u001a,'/\u001a8dK&!1QPB<\u00055q\u0015MR5mYB\u000b\u0017\u0010\\8bI\"1!\u0011Y*A\u0002}\fQB\\;nKJL7-T1qa\u0016\u0014H\u0003BBC\u0007\u000f\u0003\u0002\"!(\u0002T\u0006m\u0015Q\u0017\u0005\b\u0007\u0013#\u0006\u0019AB\u0004\u0003\u001d\u0001\u0018-\u001f7pC\u0012\fqb\u00195be\u0006\u001cG/\u001a:NCB\u0004XM\u001d\u000b\u0005\u0003#\u001cy\tC\u0004\u0004\nV\u0003\raa\u0002\u
0002\u0017\u0019LG\u000e\\'jgNLgn\u001a\u000b\u0005\u0007+\u001bY\n\u0005\u0003\u0004v\r]\u0015\u0002BBM\u0007o\u0012ABT1GS2d7i\u001c8gS\u001eDaA!1W\u0001\u0004y\u0018!\u00042mC:\\W\r\u001e(B\r&dG\u000e\u0006\u0003\u0004\u0016\u000e\u0005\u0006B\u0002Ba/\u0002\u0007q0A\ncY\u0006t7.\u001a;GS2d7\t[1s\u001f:d\u0017\u0010\u0006\u0003\u0004\u0016\u000e\u001d\u0006B\u0002Ba1\u0002\u0007q0\u0001\ncY\u0006t7.\u001a;GS2dg*^7P]2LH\u0003BBK\u0007[CaA!1Z\u0001\u0004y\u0018a\u0007<bY&$\u0017\r^3NCB\u001c6\r[3nC6+WNY3sg\"L\u0007\u000f\u0006\u0003\u0002V\rM\u0006B\u0002Ba5\u0002\u0007q\u0010K\u0003[\u0005{\u0012I\tK\u0003[\u0007s\u001b\t\rE\u0003q\u0005\u007f\u001aY\f\u0005\u0003\u0003\u001a\ru\u0016\u0002BB`\u0005K\u0011Q$\u00168tkB\u0004xN\u001d;fI>\u0003XM]1uS>tW\t_2faRLwN\\\u0012\u0003\u0007w\u000b\u0011\"\\1q\u001d\u00063\u0015\u000e\u001c7\u0015\t\rU5q\u0019\u0005\u0007\u0005\u0003\\\u0006\u0019A@\u0002\r\u0019LG\u000e\u001c(B)\u0011\u0019)j!4\t\r\t\u0005G\f1\u0001��\u0003-!WmY5eK6{G-\u001a7\u0015\u0005\u0005m\u0015!E4f]\u0016\u0014\u0018\r^3DY\u0016\fg\u000eR1uCRA1q[Bo\u0007C\u001c)\u000f\u0005\u0005q\u00073|8QSAN\u0013\r\u0019Y.\u001d\u0002\u0007)V\u0004H.Z\u001a\t\u0013\r}g\f%AA\u0002\rU\u0015\u0001\u00048b\r&dGnQ8oM&<\u0007\"CBr=B\u0005\t\u0019\u0001BU\u0003E\u0011XMZ1di>\u0014H*\u00192fY\u001ac\u0017m\u001a\u0005\n\u0007Ot\u0006\u0013!a\u0001\u00037\u000bA\u0002Z3dS\u0012,G-T8eK2\f1dZ3oKJ\fG/Z\"mK\u0006tG)\u0019;bI\u0011,g-Y;mi\u0012\nTCABwU\u0011\u0019)ja<,\u0005\rE\b\u0003BBz\u0007{l!a!>\u000b\t\r]8\u0011`\u0001\nk:\u001c\u0007.Z2lK\u0012T1aa?r\u0003)\tgN\\8uCRLwN\\\u0005\u0005\u0007\u007f\u001c)PA\tv]\u000eDWmY6fIZ\u000b'/[1oG\u0016\f1dZ3oKJ\fG/Z\"mK\u0006tG)\u0019;bI\u0011,g-Y;mi\u0012\u0012TC\u0001C\u0003U\u0011\u0011Ika<\u00027\u001d,g.\u001a:bi\u0016\u001cE.Z1o\t\u0006$\u0018\r\n3fM\u0006,H\u000e\u001e\u00134+\t!YA\u000b\u0003\u0002\u001c\u000e=\b")
/* loaded from: input_file:com/databricks/labs/automl/sanitize/DataSanitizer.class */
public class DataSanitizer implements DataValidation, SparkSessionWrapper {
    // Dataset held for the lifetime of this instance (final).
    private final Dataset<Row> data;
    // Configuration state. Each field below, down to _naFillMode, has a same-named private
    // accessor/mutator pair plus a public get/set method further down in this class.
    private String _labelCol;
    private String _featureCol;
    private String _numericFillStat;
    private String _characterFillStat;
    private int _modelSelectionDistinctThreshold;
    private String[] _fieldsToIgnoreInVector;
    private double _filterPrecision;
    private int _parallelism;
    private Map<String, String> _categoricalNAFillMap;
    private Map<String, Object> _numericNAFillMap;
    private String _characterNABlanketFill;
    private double _numericNABlanketFill;
    private String _naFillMode;
    // Values that setNAFillMode accepts; anything else fails its require(...) check.
    private final List<String> _allowableNAFillModes;
    // Set to true by labelValidationOn().
    private boolean _labelValidation;
    // Caches filled on first access by spark$lzycompute() and sc$lzycompute().
    private SparkSession spark;
    private SparkContext sc;
    // Logger cache filled on first access; its "initialised" flag is bitmap$trans$0.
    private transient Logger com$databricks$labs$automl$utils$DataValidation$$logger;
    // True once the logger cache above has been populated.
    private volatile transient boolean bitmap$trans$0;
    // Initialisation flags for the caches: bit 0x1 = spark populated, bit 0x2 = sc populated.
    private volatile byte bitmap$0;

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>The explicit {@code DataValidation.super} qualifier is required: an unqualified call
     * resolves to this override and recurses until a {@link StackOverflowError}.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @return the list produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public List<String> _allowableDateTimeConversions() {
        return DataValidation.super._allowableDateTimeConversions();
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @return the list produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public List<String> _allowableCategoricalFilterModes() {
        return DataValidation.super._allowableCategoricalFilterModes();
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @return the list produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public List<String> _allowableCardinalilties() {
        return DataValidation.super._allowableCardinalilties();
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @param str forwarded unchanged
     * @param seq forwarded unchanged
     * @return the string produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public String invalidateSelection(String str, Seq<String> seq) {
        return DataValidation.super.invalidateSelection(str, seq);
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @param list forwarded unchanged
     * @return the encoder/field-name pair produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public Tuple2<OneHotEncoder, String[]> oneHotEncodeStrings(List<String> list) {
        return DataValidation.super.oneHotEncodeStrings(list);
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @param list forwarded unchanged
     * @return the indexer/field-name pair produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public Tuple2<StringIndexer[], String[]> indexStrings(List<String> list) {
        return DataValidation.super.indexStrings(list);
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @param dataset forwarded unchanged
     * @param list forwarded unchanged
     * @param list2 forwarded unchanged
     * @param str forwarded unchanged
     * @return the frame/field-list pair produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public Tuple2<Dataset<Row>, List<String>> convertDateAndTime(Dataset<Row> dataset, List<String> list, List<String> list2, String str) {
        return DataValidation.super.convertDateAndTime(dataset, list, list2, str);
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @param list forwarded unchanged
     * @param list2 forwarded unchanged
     * @param str forwarded unchanged
     * @return the triple produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public Tuple3<StringIndexer[], String[], VectorAssembler> generateAssembly(List<String> list, List<String> list2, String str) {
        return DataValidation.super.generateAssembly(list, list2, str);
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @param dataset forwarded unchanged
     * @param str forwarded unchanged
     * @param str2 forwarded unchanged
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public void validateLabelAndFeatures(Dataset<Row> dataset, String str, String str2) {
        DataValidation.super.validateLabelAndFeatures(dataset, str, str2);
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @param dataset forwarded unchanged
     * @param str forwarded unchanged
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public void validateFieldPresence(Dataset<Row> dataset, String str) {
        DataValidation.super.validateFieldPresence(dataset, str);
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @param dataset forwarded unchanged
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public void validateInputDataframe(Dataset<Row> dataset) {
        DataValidation.super.validateInputDataframe(dataset);
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @param dataset forwarded unchanged
     * @param list forwarded unchanged
     * @param i forwarded unchanged
     * @param i2 forwarded unchanged
     * @return the result produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public ValidatedCategoricalFields validateCardinality(Dataset<Row> dataset, List<String> list, int i, int i2) {
        return DataValidation.super.validateCardinality(dataset, list, i, i2);
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @return the int produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public int validateCardinality$default$3() {
        return DataValidation.super.validateCardinality$default$3();
    }

    /**
     * Delegates to the {@link DataValidation} implementation of this method.
     *
     * <p>Uses an interface-super call; calling the method unqualified from inside its own
     * override would never terminate.
     * NOTE(review): assumes {@code DataValidation} supplies a default body — confirm.
     *
     * @return the int produced by the {@code DataValidation} implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public int validateCardinality$default$4() {
        return DataValidation.super.validateCardinality$default$4();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v10, types: [com.databricks.labs.automl.sanitize.DataSanitizer] */
    private SparkSession spark$lzycompute() {
        SparkSession spark;
        ?? r0 = this;
        synchronized (r0) {
            if (((byte) (this.bitmap$0 & 1)) == 0) {
                spark = spark();
                this.spark = spark;
                r0 = this;
                r0.bitmap$0 = (byte) (this.bitmap$0 | 1);
            }
        }
        return this.spark;
    }

    /**
     * Returns the cached session, computing it through {@code spark$lzycompute()} when bit
     * {@code 0x1} of {@code bitmap$0} is still clear.
     *
     * @return the {@code spark} field once populated
     */
    @Override // com.databricks.labs.automl.utils.SparkSessionWrapper
    public SparkSession spark() {
        boolean alreadyPopulated = ((byte) (this.bitmap$0 & 1)) != 0;
        if (alreadyPopulated) {
            return this.spark;
        }
        return spark$lzycompute();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v10, types: [com.databricks.labs.automl.sanitize.DataSanitizer] */
    private SparkContext sc$lzycompute() {
        SparkContext sc;
        ?? r0 = this;
        synchronized (r0) {
            if (((byte) (this.bitmap$0 & 2)) == 0) {
                sc = sc();
                this.sc = sc;
                r0 = this;
                r0.bitmap$0 = (byte) (this.bitmap$0 | 2);
            }
        }
        return this.sc;
    }

    /**
     * Returns the cached context, computing it through {@code sc$lzycompute()} when bit
     * {@code 0x2} of {@code bitmap$0} is still clear.
     *
     * @return the {@code sc} field once populated
     */
    @Override // com.databricks.labs.automl.utils.SparkSessionWrapper
    public SparkContext sc() {
        boolean alreadyPopulated = ((byte) (this.bitmap$0 & 2)) != 0;
        if (alreadyPopulated) {
            return this.sc;
        }
        return sc$lzycompute();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v8, types: [com.databricks.labs.automl.sanitize.DataSanitizer] */
    private Logger com$databricks$labs$automl$utils$DataValidation$$logger$lzycompute() {
        Logger com$databricks$labs$automl$utils$DataValidation$$logger;
        ?? r0 = this;
        synchronized (r0) {
            if (!this.bitmap$trans$0) {
                com$databricks$labs$automl$utils$DataValidation$$logger = com$databricks$labs$automl$utils$DataValidation$$logger();
                this.com$databricks$labs$automl$utils$DataValidation$$logger = com$databricks$labs$automl$utils$DataValidation$$logger;
                r0 = this;
                r0.bitmap$trans$0 = true;
            }
        }
        return this.com$databricks$labs$automl$utils$DataValidation$$logger;
    }

    /**
     * Returns the cached logger, computing it through the matching {@code $lzycompute} method
     * while {@code bitmap$trans$0} is still false.
     *
     * @return the logger field once populated
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public Logger com$databricks$labs$automl$utils$DataValidation$$logger() {
        if (this.bitmap$trans$0) {
            return this.com$databricks$labs$automl$utils$DataValidation$$logger;
        }
        return com$databricks$labs$automl$utils$DataValidation$$logger$lzycompute();
    }

    /** Reads the backing {@code _labelCol} field. */
    private String _labelCol() {
        return _labelCol;
    }

    /**
     * Overwrites the backing {@code _labelCol} field.
     *
     * @param str value to store
     */
    private void _labelCol_$eq(String str) {
        _labelCol = str;
    }

    /** Reads the backing {@code _featureCol} field. */
    private String _featureCol() {
        return _featureCol;
    }

    /**
     * Overwrites the backing {@code _featureCol} field.
     *
     * @param str value to store
     */
    private void _featureCol_$eq(String str) {
        _featureCol = str;
    }

    /** Reads the backing {@code _numericFillStat} field. */
    private String _numericFillStat() {
        return _numericFillStat;
    }

    /**
     * Overwrites the backing {@code _numericFillStat} field.
     *
     * @param str value to store
     */
    private void _numericFillStat_$eq(String str) {
        _numericFillStat = str;
    }

    /** Reads the backing {@code _characterFillStat} field. */
    private String _characterFillStat() {
        return _characterFillStat;
    }

    /**
     * Overwrites the backing {@code _characterFillStat} field.
     *
     * @param str value to store
     */
    private void _characterFillStat_$eq(String str) {
        _characterFillStat = str;
    }

    /** Reads the backing {@code _modelSelectionDistinctThreshold} field. */
    private int _modelSelectionDistinctThreshold() {
        return _modelSelectionDistinctThreshold;
    }

    /**
     * Overwrites the backing {@code _modelSelectionDistinctThreshold} field.
     *
     * @param i value to store
     */
    private void _modelSelectionDistinctThreshold_$eq(int i) {
        _modelSelectionDistinctThreshold = i;
    }

    /** Reads the backing {@code _fieldsToIgnoreInVector} field (the same array reference, not a copy). */
    private String[] _fieldsToIgnoreInVector() {
        return _fieldsToIgnoreInVector;
    }

    /**
     * Overwrites the backing {@code _fieldsToIgnoreInVector} field with the given array reference.
     *
     * @param strArr array to store; it is not copied
     */
    private void _fieldsToIgnoreInVector_$eq(String[] strArr) {
        _fieldsToIgnoreInVector = strArr;
    }

    /** Reads the backing {@code _filterPrecision} field. */
    private double _filterPrecision() {
        return _filterPrecision;
    }

    /**
     * Overwrites the backing {@code _filterPrecision} field.
     *
     * @param d value to store
     */
    private void _filterPrecision_$eq(double d) {
        _filterPrecision = d;
    }

    /** Reads the backing {@code _parallelism} field. */
    private int _parallelism() {
        return _parallelism;
    }

    /**
     * Overwrites the backing {@code _parallelism} field.
     *
     * @param i value to store
     */
    private void _parallelism_$eq(int i) {
        _parallelism = i;
    }

    /** Reads the backing {@code _categoricalNAFillMap} field. */
    private Map<String, String> _categoricalNAFillMap() {
        return _categoricalNAFillMap;
    }

    /**
     * Overwrites the backing {@code _categoricalNAFillMap} field.
     *
     * @param map value to store
     */
    private void _categoricalNAFillMap_$eq(Map<String, String> map) {
        _categoricalNAFillMap = map;
    }

    /** Reads the backing {@code _numericNAFillMap} field. */
    private Map<String, Object> _numericNAFillMap() {
        return _numericNAFillMap;
    }

    /**
     * Overwrites the backing {@code _numericNAFillMap} field.
     *
     * @param map value to store
     */
    private void _numericNAFillMap_$eq(Map<String, Object> map) {
        _numericNAFillMap = map;
    }

    /** Reads the backing {@code _characterNABlanketFill} field. */
    private String _characterNABlanketFill() {
        return _characterNABlanketFill;
    }

    /**
     * Overwrites the backing {@code _characterNABlanketFill} field.
     *
     * @param str value to store
     */
    private void _characterNABlanketFill_$eq(String str) {
        _characterNABlanketFill = str;
    }

    /** Reads the backing {@code _numericNABlanketFill} field. */
    private double _numericNABlanketFill() {
        return _numericNABlanketFill;
    }

    /**
     * Overwrites the backing {@code _numericNABlanketFill} field.
     *
     * @param d value to store
     */
    private void _numericNABlanketFill_$eq(double d) {
        _numericNABlanketFill = d;
    }

    /** Reads the backing {@code _naFillMode} field. */
    private String _naFillMode() {
        return _naFillMode;
    }

    /**
     * Overwrites the backing {@code _naFillMode} field. No validation happens here.
     *
     * @param str value to store
     */
    private void _naFillMode_$eq(String str) {
        _naFillMode = str;
    }

    /** Reads the final {@code _allowableNAFillModes} field. */
    private final List<String> _allowableNAFillModes() {
        return _allowableNAFillModes;
    }

    /**
     * Stores {@code str} in the {@code _labelCol} field.
     *
     * @param str value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setLabelCol(String str) {
        this._labelCol = str;
        return this;
    }

    /**
     * Stores {@code str} in the {@code _featureCol} field.
     *
     * @param str value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setFeatureCol(String str) {
        this._featureCol = str;
        return this;
    }

    /**
     * Stores {@code str} in the {@code _numericFillStat} field.
     *
     * @param str value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setNumericFillStat(String str) {
        this._numericFillStat = str;
        return this;
    }

    /**
     * Stores {@code str} in the {@code _characterFillStat} field.
     *
     * @param str value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setCharacterFillStat(String str) {
        this._characterFillStat = str;
        return this;
    }

    /**
     * Stores {@code i} in the {@code _modelSelectionDistinctThreshold} field.
     *
     * @param i value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setModelSelectionDistinctThreshold(int i) {
        this._modelSelectionDistinctThreshold = i;
        return this;
    }

    /**
     * Stores the given array reference in the {@code _fieldsToIgnoreInVector} field.
     *
     * @param strArr array to store; it is not copied
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setFieldsToIgnoreInVector(String[] strArr) {
        this._fieldsToIgnoreInVector = strArr;
        return this;
    }

    /**
     * Stores {@code i} in the {@code _parallelism} field.
     *
     * @param i value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setParallelism(int i) {
        this._parallelism = i;
        return this;
    }

    /**
     * Stores {@code d} in the {@code _filterPrecision} field, printing a warning first when the
     * value equals zero.
     *
     * @param d value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setFilterPrecision(double d) {
        boolean isZeroPrecision = d == 0.0d;
        if (isZeroPrecision) {
            Predef$.MODULE$.println("Warning! Precision of 0 is an exact calculation of quantiles and may not be performant!");
        }
        this._filterPrecision = d;
        return this;
    }

    /**
     * Stores {@code map} in the {@code _categoricalNAFillMap} field.
     *
     * @param map value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setCategoricalNAFillMap(Map<String, String> map) {
        this._categoricalNAFillMap = map;
        return this;
    }

    /**
     * Stores {@code map} in the {@code _numericNAFillMap} field.
     *
     * @param map value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setNumericNAFillMap(Map<String, Object> map) {
        this._numericNAFillMap = map;
        return this;
    }

    /**
     * Stores {@code str} in the {@code _characterNABlanketFill} field.
     *
     * @param str value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setCharacterNABlanketFillValue(String str) {
        this._characterNABlanketFill = str;
        return this;
    }

    /**
     * Stores {@code d} in the {@code _numericNABlanketFill} field.
     *
     * @param d value to store
     * @return this instance, so calls can be chained
     */
    public DataSanitizer setNumericNABlanketFillValue(double d) {
        this._numericNABlanketFill = d;
        return this;
    }

    /**
     * Stores {@code str} in the {@code _naFillMode} field after checking, via Scala's
     * {@code Predef.require}, that {@code _allowableNAFillModes} contains it.
     *
     * <p>The failure message is built lazily inside the supplier, so it is only assembled when
     * the check fails.
     *
     * @param str mode to store
     * @return this instance, so calls can be chained
     * @throws IllegalArgumentException declared on the signature; raised by the {@code require}
     *     call when {@code str} is not an allowed mode
     */
    public DataSanitizer setNAFillMode(String str) throws IllegalArgumentException {
        boolean isSupported = this._allowableNAFillModes.contains(str);
        Predef$.MODULE$.require(isSupported, () ->
                "NA fill mode " + str + " is not supported. Must be one of : " + this._allowableNAFillModes.mkString(", "));
        this._naFillMode = str;
        return this;
    }

    /** Returns the value held by the {@code _labelCol} accessor. */
    public String getLabel() {
        return _labelCol();
    }

    /** Returns the value held by the {@code _featureCol} accessor. */
    public String getFeatureCol() {
        return _featureCol();
    }

    /** Returns the value held by the {@code _numericFillStat} accessor. */
    public String getNumericFillStat() {
        return _numericFillStat();
    }

    /** Returns the value held by the {@code _characterFillStat} accessor. */
    public String getCharacterFillStat() {
        return _characterFillStat();
    }

    /** Returns the value held by the {@code _modelSelectionDistinctThreshold} accessor. */
    public int getModelSelectionDistinctThreshold() {
        return _modelSelectionDistinctThreshold();
    }

    /** Returns the array held by the {@code _fieldsToIgnoreInVector} accessor (no copy is made here). */
    public String[] getFieldsToIgnoreInVector() {
        return _fieldsToIgnoreInVector();
    }

    /** Returns the value held by the {@code _parallelism} accessor. */
    public int getParallelism() {
        return _parallelism();
    }

    /** Returns the value held by the {@code _filterPrecision} accessor. */
    public double getFilterPrecision() {
        return _filterPrecision();
    }

    /** Returns the map held by the {@code _categoricalNAFillMap} accessor. */
    public Map<String, String> getCategoricalNAFillMap() {
        return _categoricalNAFillMap();
    }

    /** Returns the map held by the {@code _numericNAFillMap} accessor. */
    public Map<String, Object> getNumericNAFillMap() {
        return _numericNAFillMap();
    }

    /** Returns the value held by the {@code _characterNABlanketFill} accessor. */
    public String getCharacterNABlanketFillValue() {
        return _characterNABlanketFill();
    }

    /** Returns the value held by the {@code _numericNABlanketFill} accessor. */
    public double getNumericNABlanketFillValue() {
        return _numericNABlanketFill();
    }

    /** Returns the value held by the {@code _naFillMode} accessor. */
    public String getNaFillMode() {
        return _naFillMode();
    }

    /** Reads the {@code _labelValidation} flag; {@code refactorLabel} uses it to decide whether to index the label. */
    private boolean _labelValidation() {
        return this._labelValidation;
    }

    /** Writes the {@code _labelValidation} flag. */
    private void _labelValidation_$eq(boolean z) {
        this._labelValidation = z;
    }

    /**
     * Sets the {@code _labelValidation} flag to {@code true}. Nothing in the visible code
     * sets it back to {@code false}.
     *
     * @return this instance, so calls can be chained
     */
    public DataSanitizer labelValidationOn() {
        _labelValidation_$eq(true);
        return this;
    }

    /**
     * Replaces the label column with its string-indexed form.
     *
     * <p>A {@code StringIndexer} writes the indexed values to a temporary
     * {@code <label>_si} column; that column then overwrites the label column and the
     * temporary column is dropped.
     *
     * <p>The indexer is fit on and applied to the {@code dataset} argument. Previously the
     * argument was ignored and {@code this.data} was used instead, which only gave the
     * right result when the caller happened to pass {@code this.data}.
     *
     * @param dataset data whose label column should be indexed
     * @return {@code dataset} with the label column replaced by its indexed values
     */
    private Dataset<Row> convertLabel(Dataset<Row> dataset) {
        String indexedName = _labelCol() + "_si";
        return getLabelIndexer(dataset)
                .fit(dataset)
                .transform(dataset)
                .withColumn(_labelCol(), functions$.MODULE$.col(indexedName))
                .drop(indexedName);
    }

    /**
     * Builds an unfitted {@code StringIndexer} that reads the label column and writes to
     * {@code <label>_si}.
     *
     * @param dataset not used by this method
     * @return the configured indexer
     */
    public StringIndexer getLabelIndexer(Dataset<Row> dataset) {
        String inputName = _labelCol();
        String outputName = _labelCol() + "_si";
        StringIndexer indexer = new StringIndexer();
        return indexer.setInputCol(inputName).setOutputCol(outputName);
    }

    /**
     * Indexes the label column when its type calls for it.
     *
     * <p>Walks the schema of {@code dataset}; if the field named {@code str} has type
     * String, Boolean or Binary, label validation is switched on. When the flag is set
     * (by this call or an earlier one) the label is converted via {@code convertLabel}.
     *
     * @param dataset data to inspect and possibly convert
     * @param str name of the label field to look for in the schema
     * @return the converted dataset, or {@code dataset} unchanged when the flag is off
     */
    private Dataset<Row> refactorLabel(Dataset<Row> dataset, String str) {
        new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(SchemaUtils$.MODULE$.extractSchema(dataset.schema()))).foreach(field -> {
            // Fields other than the label are ignored.
            if (!java.util.Objects.equals(str, field.fieldName())) {
                return None$.MODULE$;
            }
            DataType labelType = field.dataType();
            boolean needsIndexing = StringType$.MODULE$.equals(labelType)
                    || BooleanType$.MODULE$.equals(labelType)
                    || BinaryType$.MODULE$.equals(labelType);
            return needsIndexing ? this.labelValidationOn() : None$.MODULE$;
        });
        if (_labelValidation()) {
            return convertLabel(dataset);
        }
        return dataset;
    }

    /**
     * Validates a fill statistic name and translates the percentile aliases to the
     * {@code "NN%"} form ({@code 25p}, {@code median}, {@code 75p} become {@code 25%},
     * {@code 50%}, {@code 75%}); any other accepted name is returned unchanged.
     *
     * @param str statistic name; asserted to be one of min, 25p, mean, median, 75p, max
     * @return the translated statistic name
     */
    private String metricConversion(String str) {
        String[] allowed = {"min", "25p", "mean", "median", "75p", "max"};
        boolean known = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(allowed)).contains(str);
        Predef$.MODULE$.assert(known, () -> {
            return "The metric supplied, '" + str + "' is not in: " + this.invalidateSelection(str, Predef$.MODULE$.wrapRefArray(allowed));
        });
        switch (str) {
            case "25p":
                return "25%";
            case "median":
                return "50%";
            case "75p":
                return "75%";
            default:
                return str;
        }
    }

    /**
     * Splits {@code list} into consecutive slices of roughly
     * {@code list.length() / parallelism} elements.
     *
     * <p>Both the parallelism and the resulting batch size are clamped to at least 1.
     * Without the clamp, a list shorter than the parallelism produced a batch size of 0,
     * and a range step of 0 throws {@code IllegalArgumentException}; a parallelism of 0
     * caused a division by zero.
     *
     * <p>The range keeps its inclusive upper bound, so when the length is an exact
     * multiple of the batch size the final slice is empty.
     *
     * @param list field names to split
     * @return the slices, in order
     */
    private List<String>[] getBatches(List<String> list) {
        ArrayBuffer batches = ArrayBuffer$.MODULE$.apply(Nil$.MODULE$);
        int parallelism = Math.max(1, _parallelism());
        // A step of 0 is rejected by Range, so never let the batch size drop below 1.
        int batchSize = Math.max(1, list.length() / parallelism);
        RichInt$.MODULE$.to$extension0(Predef$.MODULE$.intWrapper(0), list.length()).by(batchSize).foreach$mVc$sp(i -> {
            batches.append(Predef$.MODULE$.wrapRefArray(new List[]{list.slice(i, i + batchSize)}));
        });
        return (List[]) batches.toArray(ClassTag$.MODULE$.apply(List.class));
    }

    /**
     * Computes Spark summary statistics for the given fields.
     *
     * @param dataset data to summarize
     * @param list names of the fields to keep in the result
     * @param str comma-separated statistic names passed to {@code summary}; when empty,
     *            the fields are summarized batch by batch with no explicit statistics list
     * @return a dataset holding the "Summary" column plus the requested fields
     */
    private Dataset<Row> getFieldsAndFillable(Dataset<Row> dataset, List<String> list, String str) {
        Dataset<Row> select;
        // Repartition to the configured task count and cache, since the data is scanned below.
        Dataset cache = dataset.repartition(PerformanceSettings$.MODULE$.parTasks()).cache();
        // The per-row callback has an empty body; presumably this action exists only to
        // force the cache to materialize (NOTE(review): confirm intent).
        cache.foreach(row -> {
            $anonfun$getFieldsAndFillable$1(row);
            return BoxedUnit.UNIT;
        });
        // Requested fields with "Summary" prepended.
        List list2 = (List) list.$plus$colon("Summary", List$.MODULE$.canBuildFrom());
        if (str.isEmpty()) {
            // Summarize each batch of fields separately, keep "Summary" plus the batch's
            // fields, then join the per-batch results on "Summary" (right side broadcast).
            select = (Dataset) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(getBatches(list))).map(list3 -> {
                return cache.select((Seq) list3.map(str2 -> {
                    return functions$.MODULE$.col(str2);
                }, List$.MODULE$.canBuildFrom())).summary(Nil$.MODULE$).select((Seq) ((List) list3.$plus$colon("Summary", List$.MODULE$.canBuildFrom())).map(str3 -> {
                    return functions$.MODULE$.col(str3);
                }, List$.MODULE$.canBuildFrom()));
            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Dataset.class))))).seq().toArray(ClassTag$.MODULE$.apply(Dataset.class)))).reduce((dataset2, dataset3) -> {
                return dataset2.join(functions$.MODULE$.broadcast(dataset3), Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"Summary"})));
            });
        } else {
            // Strip spaces, split on commas, and request exactly those statistics.
            select = cache.summary(Predef$.MODULE$.wrapRefArray(str.replaceAll(" ", "").split(","))).select((Seq) list2.map(str2 -> {
                return functions$.MODULE$.col(str2);
            }, List$.MODULE$.canBuildFrom()));
        }
        Dataset<Row> dataset4 = select;
        // Blocking unpersist of the cached copy before returning.
        cache.unpersist(true);
        return dataset4;
    }

    /**
     * Pairs each summarized column name with its value from the first summary row.
     *
     * @param dataset data to summarize
     * @param list names of the fields to summarize
     * @param str statistic name(s) forwarded to {@code getFieldsAndFillable}
     * @return (column name, value) pairs taken from the first collected row, with the
     *         "Summary" column removed
     */
    private Tuple2<String, Object>[] assemblePayload(Dataset<Row> dataset, List<String> list, String str) {
        Dataset stats = getFieldsAndFillable(dataset, list, str).drop(functions$.MODULE$.col("Summary"));
        String[] columnNames = stats.columns();
        // Only the first collected row is used.
        Row firstRow = ((Row[]) stats.collect())[0];
        Object[] values = (Object[]) firstRow.toSeq().toArray(ClassTag$.MODULE$.Any());
        return (Tuple2[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(columnNames))
                .zip(Predef$.MODULE$.genericWrapArray(values), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
    }

    /**
     * Maps a fill statistic name to its enum constant.
     *
     * @param str {@code "min"} or {@code "max"}
     * @return the matching MIN or MAX constant
     * @throws MatchError for any other value, including {@code null}
     */
    private FeatureEngineeringEnums.FeatureEngineeringConstants getCategoricalFillType(String str) {
        if ("min".equals(str)) {
            return FeatureEngineeringEnums$.MODULE$.MIN();
        }
        if ("max".equals(str)) {
            return FeatureEngineeringEnums$.MODULE$.MAX();
        }
        throw new MatchError(str);
    }

    /**
     * Chooses a fill value for each boolean field: the least frequent value in MIN mode,
     * the most frequent value in MAX mode.
     *
     * <p>Null values are filtered out before grouping. {@code count(col)} does not count
     * nulls, so the null group always had a count of 0. In MIN mode that group sorted
     * first, and reading it with {@code getBoolean(0)} failed on the null value. A field
     * with no non-null values still fails, because there is no row to read.
     *
     * @param dataset data to inspect
     * @param list names of the boolean fields
     * @param str fill statistic, resolved through {@code getCategoricalFillType}
     * @return (field name, boxed boolean fill value) pairs, in the order of {@code list}
     * @throws BooleanFieldFillException when the resolved fill type is neither MIN nor MAX
     */
    private Tuple2<String, Object>[] getBooleanFill(Dataset<Row> dataset, List<String> list, String str) throws BooleanFieldFillException {
        FeatureEngineeringEnums.FeatureEngineeringConstants categoricalFillType = getCategoricalFillType(str);
        return (Tuple2[]) ((TraversableOnce) list.map(str2 -> {
            String countName = FeatureEngineeringEnums$.MODULE$.COUNT_COL().value();
            // Drop nulls first so the zero-count null group can never be selected.
            Dataset agg = dataset.select(str2, Predef$.MODULE$.wrapRefArray(new String[0]))
                    .filter(functions$.MODULE$.col(str2).isNotNull())
                    .groupBy(str2, Predef$.MODULE$.wrapRefArray(new String[0]))
                    .agg(functions$.MODULE$.count(functions$.MODULE$.col(str2)).alias(countName), Predef$.MODULE$.wrapRefArray(new Column[0]));
            Column ordering;
            if (java.util.Objects.equals(FeatureEngineeringEnums$.MODULE$.MIN(), categoricalFillType)) {
                ordering = functions$.MODULE$.col(countName).asc();
            } else if (java.util.Objects.equals(FeatureEngineeringEnums$.MODULE$.MAX(), categoricalFillType)) {
                ordering = functions$.MODULE$.col(countName).desc();
            } else {
                throw new BooleanFieldFillException(str2, str, FeatureEngineeringAllowables$.MODULE$.ALLOWED_CATEGORICAL_FILL_MODES().values(), BooleanFieldFillException$.MODULE$.apply$default$4());
            }
            Row[] rowArr = (Row[]) agg.orderBy(Predef$.MODULE$.wrapRefArray(new Column[]{ordering})).head(1);
            return new Tuple2(str2, BoxesRunTime.boxToBoolean(((Row) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(rowArr)).head()).getBoolean(0)));
        }, List$.MODULE$.canBuildFrom())).toArray(ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Gathers fill values for the categorical, numeric and boolean fields of
     * {@code dataset}.
     *
     * <p>Categorical and boolean fields use the character fill statistic; numeric fields
     * use the numeric fill statistic. Each statistic goes through
     * {@code metricConversion} first.
     *
     * @param dataset data to inspect
     * @return the three groups of (field, fill value) pairs
     */
    private NaFillPayload payloadExtraction(Dataset<Row> dataset) {
        FieldTypes fieldTypes = SchemaUtils$.MODULE$.extractTypes(dataset, _labelCol(), _fieldsToIgnoreInVector());
        Tuple2<String, Object>[] categoricalFills =
                assemblePayload(dataset, fieldTypes.categoricalFields(), metricConversion(_characterFillStat()));
        Tuple2<String, Object>[] numericFills =
                assemblePayload(dataset, fieldTypes.numericFields(), metricConversion(_numericFillStat()));
        Tuple2<String, Object>[] booleanFills =
                getBooleanFill(dataset, fieldTypes.booleanFields(), metricConversion(_characterFillStat()));
        return new NaFillPayload(categoricalFills, numericFills, booleanFills);
    }

    /**
     * Converts (field, value) pairs into a map from field name to double.
     *
     * <p>The label column is skipped. A pair whose value cannot be rendered and parsed as
     * a double is skipped silently.
     *
     * @param tuple2Arr (field name, value) pairs
     * @return immutable map of the pairs that parsed successfully
     */
    private Map<String, Object> numericMapper(Tuple2<String, Object>[] tuple2Arr) {
        ArrayBuffer parsed = new ArrayBuffer();
        for (Tuple2<String, Object> entry : tuple2Arr) {
            String column = (String) entry._1();
            if (java.util.Objects.equals(column, this._labelCol())) {
                continue;
            }
            try {
                double value = Double.parseDouble(entry._2().toString());
                parsed.$plus$eq(new Tuple2(entry._1(), BoxesRunTime.boxToDouble(value)));
            } catch (Exception ignored) {
                // Best effort: null or non-numeric values are left out of the map.
            }
        }
        Object[] pairs = (Object[]) parsed.toArray(ClassTag$.MODULE$.apply(Tuple2.class));
        return new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(pairs)).toMap(Predef$.MODULE$.$conforms());
    }

    /**
     * Converts (field, value) pairs into a map from field name to the value's string form.
     *
     * <p>The label column is skipped. A pair whose value cannot be rendered (for example a
     * null value) is skipped silently.
     *
     * @param tuple2Arr (field name, value) pairs
     * @return immutable map of the pairs that converted successfully
     */
    private Map<String, String> characterMapper(Tuple2<String, Object>[] tuple2Arr) {
        ArrayBuffer rendered = new ArrayBuffer();
        for (Tuple2<String, Object> entry : tuple2Arr) {
            String column = (String) entry._1();
            if (java.util.Objects.equals(column, this._labelCol())) {
                continue;
            }
            try {
                rendered.$plus$eq(new Tuple2(entry._1(), entry._2().toString()));
            } catch (Exception ignored) {
                // Best effort: values that cannot be rendered are left out of the map.
            }
        }
        Object[] pairs = (Object[]) rendered.toArray(ClassTag$.MODULE$.apply(Tuple2.class));
        return new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(pairs)).toMap(Predef$.MODULE$.$conforms());
    }

    /**
     * Builds a fill configuration from statistics computed on {@code dataset}.
     *
     * @param dataset data to inspect
     * @return fill maps for categorical, numeric and boolean columns
     */
    private NaFillConfig fillMissing(Dataset<Row> dataset) {
        NaFillPayload payload = payloadExtraction(dataset);
        Map<String, String> categoricalFills = characterMapper(payload.categorical());
        Map<String, Object> numericFills = numericMapper(payload.numeric());
        return new NaFillConfig(
                categoricalFills,
                numericFills,
                new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(payload.m183boolean())).toMap(Predef$.MODULE$.$conforms()));
    }

    /**
     * Builds a fill configuration that uses the blanket values for every field: the
     * numeric blanket value for numeric fields, the character blanket value for
     * categorical fields, and {@code false} for boolean fields.
     *
     * @param dataset data whose schema determines the field lists
     * @return the blanket fill configuration
     */
    private NaFillConfig blanketNAFill(Dataset<Row> dataset) {
        FieldTypes fieldTypes = SchemaUtils$.MODULE$.extractTypes(dataset, _labelCol(), _fieldsToIgnoreInVector());
        ArrayBuffer categoricalPairs = new ArrayBuffer();
        ArrayBuffer numericPairs = new ArrayBuffer();
        fieldTypes.numericFields().foreach(field -> {
            Object fillValue = BoxesRunTime.boxToDouble(this._numericNABlanketFill());
            return numericPairs.$plus$eq(new Tuple2(field, fillValue));
        });
        fieldTypes.categoricalFields().foreach(field -> {
            String fillValue = this._characterNABlanketFill();
            return categoricalPairs.$plus$eq(new Tuple2(field, fillValue));
        });
        Map<String, String> categoricalFills = characterMapper((Tuple2[]) categoricalPairs.toArray(ClassTag$.MODULE$.apply(Tuple2.class)));
        Map<String, Object> numericFills = numericMapper((Tuple2[]) numericPairs.toArray(ClassTag$.MODULE$.apply(Tuple2.class)));
        return new NaFillConfig(categoricalFills, numericFills, ((TraversableOnce) fieldTypes.booleanFields().map(field -> {
            return new Tuple2(field, BoxesRunTime.boxToBoolean(false));
        }, List$.MODULE$.canBuildFrom())).toMap(Predef$.MODULE$.$conforms()));
    }

    /**
     * Computes the statistics-based fill configuration, then overrides every categorical
     * entry with the character blanket value. Numeric and boolean entries are kept.
     *
     * @param dataset data to inspect
     * @return the adjusted fill configuration
     */
    private NaFillConfig blanketFillCharOnly(Dataset<Row> dataset) {
        NaFillConfig computed = fillMissing(dataset);
        ArrayBuffer categoricalPairs = new ArrayBuffer();
        computed.categoricalColumns().map(entry -> {
            String fillValue = this._characterNABlanketFill();
            return categoricalPairs.$plus$eq(new Tuple2(entry._1(), fillValue));
        }, Iterable$.MODULE$.canBuildFrom());
        Map<String, String> categoricalFills = characterMapper((Tuple2[]) categoricalPairs.toArray(ClassTag$.MODULE$.apply(Tuple2.class)));
        return new NaFillConfig(categoricalFills, computed.numericColumns(), computed.booleanColumns());
    }

    /**
     * Computes the statistics-based fill configuration, then overrides every numeric
     * entry with the numeric blanket value. Categorical and boolean entries are kept.
     *
     * @param dataset data to inspect
     * @return the adjusted fill configuration
     */
    private NaFillConfig blanketFillNumOnly(Dataset<Row> dataset) {
        NaFillConfig computed = fillMissing(dataset);
        ArrayBuffer numericPairs = new ArrayBuffer();
        computed.numericColumns().map(entry -> {
            Object fillValue = BoxesRunTime.boxToDouble(this._numericNABlanketFill());
            return numericPairs.$plus$eq(new Tuple2(entry._1(), fillValue));
        }, Iterable$.MODULE$.canBuildFrom());
        Map<String, Object> numericFills = numericMapper((Tuple2[]) numericPairs.toArray(ClassTag$.MODULE$.apply(Tuple2.class)));
        return new NaFillConfig(computed.categoricalColumns(), numericFills, computed.booleanColumns());
    }

    /**
     * Checks the user-supplied fill maps against the schema of {@code dataset}.
     *
     * <p>Every key of the numeric map, then every key of the categorical map, must be a
     * column name of the dataset. After that, at least one of the two maps must be
     * non-empty.
     *
     * @param dataset data whose column names are the reference
     * @throws IllegalArgumentException when a map key is not a column name (raised by
     *         {@code Predef.require} in the per-key checks)
     * @throws UnsupportedOperationException when both maps are empty
     */
    private void validateMapSchemaMembership(Dataset<Row> dataset) throws UnsupportedOperationException, IllegalArgumentException {
        String[] columnNames = dataset.schema().names();
        if (_numericNAFillMap().nonEmpty()) {
            _numericNAFillMap().keys().foreach(key -> {
                $anonfun$validateMapSchemaMembership$1(columnNames, key);
                return BoxedUnit.UNIT;
            });
        }
        if (_categoricalNAFillMap().nonEmpty()) {
            _categoricalNAFillMap().keys().foreach(key -> {
                $anonfun$validateMapSchemaMembership$3(columnNames, key);
                return BoxedUnit.UNIT;
            });
        }
        boolean noOverrides = _categoricalNAFillMap().isEmpty() && _numericNAFillMap().isEmpty();
        if (noOverrides) {
            String message = "Map Fill mode has been defined for NA Fill but "
                    + "no map overrides have been specified.  Check configuration and ensure that either categoricalNAFillMap "
                    + "or numericNAFillMap values have been set.";
            throw new UnsupportedOperationException(message);
        }
    }

    /**
     * Builds a fill configuration in which user-supplied map entries override the
     * statistics-based values.
     *
     * <p>For each categorical and numeric column, the value from the matching user map
     * is used when the column is present there; otherwise the computed value is kept.
     * Every boolean column is filled with {@code false}.
     *
     * @param dataset data to inspect; the fill maps are validated against its schema first
     * @return the merged fill configuration
     */
    private NaFillConfig mapNAFill(Dataset<Row> dataset) {
        validateMapSchemaMembership(dataset);
        NaFillConfig computed = fillMissing(dataset);
        ArrayBuffer numericPairs = new ArrayBuffer();
        ArrayBuffer categoricalPairs = new ArrayBuffer();
        computed.categoricalColumns().map(entry -> {
            String column = (String) entry._1();
            if (!this._categoricalNAFillMap().contains(column)) {
                // No override for this column: keep the computed value.
                return categoricalPairs.$plus$eq(entry);
            }
            String override = ((String) this._categoricalNAFillMap().apply(column)).toString();
            return categoricalPairs.$plus$eq(new Tuple2(column, override));
        }, Iterable$.MODULE$.canBuildFrom());
        computed.numericColumns().map(entry -> {
            String column = (String) entry._1();
            if (!this._numericNAFillMap().contains(column)) {
                // No override for this column: keep the computed value.
                return numericPairs.$plus$eq(entry);
            }
            String overrideText = this._numericNAFillMap().apply(column).toString();
            double override = new StringOps(Predef$.MODULE$.augmentString(overrideText)).toDouble();
            return numericPairs.$plus$eq(new Tuple2(column, BoxesRunTime.boxToDouble(override)));
        }, Iterable$.MODULE$.canBuildFrom());
        Map<String, String> categoricalFills = characterMapper((Tuple2[]) categoricalPairs.toArray(ClassTag$.MODULE$.apply(Tuple2.class)));
        Map<String, Object> numericFills = numericMapper((Tuple2[]) numericPairs.toArray(ClassTag$.MODULE$.apply(Tuple2.class)));
        return new NaFillConfig(categoricalFills, numericFills, (Map) computed.booleanColumns().map(entry -> {
            return new Tuple2(entry._1(), BoxesRunTime.boxToBoolean(false));
        }, Map$.MODULE$.canBuildFrom()));
    }

    /**
     * Dispatches to the fill strategy selected by the current NA fill mode.
     *
     * @param dataset data to build the fill configuration for
     * @return the fill configuration produced by the selected strategy
     * @throws UnsupportedOperationException when the mode is not one of auto,
     *         blanketFillAll, blanketFillCharOnly, blanketFillNumOnly or mapFill
     */
    private NaFillConfig fillNA(Dataset<Row> dataset) {
        String mode = _naFillMode();
        if ("auto".equals(mode)) {
            return fillMissing(dataset);
        }
        if ("blanketFillAll".equals(mode)) {
            return blanketNAFill(dataset);
        }
        if ("blanketFillCharOnly".equals(mode)) {
            return blanketFillCharOnly(dataset);
        }
        if ("blanketFillNumOnly".equals(mode)) {
            return blanketFillNumOnly(dataset);
        }
        if ("mapFill".equals(mode)) {
            return mapNAFill(dataset);
        }
        throw new UnsupportedOperationException("The naFill Mode " + _naFillMode() + " is not supported. " + "Must be one of: " + _allowableNAFillModes().mkString(", "));
    }

    /**
     * Chooses a model family from the approximate number of distinct label values in
     * {@code this.data}.
     *
     * @return {@code "classifier"} when the approximate distinct count is at most the
     *         model selection threshold, otherwise {@code "regressor"}
     */
    public String decideModel() {
        Column distinctLabels = functions$.MODULE$.approx_count_distinct(_labelCol(), _filterPrecision());
        long[] counts = (long[]) this.data
                .select(Predef$.MODULE$.wrapRefArray(new Column[]{distinctLabels}))
                .rdd()
                .map(row -> {
                    return BoxesRunTime.boxToLong(row.getLong(0));
                }, ClassTag$.MODULE$.Long())
                .take(1);
        if (counts[0] <= ((long) _modelSelectionDistinctThreshold())) {
            return "classifier";
        }
        return "regressor";
    }

    /**
     * Produces the cleaned dataset together with the fill configuration and model type.
     *
     * <p>Steps: optionally refactor the label, resolve the fill configuration, fill
     * numeric, categorical and boolean nulls in that order, then drop rows whose label is
     * null or NaN.
     *
     * @param naFillConfig fill configuration to apply; when {@code null} one is computed
     *                     via {@code fillNA}
     * @param z whether to run {@code refactorLabel} on the data first
     * @param str model type to report; when {@code null} or empty, {@code decideModel()}
     *            supplies it
     * @return (cleaned data, fill configuration used, model type)
     */
    public Tuple3<Dataset<Row>, NaFillConfig, String> generateCleanData(NaFillConfig naFillConfig, boolean z, String str) {
        Dataset<Row> prepared;
        if (z) {
            prepared = refactorLabel(this.data, _labelCol());
        } else {
            prepared = this.data;
        }
        NaFillConfig fillConfig = naFillConfig;
        if (fillConfig == null) {
            fillConfig = fillNA(prepared);
        }
        Dataset cleaned = prepared
                .na().fill(fillConfig.numericColumns())
                .na().fill(fillConfig.categoricalColumns())
                .na().fill(fillConfig.booleanColumns())
                .filter(functions$.MODULE$.col(_labelCol()).isNotNull())
                .filter(functions$.MODULE$.col(_labelCol()).isNaN().unary_$bang())
                .toDF();
        // decideModel() runs a Spark job, so only call it when no model type was given.
        boolean modelTypeGiven = str != null && !str.isEmpty();
        if (modelTypeGiven) {
            return new Tuple3<>(cleaned, fillConfig, str);
        }
        return new Tuple3<>(cleaned, fillConfig, decideModel());
    }

    /** Compiler-generated default for the first parameter of {@code generateCleanData}: {@code null}. */
    public NaFillConfig generateCleanData$default$1() {
        return null;
    }

    /** Compiler-generated default for the second parameter of {@code generateCleanData}: {@code true}. */
    public boolean generateCleanData$default$2() {
        return true;
    }

    /** Compiler-generated default for the third parameter of {@code generateCleanData}: the empty string. */
    public String generateCleanData$default$3() {
        return "";
    }

    /** Per-row callback used by {@code getFieldsAndFillable}; intentionally does nothing with the row. */
    public static final /* synthetic */ void $anonfun$getFieldsAndFillable$1(Row row) {
    }

    /**
     * Requires that a key of the numeric NA fill map is one of the given column names.
     *
     * @param strArr column names to check against
     * @param str map key to check
     */
    public static final /* synthetic */ void $anonfun$validateMapSchemaMembership$1(String[] strArr, String str) {
        boolean isColumn = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(strArr)).contains(str);
        Predef$.MODULE$.require(isColumn, () -> {
            return "Field " + str + " supplied in .setNumericNAFillMap() is not a valid column name in the DataFrame.";
        });
    }

    /**
     * Requires that a key of the categorical NA fill map is one of the given column names.
     *
     * @param strArr column names to check against
     * @param str map key to check
     */
    public static final /* synthetic */ void $anonfun$validateMapSchemaMembership$3(String[] strArr, String str) {
        boolean isColumn = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(strArr)).contains(str);
        Predef$.MODULE$.require(isColumn, () -> {
            return "Field " + str + " supplied in .setCategoricalNAFillMap() is not a valid column name in the DataFrame.";
        });
    }

    /**
     * Creates a sanitizer over {@code dataset} and sets every option to its default.
     *
     * @param dataset data that the public methods of this instance operate on
     */
    public DataSanitizer(Dataset<Row> dataset) {
        this.data = dataset;
        // Initializers of the two mixed-in Scala traits.
        DataValidation.$init$(this);
        SparkSessionWrapper.$init$(this);
        // Column names.
        this._labelCol = "label";
        this._featureCol = "features";
        // Statistics used when fill values are computed from the data.
        this._numericFillStat = "mean";
        this._characterFillStat = "max";
        // decideModel() reports "classifier" at or below this many distinct label values.
        this._modelSelectionDistinctThreshold = 10;
        this._fieldsToIgnoreInVector = (String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class));
        this._filterPrecision = 0.01d;
        this._parallelism = 20;
        // User overrides for "mapFill" mode; empty until set.
        this._categoricalNAFillMap = Predef$.MODULE$.Map().empty();
        this._numericNAFillMap = Predef$.MODULE$.Map().empty();
        // Values used by the blanket fill modes.
        this._characterNABlanketFill = "";
        this._numericNABlanketFill = 0.0d;
        this._naFillMode = "auto";
        // Modes accepted by setNAFillMode.
        this._allowableNAFillModes = new $colon.colon("auto", new $colon.colon("mapFill", new $colon.colon("blanketFillAll", new $colon.colon("blanketFillCharOnly", new $colon.colon("blanketFillNumOnly", Nil$.MODULE$)))));
        this._labelValidation = false;
    }
}
