package com.databricks.labs.automl.pipeline;

import com.databricks.labs.automl.inference.NaFillConfig;
import com.databricks.labs.automl.sanitize.DataSanitizer;
import com.databricks.labs.automl.utils.AutoMlPipelineMlFlowUtils$;
import com.databricks.labs.automl.utils.SchemaUtils$;
import java.io.IOException;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.DoubleArrayParam;
import org.apache.spark.ml.param.DoubleParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.StringArrayParam;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Some;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.immutable.Map;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: DataSanitizerTransformer.scala */
@ScalaSignature(bytes = "\u0006\u0001\rUc\u0001\u00021b\u00011D!\"a\u0003\u0001\u0005\u000b\u0007I\u0011IA\u0007\u0011)\tI\u0003\u0001B\u0001B\u0003%\u0011q\u0002\u0005\b\u0003W\u0001A\u0011AA\u0017\u0011%\t\u0019\u0004\u0001b\u0001\n\u000b\t)\u0004\u0003\u0005\u0002D\u0001\u0001\u000bQBA\u001c\u0011%\t)\u0005\u0001b\u0001\n\u000b\t)\u0004\u0003\u0005\u0002H\u0001\u0001\u000bQBA\u001c\u0011%\tI\u0005\u0001b\u0001\n\u000b\tY\u0005\u0003\u0005\u0002T\u0001\u0001\u000bQBA'\u0011%\t)\u0006\u0001b\u0001\n\u000b\t9\u0006\u0003\u0005\u0002`\u0001\u0001\u000bQBA-\u0011%\t\t\u0007\u0001b\u0001\n\u000b\tY\u0005\u0003\u0005\u0002d\u0001\u0001\u000bQBA'\u0011%\t)\u0007\u0001b\u0001\n\u000b\t9\u0007\u0003\u0005\u0002p\u0001\u0001\u000bQBA5\u0011%\t\t\b\u0001b\u0001\n\u000b\t\u0019\b\u0003\u0005\u0002|\u0001\u0001\u000bQBA;\u0011%\ti\b\u0001b\u0001\n\u000b\t\u0019\b\u0003\u0005\u0002��\u0001\u0001\u000bQBA;\u0011%\t\t\t\u0001b\u0001\n\u000b\t\u0019\b\u0003\u0005\u0002\u0004\u0002\u0001\u000bQBA;\u0011%\t)\t\u0001b\u0001\n\u000b\t9\t\u0003\u0005\u0002\u0010\u0002\u0001\u000bQBAE\u0011%\t\t\n\u0001b\u0001\n\u000b\t\u0019\b\u0003\u0005\u0002\u0014\u0002\u0001\u000bQBA;\u0011%\t)\n\u0001b\u0001\n\u000b\t\u0019\b\u0003\u0005\u0002\u0018\u0002\u0001\u000bQBA;\u0011%\tI\n\u0001b\u0001\n\u000b\t)\u0004\u0003\u0005\u0002\u001c\u0002\u0001\u000bQBA\u001c\u0011%\ti\n\u0001b\u0001\n\u000b\t)\u0004\u0003\u0005\u0002 
\u0002\u0001\u000bQBA\u001c\u0011%\t\t\u000b\u0001b\u0001\n\u000b\t)\u0004\u0003\u0005\u0002$\u0002\u0001\u000bQBA\u001c\u0011%\t)\u000b\u0001b\u0001\n\u000b\t9\u0006\u0003\u0005\u0002(\u0002\u0001\u000bQBA-\u0011%\tI\u000b\u0001b\u0001\n\u000b\t\u0019\b\u0003\u0005\u0002,\u0002\u0001\u000bQBA;\u0011%\ti\u000b\u0001b\u0001\n\u000b\t\u0019\b\u0003\u0005\u00020\u0002\u0001\u000bQBA;\u0011%\t\t\f\u0001b\u0001\n\u000b\t\u0019\b\u0003\u0005\u00024\u0002\u0001\u000bQBA;\u0011%\t)\f\u0001b\u0001\n\u000b\t9\t\u0003\u0005\u00028\u0002\u0001\u000bQBAE\u0011\u001d\tI\f\u0001C\u0001\u0003wCq!a1\u0001\t\u0003\ti\u0001C\u0004\u0002F\u0002!\t!a2\t\u000f\u0005-\u0007\u0001\"\u0001\u0002\u000e!9\u0011Q\u001a\u0001\u0005\u0002\u0005=\u0007bBAn\u0001\u0011\u0005\u0011Q\u001c\u0005\b\u0003?\u0004A\u0011AAq\u0011\u001d\tY\u000f\u0001C\u0001\u0003[Dq!a<\u0001\t\u0003\t\t\u0010C\u0004\u0002v\u0002!\t!!8\t\u000f\u0005]\b\u0001\"\u0001\u0002z\"9!1\u0001\u0001\u0005\u0002\t\u0015\u0001b\u0002B\u0004\u0001\u0011\u0005!\u0011\u0002\u0005\b\u0005'\u0001A\u0011\u0001B\u000b\u0011\u001d\u00119\u0002\u0001C\u0001\u00053AqA!\b\u0001\t\u0003\u0011)\u0002C\u0004\u0003 
\u0001!\tA!\t\t\u000f\t\u0015\u0002\u0001\"\u0001\u0003\u0016!9!q\u0005\u0001\u0005\u0002\t%\u0002b\u0002B\u0017\u0001\u0011\u0005!Q\u0003\u0005\b\u0005_\u0001A\u0011\u0001B\u0019\u0011\u001d\u00119\u0004\u0001C\u0001\u0005sAqAa\u000f\u0001\t\u0003\u0011i\u0004C\u0004\u0003D\u0001!\tA!\u0012\t\u000f\t\u001d\u0003\u0001\"\u0001\u0003J!9!Q\n\u0001\u0005\u0002\u00055\u0001b\u0002B(\u0001\u0011\u0005!\u0011\u000b\u0005\b\u0005+\u0002A\u0011AA\u0007\u0011\u001d\u00119\u0006\u0001C\u0001\u00053BqA!\u0018\u0001\t\u0003\ti\u0001C\u0004\u0003`\u0001!\tA!\u0019\t\u000f\t\u0015\u0004\u0001\"\u0001\u0002n\"9!q\r\u0001\u0005\u0002\t%\u0004b\u0002B7\u0001\u0011\u0005!Q\u0003\u0005\b\u0005_\u0002A\u0011\u0001B9\u0011\u001d\u0011)\b\u0001C\u0001\u0005+AqAa\u001e\u0001\t\u0003\u0011I\bC\u0004\u0003~\u0001!\tA!\u0006\t\u000f\t}\u0004\u0001\"\u0001\u0003\u0002\"9!Q\u0011\u0001\u0005\u0002\t\u0015\u0003b\u0002BD\u0001\u0011\u0005!\u0011\u0012\u0005\b\u0005'\u0003A\u0011\u0001BK\u0011\u001d\tY\u0003\u0001C\u0001\u00057CqA!(\u0001\t\u0003\u0012y\nC\u0004\u0003h\u0002!IA!;\t\u000f\tu\b\u0001\"\u0011\u0003��\"91\u0011\u0003\u0001\u0005B\rMqaBB\u0010C\"\u00051\u0011\u0005\u0004\u0007A\u0006D\taa\t\t\u000f\u0005-B\f\"\u0001\u00048!91\u0011\b/\u0005B\rm\u0002\"CB!9\u0006\u0005I\u0011BB\"\u0005a!\u0015\r^1TC:LG/\u001b>feR\u0013\u0018M\\:g_JlWM\u001d\u0006\u0003E\u000e\f\u0001\u0002]5qK2Lg.\u001a\u0006\u0003I\u0016\fa!Y;u_6d'B\u00014h\u0003\u0011a\u0017MY:\u000b\u0005!L\u0017A\u00033bi\u0006\u0014'/[2lg*\t!.A\u0002d_6\u001c\u0001a\u0005\u0004\u0001[F|\u0018Q\u0001\t\u0003]>l\u0011!Y\u0005\u0003a\u0006\u00141#\u00112tiJ\f7\r\u001e+sC:\u001chm\u001c:nKJ\u0004\"A]?\u000e\u0003MT!\u0001^;\u0002\tU$\u0018\u000e\u001c\u0006\u0003m^\f!!\u001c7\u000b\u0005aL\u0018!B:qCJ\\'B\u0001>|\u0003\u0019\t\u0007/Y2iK*\tA0A\u0002pe\u001eL!A`:\u0003+\u0011+g-Y;miB\u000b'/Y7t/JLG/\u00192mKB\u0019a.!\u0001\n\u0007\u0005\r\u0011M\u0001\bICNd\u0015MY3m\u0007>dW/\u001c8\u0011\u00079\f9!C\u0002\u0002\n\u0005\u0014\u0001\
u0003S1t\r\u0016\fG/\u001e:f\u0007>dW/\u001c8\u0002\u0007ULG-\u0006\u0002\u0002\u0010A!\u0011\u0011CA\u0012\u001d\u0011\t\u0019\"a\b\u0011\t\u0005U\u00111D\u0007\u0003\u0003/Q1!!\u0007l\u0003\u0019a$o\\8u})\u0011\u0011QD\u0001\u0006g\u000e\fG.Y\u0005\u0005\u0003C\tY\"\u0001\u0004Qe\u0016$WMZ\u0005\u0005\u0003K\t9C\u0001\u0004TiJLgn\u001a\u0006\u0005\u0003C\tY\"\u0001\u0003vS\u0012\u0004\u0013A\u0002\u001fj]&$h\b\u0006\u0003\u00020\u0005E\u0002C\u00018\u0001\u0011\u001d\tYa\u0001a\u0001\u0003\u001f\tqB\\;nKJL7MR5mYN#\u0018\r^\u000b\u0003\u0003o\u0001b!!\u000f\u0002@\u0005=QBAA\u001e\u0015\r\ti$^\u0001\u0006a\u0006\u0014\u0018-\\\u0005\u0005\u0003\u0003\nYDA\u0003QCJ\fW.\u0001\tok6,'/[2GS2d7\u000b^1uA\u0005\t2\r[1sC\u000e$XM\u001d$jY2\u001cF/\u0019;\u0002%\rD\u0017M]1di\u0016\u0014h)\u001b7m'R\fG\u000fI\u0001 [>$W\r\\*fY\u0016\u001cG/[8o\t&\u001cH/\u001b8diRC'/Z:i_2$WCAA'!\u0011\tI$a\u0014\n\t\u0005E\u00131\b\u0002\t\u0013:$\b+\u0019:b[\u0006\u0001Sn\u001c3fYN+G.Z2uS>tG)[:uS:\u001cG\u000f\u00165sKNDw\u000e\u001c3!\u0003=1\u0017\u000e\u001c;feB\u0013XmY5tS>tWCAA-!\u0011\tI$a\u0017\n\t\u0005u\u00131\b\u0002\f\t>,(\r\\3QCJ\fW.\u0001\tgS2$XM\u001d)sK\u000eL7/[8oA\u0005Y\u0001/\u0019:bY2,G.[:n\u00031\u0001\u0018M]1mY\u0016d\u0017n]7!\u0003)q\u0017MR5mY\u001ac\u0017mZ\u000b\u0003\u0003S\u0002B!!\u000f\u0002l%!\u0011QNA\u001e\u00051\u0011un\u001c7fC:\u0004\u0016M]1n\u0003-q\u0017MR5mY\u001ac\u0017m\u001a\u0011\u0002-\r\fG/Z4pe&\u001c\u0017\r\\\"pYVlgNT1nKN,\"!!\u001e\u0011\t\u0005e\u0012qO\u0005\u0005\u0003s\nYD\u0001\tTiJLgnZ!se\u0006L\b+\u0019:b[\u000692-\u0019;fO>\u0014\u0018nY1m\u0007>dW/\u001c8OC6,7\u000fI\u0001\u0018G\u0006$XmZ8sS\u000e\fGnQ8mk6tg+\u00197vKN\f\u0001dY1uK\u001e|'/[2bY\u000e{G.^7o-\u0006dW/Z:!\u0003IqW/\\3sS\u000e\u001cu\u000e\\;n]:\u000bW.Z:\u0002'9,X.\u001a:jG\u000e{G.^7o\u001d\u0006lWm\u001d\u0011\u0002'9,X.\u001a:jG\u000e{G.^7o-\u0006dW/Z:\u0016\u0005\u0005%\u0005\u0003BA\u001d\u0003\u0017KA!!$\u0002<\t\u0001Bi\\;cY\u0016\f%O]1z!\u0006\u0014\u0018-\\
\u0001\u0015]VlWM]5d\u0007>dW/\u001c8WC2,Xm\u001d\u0011\u0002%\t|w\u000e\\3b]\u000e{G.^7o\u001d\u0006lWm]\u0001\u0014E>|G.Z1o\u0007>dW/\u001c8OC6,7\u000fI\u0001\u0014E>|G.Z1o\u0007>dW/\u001c8WC2,Xm]\u0001\u0015E>|G.Z1o\u0007>dW/\u001c8WC2,Xm\u001d\u0011\u0002\u0017\u0011,7-\u001b3f\u001b>$W\r\\\u0001\rI\u0016\u001c\u0017\u000eZ3N_\u0012,G\u000eI\u0001\tM&dG.T8eK\u0006Ia-\u001b7m\u001b>$W\rI\u0001\u0017G\"\f'/Y2uKJt\u0015I\u00117b].,GOR5mY\u000692\r[1sC\u000e$XM\u001d(B\u00052\fgn[3u\r&dG\u000eI\u0001\u0015]VlWM]5d\u001d\u0006\u0013E.\u00198lKR4\u0015\u000e\u001c7\u0002+9,X.\u001a:jG:\u000b%\t\\1oW\u0016$h)\u001b7mA\u0005A2-\u0019;fO>\u0014\u0018nY1m\u001d\u00063\u0015\u000e\u001c7NCB\\U-_:\u00023\r\fG/Z4pe&\u001c\u0017\r\u001c(B\r&dG.T1q\u0017\u0016L8\u000fI\u0001\u001bG\u0006$XmZ8sS\u000e\fGNT!GS2dW*\u00199WC2,Xm]\u0001\u001cG\u0006$XmZ8sS\u000e\fGNT!GS2dW*\u00199WC2,Xm\u001d\u0011\u0002)9,X.\u001a:jG:\u000be)\u001b7m\u001b\u0006\u00048*Z=t\u0003UqW/\\3sS\u000et\u0015IR5mY6\u000b\u0007oS3zg\u0002\naC\\;nKJL7MT!GS2dW*\u00199WC2,Xm]\u0001\u0018]VlWM]5d\u001d\u00063\u0015\u000e\u001c7NCB4\u0016\r\\;fg\u0002\n!c]3u\u001dVlWM]5d\r&dGn\u0015;biR!\u0011QXA`\u001b\u0005\u0001\u0001bBAaY\u0001\u0007\u0011qB\u0001\u0006m\u0006dW/Z\u0001\u0013O\u0016$h*^7fe&\u001cg)\u001b7m'R\fG/\u0001\u000btKR\u001c\u0005.\u0019:bGR,'OR5mYN#\u0018\r\u001e\u000b\u0005\u0003{\u000bI\rC\u0004\u0002B:\u0002\r!a\u0004\u0002)\u001d,Go\u00115be\u0006\u001cG/\u001a:GS2d7\u000b^1u\u0003\t\u001aX\r^'pI\u0016d7+\u001a7fGRLwN\u001c#jgRLgn\u0019;UQJ,7\u000f[8mIR!\u0011QXAi\u0011\u001d\t\t\r\ra\u0001\u0003'\u0004B!!6\u0002X6\u0011\u00111D\u0005\u0005\u00033\fYBA\u0002J]R\f!eZ3u\u001b>$W\r\\*fY\u0016\u001cG/[8o\t&\u001cH/\u001b8diRC'/Z:i_2$WCAAj\u0003I\u0019X\r\u001e$jYR,'\u000f\u0015:fG&\u001c\u0018n\u001c8\u0015\t\u0005u\u00161\u001d\u0005\b\u0003\u0003\u0014\u0004\u0019AAs!\u0011\t).a:\n\t\u0005%\u00181\u0004\u0002\u0007\t>,(\r\\3\u0002%\u001d,GOR5mi\u0016\u0014\bK]3dSNLwN\\\u000b\u0003\u0003K\fab]3u!\u
0006\u0014\u0018\r\u001c7fY&\u001cX\u000e\u0006\u0003\u0002>\u0006M\bbBAai\u0001\u0007\u00111[\u0001\u000fO\u0016$\b+\u0019:bY2,G.[:n\u00035\u0019X\r\u001e(b\r&dGN\u00127bOR!\u0011QXA~\u0011\u001d\t\tM\u000ea\u0001\u0003{\u0004B!!6\u0002��&!!\u0011AA\u000e\u0005\u001d\u0011un\u001c7fC:\fQbZ3u\u001d\u00064\u0015\u000e\u001c7GY\u0006<WCAA\u007f\u0003e\u0019X\r^\"bi\u0016<wN]5dC2\u001cu\u000e\\;n]:\u000bW.Z:\u0015\t\u0005u&1\u0002\u0005\b\u0003\u0003D\u0004\u0019\u0001B\u0007!\u0019\t)Na\u0004\u0002\u0010%!!\u0011CA\u000e\u0005\u0015\t%O]1z\u0003e9W\r^\"bi\u0016<wN]5dC2\u001cu\u000e\\;n]:\u000bW.Z:\u0016\u0005\t5\u0011AG:fi\u000e\u000bG/Z4pe&\u001c\u0017\r\\\"pYVlgNV1mk\u0016\u001cH\u0003BA_\u00057Aq!!1;\u0001\u0004\u0011i!\u0001\u000ehKR\u001c\u0015\r^3h_JL7-\u00197D_2,XN\u001c,bYV,7/A\u000btKRtU/\\3sS\u000e\u001cu\u000e\\;n]:\u000bW.Z:\u0015\t\u0005u&1\u0005\u0005\b\u0003\u0003d\u0004\u0019\u0001B\u0007\u0003U9W\r\u001e(v[\u0016\u0014\u0018nY\"pYVlgNT1nKN\fQc]3u\u0005>|G.Z1o\u0007>dW/\u001c8OC6,7\u000f\u0006\u0003\u0002>\n-\u0002bBAa}\u0001\u0007!QB\u0001\u0016O\u0016$(i\\8mK\u0006t7i\u001c7v[:t\u0015-\\3t\u0003Y\u0019X\r\u001e\"p_2,\u0017M\\\"pYVlgNV1mk\u0016\u001cH\u0003BA_\u0005gAq!!1A\u0001\u0004\u0011)\u0004\u0005\u0004\u0002V\n=\u0011Q`\u0001\u0017O\u0016$(i\\8mK\u0006t7i\u001c7v[:4\u0016\r\\;fgV\u0011!QG\u0001\u0017g\u0016$h*^7fe&\u001c7i\u001c7v[:4\u0016\r\\;fgR!\u0011Q\u0018B 
\u0011\u001d\t\tM\u0011a\u0001\u0005\u0003\u0002b!!6\u0003\u0010\u0005\u0015\u0018AF4fi:+X.\u001a:jG\u000e{G.^7o-\u0006dW/Z:\u0016\u0005\t\u0005\u0013AD:fi\u0012+7-\u001b3f\u001b>$W\r\u001c\u000b\u0005\u0003{\u0013Y\u0005C\u0004\u0002B\u0012\u0003\r!a\u0004\u0002\u001d\u001d,G\u000fR3dS\u0012,Wj\u001c3fY\u0006Y1/\u001a;GS2dWj\u001c3f)\u0011\tiLa\u0015\t\u000f\u0005\u0005g\t1\u0001\u0002\u0010\u0005Yq-\u001a;GS2dWj\u001c3f\u0003e\u0019X\r^\"iCJ\f7\r^3s\u001d\u0006\u0013E.\u00198lKR4\u0015\u000e\u001c7\u0015\t\u0005u&1\f\u0005\b\u0003\u0003D\u0005\u0019AA\b\u0003e9W\r^\"iCJ\f7\r^3s\u001d\u0006\u0013E.\u00198lKR4\u0015\u000e\u001c7\u0002/M,GOT;nKJL7MT!CY\u0006t7.\u001a;GS2dG\u0003BA_\u0005GBq!!1K\u0001\u0004\t)/A\fhKRtU/\\3sS\u000et\u0015I\u00117b].,GOR5mY\u0006Y2/\u001a;DCR,wm\u001c:jG\u0006dg*\u0011$jY2l\u0015\r]&fsN$B!!0\u0003l!9\u0011\u0011\u0019'A\u0002\t5\u0011aG4fi\u000e\u000bG/Z4pe&\u001c\u0017\r\u001c(B\r&dG.T1q\u0017\u0016L8/A\u000ftKR\u001c\u0015\r^3h_JL7-\u00197O\u0003\u001aKG\u000e\\'baZ\u000bG.^3t)\u0011\tiLa\u001d\t\u000f\u0005\u0005g\n1\u0001\u0003\u000e\u0005ir-\u001a;DCR,wm\u001c:jG\u0006dg*\u0011$jY2l\u0015\r\u001d,bYV,7/A\ftKRtU/\\3sS\u000et\u0015IR5mY6\u000b\u0007oS3zgR!\u0011Q\u0018B>\u0011\u001d\t\t\r\u0015a\u0001\u0005\u001b\tqcZ3u\u001dVlWM]5d\u001d\u00063\u0015\u000e\u001c7NCB\\U-_:\u00023M,GOT;nKJL7MT!GS2dW*\u00199WC2,Xm\u001d\u000b\u0005\u0003{\u0013\u0019\tC\u0004\u0002BJ\u0003\rA!\u0011\u00023\u001d,GOT;nKJL7MT!GS2dW*\u00199WC2,Xm]\u0001\u0018g\u0016$8)\u0019;fO>\u0014\u0018nY1m\u001d\u00063\u0015\u000e\u001c7NCB$B!!0\u0003\f\"9\u0011\u0011\u0019+A\u0002\t5\u0005\u0003CA\t\u0005\u001f\u000by!a\u0004\n\t\tE\u0015q\u0005\u0002\u0004\u001b\u0006\u0004\u0018aE:fi:+X.\u001a:jG:\u000be)\u001b7m\u001b\u0006\u0004H\u0003BA_\u0005/Cq!!1V\u0001\u0004\u0011I\n\u0005\u0005\u0002\u0012\t=\u0015qBAs)\t\ty#A\tue\u0006t7OZ8s[&sG/\u001a:oC2$BA!)\u0003DB!!1\u0015B_\u001d\u0011\u0011)Ka.\u000f\t\t\u001d&1\u0017\b\u0005\u0005S\u0013\tL\u0004\u0003\u0003,\n=f\
u0002BA\u000b\u0005[K\u0011\u0001`\u0005\u0003unL!\u0001_=\n\u0007\tUv/A\u0002tc2LAA!/\u0003<\u00069\u0001/Y2lC\u001e,'b\u0001B[o&!!q\u0018Ba\u0005%!\u0015\r^1Ge\u0006lWM\u0003\u0003\u0003:\nm\u0006b\u0002Bc/\u0002\u0007!qY\u0001\bI\u0006$\u0018m]3ua\u0011\u0011IM!6\u0011\r\t-'Q\u001aBi\u001b\t\u0011Y,\u0003\u0003\u0003P\nm&a\u0002#bi\u0006\u001cX\r\u001e\t\u0005\u0005'\u0014)\u000e\u0004\u0001\u0005\u0019\t]'1YA\u0001\u0002\u0003\u0015\tA!7\u0003\u0007}#\u0013'\u0005\u0003\u0003\\\n\u0005\b\u0003BAk\u0005;LAAa8\u0002\u001c\t9aj\u001c;iS:<\u0007\u0003BAk\u0005GLAA!:\u0002\u001c\t\u0019\u0011I\\=\u0002\u001b\t,\u0018\u000e\u001c3OC\u000e{gNZ5h)\t\u0011Y\u000f\u0005\u0004\u0002V\n5(\u0011_\u0005\u0005\u0005_\fYB\u0001\u0004PaRLwN\u001c\t\u0005\u0005g\u0014I0\u0004\u0002\u0003v*\u0019!q_2\u0002\u0013%tg-\u001a:f]\u000e,\u0017\u0002\u0002B~\u0005k\u0014ABT1GS2d7i\u001c8gS\u001e\fq\u0003\u001e:b]N4wN]7TG\",W.Y%oi\u0016\u0014h.\u00197\u0015\t\r\u00051Q\u0002\t\u0005\u0007\u0007\u0019I!\u0004\u0002\u0004\u0006)!1q\u0001B^\u0003\u0015!\u0018\u0010]3t\u0013\u0011\u0019Ya!\u0002\u0003\u0015M#(/^2u)f\u0004X\rC\u0004\u0004\u0010e\u0003\ra!\u0001\u0002\rM\u001c\u0007.Z7b\u0003\u0011\u0019w\u000e]=\u0015\t\u0005=2Q\u0003\u0005\b\u0007/Q\u0006\u0019AB\r\u0003\u0015)\u0007\u0010\u001e:b!\u0011\tIda\u0007\n\t\ru\u00111\b\u0002\t!\u0006\u0014\u0018-\\'ba\u0006AB)\u0019;b'\u0006t\u0017\u000e^5{KJ$&/\u00198tM>\u0014X.\u001a:\u0011\u00059d6c\u0002/\u0004&\r-2\u0011\u0007\t\u0005\u0003+\u001c9#\u0003\u0003\u0004*\u0005m!AB!osJ+g\rE\u0003s\u0007[\ty#C\u0002\u00040M\u0014Q\u0003R3gCVdG\u000fU1sC6\u001c(+Z1eC\ndW\r\u0005\u0003\u0002V\u000eM\u0012\u0002BB\u001b\u00037\u0011AbU3sS\u0006d\u0017N_1cY\u0016$\"a!\t\u0002\t1|\u0017\r\u001a\u000b\u0005\u0003_\u0019i\u0004C\u0004\u0004@y\u0003\r!a\u0004\u0002\tA\fG\u000f[\u0001\fe\u0016\fGMU3t_24X\r\u0006\u0002\u0004FA!1qIB)\u001b\t\u0019IE\u0003\u0003\u0004L\r5\u0013\u0001\u00027b]\u001eT!aa\u0014\u0002\t)\fg/Y\u0005\u0005\u0007'\u001aIE\u0001\u000
4PE*,7\r\u001e")
/* loaded from: input_file:com/databricks/labs/automl/pipeline/DataSanitizerTransformer.class */
public class DataSanitizerTransformer extends AbstractTransformer implements DefaultParamsWritable, HasLabelColumn, HasFeatureColumn {
    private final String uid;
    private final Param<String> numericFillStat;
    private final Param<String> characterFillStat;
    private final IntParam modelSelectionDistinctThreshold;
    private final DoubleParam filterPrecision;
    private final IntParam parallelism;
    private final BooleanParam naFillFlag;
    private final StringArrayParam categoricalColumnNames;
    private final StringArrayParam categoricalColumnValues;
    private final StringArrayParam numericColumnNames;
    private final DoubleArrayParam numericColumnValues;
    private final StringArrayParam booleanColumnNames;
    private final StringArrayParam booleanColumnValues;
    private final Param<String> decideModel;
    private final Param<String> fillMode;
    private final Param<String> characterNABlanketFill;
    private final DoubleParam numericNABlanketFill;
    private final StringArrayParam categoricalNAFillMapKeys;
    private final StringArrayParam categoricalNAFillMapValues;
    private final StringArrayParam numericNAFillMapKeys;
    private final DoubleArrayParam numericNAFillMapValues;
    private final Param<String> featureCol;
    private final Param<String> labelColumn;

    /**
     * Static forwarder to the companion object's loader.
     *
     * @param str argument handed unchanged to the companion object's loader
     * @return the transformer returned by the companion object
     */
    public static DataSanitizerTransformer load(String str) {
        DataSanitizerTransformer$ companion = DataSanitizerTransformer$.MODULE$;
        return companion.m331load(str);
    }

    /**
     * Static forwarder to the companion object's {@code read()}.
     *
     * @return the reader returned by the companion object
     */
    public static MLReader<DataSanitizerTransformer> read() {
        DataSanitizerTransformer$ companion = DataSanitizerTransformer$.MODULE$;
        return companion.read();
    }

    /**
     * Forwards to the {@link HasFeatureColumn} trait implementation of {@code setFeatureCol}.
     *
     * @param str value handed unchanged to the trait implementation
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.pipeline.HasFeatureColumn
    public HasFeatureColumn setFeatureCol(String str) {
        // An unqualified setFeatureCol(str) resolves to this very override and recurses until the
        // stack overflows, so the call is routed to the trait's static forwarder instead.
        // NOTE(review): assumes HasFeatureColumn exposes setFeatureCol$ the same way
        // DefaultParamsWritable exposes write$ (used by write()) - confirm against the trait.
        return HasFeatureColumn.setFeatureCol$(this, str);
    }

    /**
     * Forwards to the {@link HasFeatureColumn} trait implementation of {@code getFeatureCol}.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.pipeline.HasFeatureColumn
    public String getFeatureCol() {
        // An unqualified getFeatureCol() resolves to this very override and recurses until the
        // stack overflows, so the call is routed to the trait's static forwarder instead.
        // NOTE(review): assumes HasFeatureColumn exposes getFeatureCol$ the same way
        // DefaultParamsWritable exposes write$ (used by write()) - confirm against the trait.
        return HasFeatureColumn.getFeatureCol$(this);
    }

    /**
     * Forwards to the {@link HasLabelColumn} trait implementation of {@code setLabelColumn}.
     *
     * @param str value handed unchanged to the trait implementation
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.pipeline.HasLabelColumn
    public HasLabelColumn setLabelColumn(String str) {
        // An unqualified setLabelColumn(str) resolves to this very override and recurses until the
        // stack overflows, so the call is routed to the trait's static forwarder instead.
        // NOTE(review): assumes HasLabelColumn exposes setLabelColumn$ the same way
        // DefaultParamsWritable exposes write$ (used by write()) - confirm against the trait.
        return HasLabelColumn.setLabelColumn$(this, str);
    }

    /**
     * Forwards to the {@link HasLabelColumn} trait implementation of {@code getLabelColumn}.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.pipeline.HasLabelColumn
    public String getLabelColumn() {
        // An unqualified getLabelColumn() resolves to this very override and recurses until the
        // stack overflows, so the call is routed to the trait's static forwarder instead.
        // NOTE(review): assumes HasLabelColumn exposes getLabelColumn$ the same way
        // DefaultParamsWritable exposes write$ (used by write()) - confirm against the trait.
        return HasLabelColumn.getLabelColumn$(this);
    }

    /**
     * Obtains the writer for this instance from {@link DefaultParamsWritable}.
     *
     * @return the writer returned by {@code DefaultParamsWritable.write$}
     */
    public MLWriter write() {
        MLWriter writer = DefaultParamsWritable.write$(this);
        return writer;
    }

    /**
     * Delegates saving of this instance to {@link MLWritable}.
     *
     * @param str argument handed unchanged to {@code MLWritable.save$}
     * @throws IOException declared by the delegate
     */
    public void save(String str) throws IOException {
        final String target = str;
        MLWritable.save$(this, target);
    }

    /** Returns the {@code featureCol} param held by this instance. */
    @Override // com.databricks.labs.automl.pipeline.HasFeatureColumn
    public final Param<String> featureCol() {
        return featureCol;
    }

    /**
     * Trait setter that stores the {@code featureCol} param; invoked from the constructor.
     *
     * @param param the param instance to store
     */
    @Override // com.databricks.labs.automl.pipeline.HasFeatureColumn
    public final void com$databricks$labs$automl$pipeline$HasFeatureColumn$_setter_$featureCol_$eq(Param<String> param) {
        // NOTE(review): the field is declared final, so javac rejects this assignment outside a
        // constructor; it looks like a decompiler rendering of a compiler-generated setter - confirm.
        featureCol = param;
    }

    /** Returns the {@code labelColumn} param held by this instance. */
    @Override // com.databricks.labs.automl.pipeline.HasLabelColumn
    public final Param<String> labelColumn() {
        return labelColumn;
    }

    /**
     * Trait setter that stores the {@code labelColumn} param; invoked from the constructor.
     *
     * @param param the param instance to store
     */
    @Override // com.databricks.labs.automl.pipeline.HasLabelColumn
    public final void com$databricks$labs$automl$pipeline$HasLabelColumn$_setter_$labelColumn_$eq(Param<String> param) {
        // NOTE(review): the field is declared final, so javac rejects this assignment outside a
        // constructor; it looks like a decompiler rendering of a compiler-generated setter - confirm.
        labelColumn = param;
    }

    /** Returns the uid string supplied to the constructor. */
    public String uid() {
        return uid;
    }

    /** Returns the {@code numericFillStat} param (described as "Numeric fill stats"). */
    public final Param<String> numericFillStat() {
        return numericFillStat;
    }

    /** Returns the {@code characterFillStat} param (described as "Character fill stat"). */
    public final Param<String> characterFillStat() {
        return characterFillStat;
    }

    /** Returns the {@code modelSelectionDistinctThreshold} param. */
    public final IntParam modelSelectionDistinctThreshold() {
        return modelSelectionDistinctThreshold;
    }

    /** Returns the {@code filterPrecision} param (described as "Filter precision"). */
    public final DoubleParam filterPrecision() {
        return filterPrecision;
    }

    /** Returns the {@code parallelism} param (described as "filter parallelism"). */
    public final IntParam parallelism() {
        return parallelism;
    }

    /** Returns the {@code naFillFlag} param (described as "Na Fill flag"). */
    public final BooleanParam naFillFlag() {
        return naFillFlag;
    }

    /** Returns the {@code categoricalColumnNames} param (described as "Categorical Columns"). */
    public final StringArrayParam categoricalColumnNames() {
        return categoricalColumnNames;
    }

    /** Returns the {@code categoricalColumnValues} param (described as "Categorical Columns' Values"). */
    public final StringArrayParam categoricalColumnValues() {
        return categoricalColumnValues;
    }

    /** Returns the {@code numericColumnNames} param (described as "Numeric Columns"). */
    public final StringArrayParam numericColumnNames() {
        return numericColumnNames;
    }

    /** Returns the {@code numericColumnValues} param (described as "Numeric Columns' Values"). */
    public final DoubleArrayParam numericColumnValues() {
        return numericColumnValues;
    }

    /** Returns the {@code booleanColumnNames} param (described as "Boolean Columns"). */
    public final StringArrayParam booleanColumnNames() {
        return booleanColumnNames;
    }

    /** Returns the {@code booleanColumnValues} param (described as "Boolean Columns' Values"). */
    public final StringArrayParam booleanColumnValues() {
        return booleanColumnValues;
    }

    /** Returns the {@code decideModel} param held by this instance. */
    public final Param<String> decideModel() {
        return decideModel;
    }

    /** Returns the {@code fillMode} param held by this instance. */
    public final Param<String> fillMode() {
        return fillMode;
    }

    /** Returns the {@code characterNABlanketFill} param held by this instance. */
    public final Param<String> characterNABlanketFill() {
        return characterNABlanketFill;
    }

    /** Returns the {@code numericNABlanketFill} param held by this instance. */
    public final DoubleParam numericNABlanketFill() {
        return numericNABlanketFill;
    }

    /** Returns the {@code categoricalNAFillMapKeys} param held by this instance. */
    public final StringArrayParam categoricalNAFillMapKeys() {
        return categoricalNAFillMapKeys;
    }

    /** Returns the {@code categoricalNAFillMapValues} param held by this instance. */
    public final StringArrayParam categoricalNAFillMapValues() {
        return categoricalNAFillMapValues;
    }

    /** Returns the {@code numericNAFillMapKeys} param held by this instance. */
    public final StringArrayParam numericNAFillMapKeys() {
        return numericNAFillMapKeys;
    }

    /** Returns the {@code numericNAFillMapValues} param held by this instance. */
    public final DoubleArrayParam numericNAFillMapValues() {
        return numericNAFillMapValues;
    }

    /**
     * Stores {@code str} under the {@code numericFillStat} param.
     *
     * @param str value to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setNumericFillStat(String str) {
        Object updated = set(this.numericFillStat, str);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the value currently resolved for the {@code numericFillStat} param. */
    public String getNumericFillStat() {
        Object resolved = $(this.numericFillStat);
        return (String) resolved;
    }

    /**
     * Stores {@code str} under the {@code characterFillStat} param.
     *
     * @param str value to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setCharacterFillStat(String str) {
        Object updated = set(this.characterFillStat, str);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the value currently resolved for the {@code characterFillStat} param. */
    public String getCharacterFillStat() {
        Object resolved = $(this.characterFillStat);
        return (String) resolved;
    }

    /**
     * Stores {@code i} (boxed) under the {@code modelSelectionDistinctThreshold} param.
     *
     * @param i value to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setModelSelectionDistinctThreshold(int i) {
        Object boxed = BoxesRunTime.boxToInteger(i);
        Object updated = set(this.modelSelectionDistinctThreshold, boxed);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the unboxed value resolved for the {@code modelSelectionDistinctThreshold} param. */
    public int getModelSelectionDistinctThreshold() {
        Object boxed = $(this.modelSelectionDistinctThreshold);
        return BoxesRunTime.unboxToInt(boxed);
    }

    /**
     * Stores {@code d} (boxed) under the {@code filterPrecision} param.
     *
     * @param d value to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setFilterPrecision(double d) {
        Object boxed = BoxesRunTime.boxToDouble(d);
        Object updated = set(this.filterPrecision, boxed);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the unboxed value resolved for the {@code filterPrecision} param. */
    public double getFilterPrecision() {
        Object boxed = $(this.filterPrecision);
        return BoxesRunTime.unboxToDouble(boxed);
    }

    /**
     * Stores {@code i} (boxed) under the {@code parallelism} param.
     *
     * @param i value to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setParallelism(int i) {
        Object boxed = BoxesRunTime.boxToInteger(i);
        Object updated = set(this.parallelism, boxed);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the unboxed value resolved for the {@code parallelism} param. */
    public int getParallelism() {
        Object boxed = $(this.parallelism);
        return BoxesRunTime.unboxToInt(boxed);
    }

    /**
     * Stores {@code z} (boxed) under the {@code naFillFlag} param.
     *
     * @param z value to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setNaFillFlag(boolean z) {
        Object boxed = BoxesRunTime.boxToBoolean(z);
        Object updated = set(this.naFillFlag, boxed);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the unboxed value resolved for the {@code naFillFlag} param. */
    public boolean getNaFillFlag() {
        Object boxed = $(this.naFillFlag);
        return BoxesRunTime.unboxToBoolean(boxed);
    }

    /**
     * Stores {@code strArr} under the {@code categoricalColumnNames} param.
     *
     * @param strArr array to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setCategoricalColumnNames(String[] strArr) {
        Object updated = set(this.categoricalColumnNames, strArr);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the array currently resolved for the {@code categoricalColumnNames} param. */
    public String[] getCategoricalColumnNames() {
        Object resolved = $(this.categoricalColumnNames);
        return (String[]) resolved;
    }

    /**
     * Stores {@code strArr} under the {@code categoricalColumnValues} param.
     *
     * @param strArr array to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setCategoricalColumnValues(String[] strArr) {
        Object updated = set(this.categoricalColumnValues, strArr);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the array currently resolved for the {@code categoricalColumnValues} param. */
    public String[] getCategoricalColumnValues() {
        Object resolved = $(this.categoricalColumnValues);
        return (String[]) resolved;
    }

    /**
     * Stores {@code strArr} under the {@code numericColumnNames} param.
     *
     * @param strArr array to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setNumericColumnNames(String[] strArr) {
        Object updated = set(this.numericColumnNames, strArr);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the array currently resolved for the {@code numericColumnNames} param. */
    public String[] getNumericColumnNames() {
        Object resolved = $(this.numericColumnNames);
        return (String[]) resolved;
    }

    /**
     * Stores {@code strArr} under the {@code booleanColumnNames} param.
     *
     * @param strArr array to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setBooleanColumnNames(String[] strArr) {
        Object updated = set(this.booleanColumnNames, strArr);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the array currently resolved for the {@code booleanColumnNames} param. */
    public String[] getBooleanColumnNames() {
        Object resolved = $(this.booleanColumnNames);
        return (String[]) resolved;
    }

    /**
     * Converts each boolean to its string form and stores the resulting array under the
     * {@code booleanColumnValues} param (a string-array param).
     *
     * @param zArr booleans to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setBooleanColumnValues(boolean[] zArr) {
        String[] encoded = new String[zArr.length];
        for (int idx = 0; idx < zArr.length; idx++) {
            encoded[idx] = $anonfun$setBooleanColumnValues$1(zArr[idx]);
        }
        Object updated = set(this.booleanColumnValues, encoded);
        return (DataSanitizerTransformer) updated;
    }

    /**
     * Reads the string array resolved for the {@code booleanColumnValues} param and parses every
     * element back into a boolean.
     *
     * @return one boolean per stored string, in the same order
     */
    public boolean[] getBooleanColumnValues() {
        String[] stored = (String[]) $(this.booleanColumnValues);
        boolean[] decoded = new boolean[stored.length];
        for (int idx = 0; idx < stored.length; idx++) {
            decoded[idx] = $anonfun$getBooleanColumnValues$1(stored[idx]);
        }
        return decoded;
    }

    /**
     * Stores {@code dArr} under the {@code numericColumnValues} param.
     *
     * @param dArr array to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setNumericColumnValues(double[] dArr) {
        Object updated = set(this.numericColumnValues, dArr);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the array currently resolved for the {@code numericColumnValues} param. */
    public double[] getNumericColumnValues() {
        Object resolved = $(this.numericColumnValues);
        return (double[]) resolved;
    }

    /**
     * Stores {@code str} under the {@code decideModel} param.
     *
     * @param str value to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setDecideModel(String str) {
        Object updated = set(this.decideModel, str);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the value currently resolved for the {@code decideModel} param. */
    public String getDecideModel() {
        Object resolved = $(this.decideModel);
        return (String) resolved;
    }

    /**
     * Stores {@code str} under the {@code fillMode} param.
     *
     * @param str value to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setFillMode(String str) {
        Object updated = set(this.fillMode, str);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the value currently resolved for the {@code fillMode} param. */
    public String getFillMode() {
        Object resolved = $(this.fillMode);
        return (String) resolved;
    }

    /**
     * Stores {@code str} under the {@code characterNABlanketFill} param.
     *
     * @param str value to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setCharacterNABlanketFill(String str) {
        Object updated = set(this.characterNABlanketFill, str);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the value currently resolved for the {@code characterNABlanketFill} param. */
    public String getCharacterNABlanketFill() {
        Object resolved = $(this.characterNABlanketFill);
        return (String) resolved;
    }

    /**
     * Stores {@code d} (boxed) under the {@code numericNABlanketFill} param.
     *
     * @param d value to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setNumericNABlanketFill(double d) {
        Object boxed = BoxesRunTime.boxToDouble(d);
        Object updated = set(this.numericNABlanketFill, boxed);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the unboxed value resolved for the {@code numericNABlanketFill} param. */
    public double getNumericNABlanketFill() {
        Object boxed = $(this.numericNABlanketFill);
        return BoxesRunTime.unboxToDouble(boxed);
    }

    /**
     * Stores {@code strArr} under the {@code categoricalNAFillMapKeys} param.
     *
     * @param strArr array to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setCategoricalNAFillMapKeys(String[] strArr) {
        Object updated = set(this.categoricalNAFillMapKeys, strArr);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the array currently resolved for the {@code categoricalNAFillMapKeys} param. */
    public String[] getCategoricalNAFillMapKeys() {
        Object resolved = $(this.categoricalNAFillMapKeys);
        return (String[]) resolved;
    }

    /**
     * Stores {@code strArr} under the {@code categoricalNAFillMapValues} param.
     *
     * @param strArr array to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setCategoricalNAFillMapValues(String[] strArr) {
        Object updated = set(this.categoricalNAFillMapValues, strArr);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the array currently resolved for the {@code categoricalNAFillMapValues} param. */
    public String[] getCategoricalNAFillMapValues() {
        Object resolved = $(this.categoricalNAFillMapValues);
        return (String[]) resolved;
    }

    /**
     * Stores {@code strArr} under the {@code numericNAFillMapKeys} param.
     *
     * @param strArr array to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setNumericNAFillMapKeys(String[] strArr) {
        Object updated = set(this.numericNAFillMapKeys, strArr);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the array currently resolved for the {@code numericNAFillMapKeys} param. */
    public String[] getNumericNAFillMapKeys() {
        Object resolved = $(this.numericNAFillMapKeys);
        return (String[]) resolved;
    }

    /**
     * Stores {@code dArr} under the {@code numericNAFillMapValues} param.
     *
     * @param dArr array to store
     * @return the result of {@code set}, cast to this transformer type
     */
    public DataSanitizerTransformer setNumericNAFillMapValues(double[] dArr) {
        Object updated = set(this.numericNAFillMapValues, dArr);
        return (DataSanitizerTransformer) updated;
    }

    /** Returns the array currently resolved for the {@code numericNAFillMapValues} param. */
    public double[] getNumericNAFillMapValues() {
        Object resolved = $(this.numericNAFillMapValues);
        return (double[]) resolved;
    }

    /**
     * Splits {@code map} into a key array and a value array and stores them through
     * {@code setCategoricalNAFillMapKeys} and {@code setCategoricalNAFillMapValues}.
     *
     * @param map column-to-fill-value map to store
     * @return the result of {@code setCategoricalNAFillMapValues}
     */
    public DataSanitizerTransformer setCategoricalNAFillMap(Map<String, String> map) {
        String[] keyArray = (String[]) map.keys().toArray(ClassTag$.MODULE$.apply(String.class));
        setCategoricalNAFillMapKeys(keyArray);
        String[] valueArray = (String[]) map.values().toArray(ClassTag$.MODULE$.apply(String.class));
        return setCategoricalNAFillMapValues(valueArray);
    }

    /**
     * Splits {@code map} into a key array and a {@code double[]} value array and stores them
     * through {@code setNumericNAFillMapKeys} and {@code setNumericNAFillMapValues}.
     *
     * @param map column-to-fill-value map to store
     * @return the result of {@code setNumericNAFillMapValues}
     */
    public DataSanitizerTransformer setNumericNAFillMap(Map<String, Object> map) {
        String[] keyArray = (String[]) map.keys().toArray(ClassTag$.MODULE$.apply(String.class));
        setNumericNAFillMapKeys(keyArray);
        double[] valueArray = (double[]) map.values().toArray(ClassTag$.MODULE$.Double());
        return setNumericNAFillMapValues(valueArray);
    }

    /**
     * Runs a {@link DataSanitizer} configured from this stage's params and returns the data with
     * rows whose label column is null or NaN filtered out.
     *
     * <p>When {@code naFillFlag} is set, {@code generateCleanData} is called with the config from
     * {@code buildNaConfig()} if one is defined, otherwise with the sanitizer's own default first
     * argument. When the flag is not set, the input dataset is used as-is together with a
     * placeholder {@link NaFillConfig} and the sanitizer's {@code decideModel()} result.
     *
     * <p>If no decided model is stored yet (null or empty), the categorical and numeric columns
     * and values of the resulting config, plus the model string, are written to this stage's params.
     *
     * @param dataset input data
     * @return the resulting data with null/NaN-label rows removed
     */
    @Override // com.databricks.labs.automl.pipeline.AbstractTransformer
    public Dataset<Row> transformInternal(Dataset<?> dataset) {
        DataSanitizer sanitizer = new DataSanitizer(dataset.toDF())
                .setLabelCol(getLabelColumn())
                .setFeatureCol(getFeatureCol())
                .setModelSelectionDistinctThreshold(getModelSelectionDistinctThreshold())
                .setNumericFillStat(getNumericFillStat())
                .setCharacterFillStat(getCharacterFillStat())
                .setParallelism(getParallelism())
                .setCategoricalNAFillMap(SchemaUtils$.MODULE$.generateMapFromKeysValues(getCategoricalNAFillMapKeys(), getCategoricalNAFillMapValues()))
                .setCharacterNABlanketFillValue(getCharacterNABlanketFill())
                .setNumericNABlanketFillValue(getNumericNABlanketFill())
                .setNumericNAFillMap(SchemaUtils$.MODULE$.generateMapFromKeysValues(getNumericNAFillMapKeys(), getNumericNAFillMapValues()))
                .setNAFillMode(getFillMode())
                .setFilterPrecision(getFilterPrecision())
                .setFieldsToIgnoreInVector(new String[]{getAutomlInternalId()});

        Tuple3<Dataset<Row>, NaFillConfig, String> outcome;
        if (!getNaFillFlag()) {
            // No NA filling requested: keep the input and pair it with a placeholder config.
            outcome = new Tuple3<>(dataset, new NaFillConfig(Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(""), "")})), Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(""), BoxesRunTime.boxToDouble(0.0d))})), Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(""), BoxesRunTime.boxToBoolean(false))}))), sanitizer.decideModel());
        } else {
            Option<NaFillConfig> storedFills = buildNaConfig();
            if (storedFills.isDefined()) {
                outcome = sanitizer.generateCleanData((NaFillConfig) storedFills.get(), false, getDecideModel());
            } else {
                outcome = sanitizer.generateCleanData(sanitizer.generateCleanData$default$1(), false, getDecideModel());
            }
        }
        if (outcome == null) {
            throw new MatchError(outcome);
        }
        Dataset cleanedData = (Dataset) outcome._1();
        NaFillConfig appliedFills = (NaFillConfig) outcome._2();
        String detectedModel = (String) outcome._3();

        String storedModel = getDecideModel();
        if (storedModel == null || storedModel.isEmpty()) {
            // NOTE(review): only categorical and numeric columns are written back here, although
            // buildNaConfig() also reads booleanColumnNames/booleanColumnValues - confirm intended.
            setCategoricalColumnNames((String[]) appliedFills.categoricalColumns().keys().toArray(ClassTag$.MODULE$.apply(String.class)));
            setCategoricalColumnValues((String[]) appliedFills.categoricalColumns().values().toArray(ClassTag$.MODULE$.apply(String.class)));
            setNumericColumnNames((String[]) appliedFills.numericColumns().keys().toArray(ClassTag$.MODULE$.apply(String.class)));
            setNumericColumnValues((double[]) appliedFills.numericColumns().values().toArray(ClassTag$.MODULE$.Double()));
            setDecideModel(detectedModel);
        }

        return cleanedData.toDF()
                .filter(functions$.MODULE$.col((String) $(labelColumn())).isNotNull())
                .filter(functions$.MODULE$.col((String) $(labelColumn())).isNaN().unary_$bang());
    }

    /**
     * Rebuilds a {@link NaFillConfig} from the column name/value params stored on this stage.
     *
     * @return a defined option when both the categorical and the numeric column-name arrays are
     *         reported non-empty by {@code SchemaUtils.isNotEmpty}; otherwise {@code None}
     */
    private Option<NaFillConfig> buildNaConfig() {
        SchemaUtils$ schema = SchemaUtils$.MODULE$;
        boolean haveStoredColumns = schema.isNotEmpty(getCategoricalColumnNames())
                && schema.isNotEmpty(getNumericColumnNames());
        if (!haveStoredColumns) {
            return (Option) None$.MODULE$;
        }
        return new Some(new NaFillConfig(
                schema.generateMapFromKeysValues(getCategoricalColumnNames(), getCategoricalColumnValues()),
                schema.generateMapFromKeysValues(getNumericColumnNames(), getNumericColumnValues()),
                schema.generateMapFromKeysValues(getBooleanColumnNames(), getBooleanColumnValues())));
    }

    /**
     * Returns the incoming schema unchanged.
     *
     * @param structType input schema
     * @return the same {@code structType} instance that was passed in
     */
    @Override // com.databricks.labs.automl.pipeline.AbstractTransformer
    public StructType transformSchemaInternal(StructType structType) {
        return structType;
    }

    /* renamed from: copy, reason: merged with bridge method [inline-methods] and merged with bridge method [inline-methods] and merged with bridge method [inline-methods] */
    /**
     * Copies this stage through {@code defaultCopy}.
     *
     * @param paramMap extra params handed unchanged to {@code defaultCopy}
     * @return the result of {@code defaultCopy}, cast to this transformer type
     */
    public DataSanitizerTransformer m329copy(ParamMap paramMap) {
        Object duplicate = defaultCopy(paramMap);
        return (DataSanitizerTransformer) duplicate;
    }

    /**
     * Synthetic lambda body: renders a primitive boolean as its string form.
     *
     * @param z the boolean to render
     * @return the string representation of the boxed value of {@code z}
     */
    public static final /* synthetic */ String $anonfun$setBooleanColumnValues$1(boolean z) {
        // String.valueOf(Object) delegates to toString() for a non-null argument.
        final Object boxedFlag = BoxesRunTime.boxToBoolean(z);
        return String.valueOf(boxedFlag);
    }

    /**
     * Synthetic lambda body: parses a string into a boolean through Scala's {@code StringOps.toBoolean}.
     *
     * @param str the text to parse
     * @return the boolean produced by {@code StringOps.toBoolean} for {@code str}
     */
    public static final /* synthetic */ boolean $anonfun$getBooleanColumnValues$1(String str) {
        final StringOps textOps = new StringOps(Predef$.MODULE$.augmentString(str));
        return textOps.toBoolean();
    }

    /**
     * Creates a transformer with the given uid and instantiates every param object it exposes.
     * No param values are set here; only the param definitions (name + description) are created.
     *
     * @param str the uid stored on this instance
     */
    public DataSanitizerTransformer(String str) {
        // The uid is assigned before anything else; every param below is created with `this` as its parent.
        this.uid = str;
        // Decompiled Scala trait initialisers for the writable mix-ins.
        MLWritable.$init$(this);
        DefaultParamsWritable.$init$(this);
        // Params declared by the HasLabelColumn / HasFeatureColumn traits are installed through their generated setters.
        com$databricks$labs$automl$pipeline$HasLabelColumn$_setter_$labelColumn_$eq(new Param<>(this, "labelColumn", "Label Column Name"));
        com$databricks$labs$automl$pipeline$HasFeatureColumn$_setter_$featureCol_$eq(new Param<>(this, "featureCol", "Feature Column Name"));
        // Scalar configuration params.
        this.numericFillStat = new Param<>(this, "numericFillStat", "Numeric fill stats");
        this.characterFillStat = new Param<>(this, "characterFillStat", "Character fill stat");
        this.modelSelectionDistinctThreshold = new IntParam(this, "modelSelectionDistinctThreshold", "model selection distinct threshold");
        this.filterPrecision = new DoubleParam(this, "filterPrecision", "Filter precision");
        this.parallelism = new IntParam(this, "parallelism", "filter parallelism");
        this.naFillFlag = new BooleanParam(this, "naFillFlag", "Na Fill flag");
        // Parallel name/value array params for categorical, numeric and boolean columns.
        this.categoricalColumnNames = new StringArrayParam(this, "categoricalColumnNames", "Categorical Columns");
        this.categoricalColumnValues = new StringArrayParam(this, "categoricalColumnValues", "Categorical Columns' Values");
        this.numericColumnNames = new StringArrayParam(this, "numericColumnNames", "Numeric Columns");
        this.numericColumnValues = new DoubleArrayParam(this, "numericColumnValues", "Numeric Columns' Values");
        this.booleanColumnNames = new StringArrayParam(this, "booleanColumnNames", "Boolean Columns");
        // Boolean values are held in a string-array param (the synthetic helpers in this class convert boolean <-> String).
        this.booleanColumnValues = new StringArrayParam(this, "booleanColumnValues", "Boolean Columns' Values");
        this.decideModel = new Param<>(this, "decideModel", "Decided model");
        // Fill-mode params: blanket fill values plus key/value arrays for the NA fill maps.
        this.fillMode = new Param<>(this, "fillMode", "fillMode");
        this.characterNABlanketFill = new Param<>(this, "characterNABlanketFill", "characterNABlanketFill");
        this.numericNABlanketFill = new DoubleParam(this, "numericNABlanketFill", "numericNABlanketFill");
        this.categoricalNAFillMapKeys = new StringArrayParam(this, "categoricalNAFillMapKeys", "categoricalNAFillMapKeys");
        this.categoricalNAFillMapValues = new StringArrayParam(this, "categoricalNAFillMapValues", "categoricalNAFillMapValues");
        this.numericNAFillMapKeys = new StringArrayParam(this, "numericNAFillMapKeys", "numericNAFillMapKeys");
        this.numericNAFillMapValues = new DoubleArrayParam(this, "numericNAFillMapValues", "numericNAFillMapValues");
    }

    /**
     * Creates a transformer with a random uid (prefix {@code "DataSanitizerTransformer"}) and
     * assigns an initial value to every param of this stage.
     *
     * <p>Initial values: feature column {@code "features"}, numeric fill stat {@code "mean"},
     * character fill stat {@code "max"}, distinct threshold {@code 10}, filter precision
     * {@code 0.01}, parallelism {@code 20}, NA fill flag {@code false}, decided model {@code ""},
     * fill mode {@code "auto"}, blanket fills {@code ""} / {@code 0.0}, debug disabled, and empty
     * arrays for all column name/value and NA-fill-map key/value params.
     */
    public DataSanitizerTransformer() {
        this(Identifiable$.MODULE$.randomUID("DataSanitizerTransformer"));
        setAutomlInternalId(AutoMlPipelineMlFlowUtils$.MODULE$.AUTOML_INTERNAL_ID_COL());
        setFeatureCol("features");
        setNumericFillStat("mean");
        setCharacterFillStat("max");
        setModelSelectionDistinctThreshold(10);
        setFilterPrecision(0.01d);
        setParallelism(20);
        setNaFillFlag(false);
        setDecideModel("");

        // Column name/value pairs all start out empty.
        setCategoricalColumnNames((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        // Fix: this line previously repeated setNumericColumnValues, which left
        // categoricalColumnValues as the only name/value param without an initial value.
        setCategoricalColumnValues((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setNumericColumnNames((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setNumericColumnValues((double[]) Array$.MODULE$.empty(ClassTag$.MODULE$.Double()));
        setBooleanColumnNames((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setBooleanColumnValues((boolean[]) Array$.MODULE$.empty(ClassTag$.MODULE$.Boolean()));

        // NA fill maps (stored as parallel key/value arrays) also start out empty.
        setCategoricalNAFillMapKeys((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setCategoricalNAFillMapValues((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setNumericNAFillMapKeys((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setNumericNAFillMapValues((double[]) Array$.MODULE$.empty(ClassTag$.MODULE$.Double()));

        setCharacterNABlanketFill("");
        setNumericNABlanketFill(0.0d);
        setFillMode("auto");
        setDebugEnabled(false);
    }
}
