package io.cdap.mmds.modeler.feature;

import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.DataTypes;

/* loaded from: input_file:lib/mmds-model-1.7.1.jar:io/cdap/mmds/modeler/feature/FeatureGenerator.class */
/**
 * Base class for turning raw feature columns of a Spark {@code Dataset<Row>} into the columns
 * produced by a fitted feature-generation {@link PipelineModel}.
 *
 * <p>Before the pipeline runs, every feature column is copied to a "clean" column (see
 * {@link #cleanName(String)}) that is cast to string for categorical features and to double for
 * all other features, and nulls in those clean columns are replaced with fixed placeholders.
 *
 * <p>The pipeline model is fitted lazily by the first {@code generateFeatures} call and cached on
 * this instance; the lazy initialization is not synchronized.
 */
public abstract class FeatureGenerator {
    /** Placeholder written into null cells of clean categorical (string) columns. */
    private static final String MISSING_CATEGORICAL_VALUE = "?";
    /** Placeholder written into null cells of clean non-categorical (double) columns. */
    private static final double MISSING_NUMERIC_VALUE = -1.0d;

    /** Names of the feature columns, in the order given at construction time. */
    protected final List<String> features;
    /** Names of the features that are treated as categorical (string-valued). */
    private final Set<String> categoricalFeatures;
    /** Fitted pipeline; {@code null} until the first {@code generateFeatures} call. */
    private PipelineModel featureGenModel;

    /**
     * Creates a generator over defensive copies of the given collections.
     *
     * <p>NOTE(review): the decompiler reported that this constructor's access modifier was changed
     * from {@code protected}; it is kept {@code public} here so existing callers still compile.
     *
     * @param list names of all feature columns
     * @param set names of the features that should be treated as categorical
     */
    public FeatureGenerator(List<String> list, Set<String> set) {
        this.features = new ArrayList<>(list);
        this.categoricalFeatures = new HashSet<>(set);
    }

    /**
     * Returns the feature column names.
     *
     * @return an unmodifiable view of the feature names
     */
    public List<String> getFeatures() {
        return Collections.unmodifiableList(this.features);
    }

    /**
     * Convenience overload of {@link #generateFeatures(Dataset, List)} that carries a single extra
     * column through.
     *
     * @param dataset input data containing the feature columns and {@code str}
     * @param str name of the one additional column to keep in the selection
     * @return the dataset produced by the feature-generation pipeline
     */
    public Dataset<Row> generateFeatures(Dataset<Row> dataset, String str) {
        return generateFeatures(dataset, ImmutableList.of(str));
    }

    /**
     * Prepares the feature columns and runs them through the feature-generation pipeline.
     *
     * <p>The selection passed to the pipeline is laid out as: the original feature columns, then
     * one clean copy per feature (string for categorical features, double otherwise), then the
     * extra columns named in {@code list}. Nulls in the clean columns are filled with {@code "?"}
     * (categorical) or {@code -1.0} (everything else).
     *
     * <p>If no model has been fitted yet, one is obtained from
     * {@link #getFeatureGenModel(Dataset)} using the prepared dataset of this call and reused by
     * all later calls.
     *
     * @param dataset input data containing the feature columns and the columns named in
     *     {@code list}
     * @param list names of additional columns to keep in the selection
     * @return the result of transforming the prepared dataset with the pipeline model
     */
    public Dataset<Row> generateFeatures(Dataset<Row> dataset, List<String> list) {
        int featureCount = this.features.size();
        Column[] selected = new Column[(2 * featureCount) + list.size()];
        // Keyed by clean column name; values are String or Double depending on the column type.
        HashMap<String, Object> naFillValues = new HashMap<>();

        int index = 0;
        for (String feature : this.features) {
            String cleanedName = cleanName(feature);
            // Slot [index] keeps the untouched original column.
            selected[index] = new Column(feature);

            Column cleaned = new Column(feature).as(cleanedName);
            if (isCategorical(feature)) {
                cleaned = cleaned.cast(DataTypes.StringType);
                naFillValues.put(cleanedName, MISSING_CATEGORICAL_VALUE);
            } else {
                cleaned = cleaned.cast(DataTypes.DoubleType);
                naFillValues.put(cleanedName, MISSING_NUMERIC_VALUE);
            }
            // Slot [featureCount + index] holds the typed clean copy of the same feature.
            selected[featureCount + index] = cleaned;
            index++;
        }

        // The extra (non-feature) columns go after both feature groups.
        int extraIndex = 2 * featureCount;
        for (String extraColumn : list) {
            selected[extraIndex] = new Column(extraColumn);
            extraIndex++;
        }

        Dataset<Row> prepared = dataset.select(selected).na().fill(naFillValues);
        if (this.featureGenModel == null) {
            // Fitted once, on the first dataset seen, then reused for every later call.
            this.featureGenModel = getFeatureGenModel(prepared);
        }
        return this.featureGenModel.transform(prepared);
    }

    /**
     * Returns the cached pipeline model.
     *
     * @return the fitted model, or {@code null} if {@code generateFeatures} has not been called yet
     */
    @Nullable
    public PipelineModel getFeatureGenModel() {
        return this.featureGenModel;
    }

    /**
     * Supplies the pipeline model used to transform the prepared data.
     *
     * @param dataset the prepared dataset (original, clean and extra columns, nulls filled)
     * @return the pipeline model that {@code generateFeatures} will cache and apply
     */
    protected abstract PipelineModel getFeatureGenModel(Dataset<Row> dataset);

    /**
     * Tells whether a feature was declared categorical at construction time.
     *
     * <p>NOTE(review): the decompiler reported that this method was originally {@code protected}.
     *
     * @param str feature name
     * @return {@code true} if {@code str} is in the categorical feature set
     */
    public boolean isCategorical(String str) {
        return this.categoricalFeatures.contains(str);
    }

    /**
     * Builds the name of the clean (typed, null-filled) column for a feature.
     *
     * <p>NOTE(review): the decompiler reported that this method was originally {@code protected}.
     *
     * @param str feature name
     * @return {@code "_c_"} followed by {@code str}
     */
    public String cleanName(String str) {
        return "_c_" + str;
    }

    /**
     * Builds the {@code "_i_"}-prefixed column name for a feature. It is not used within this
     * class; presumably subclasses use it to name indexed columns (verify against subclasses).
     *
     * <p>NOTE(review): the decompiler reported that this method was originally {@code protected}.
     *
     * @param str feature name
     * @return {@code "_i_"} followed by {@code str}
     */
    public String indexedName(String str) {
        return "_i_" + str;
    }
}
