package com.gengoai.apollo.ml.data;

import com.gengoai.apollo.ml.DataSet;
import com.gengoai.apollo.ml.Datum;
import com.gengoai.apollo.ml.StreamingDataSet;
import com.gengoai.apollo.ml.observation.Observation;
import com.gengoai.apollo.ml.observation.Variable;
import com.gengoai.io.CSV;
import com.gengoai.io.resource.Resource;
import com.gengoai.math.Math2;
import com.gengoai.stream.StreamingContext;
import java.io.IOException;
import java.io.Serializable;
import lombok.NonNull;

/* loaded from: input_file:com/gengoai/apollo/ml/data/CSVDataSetReader.class */
/**
 * A {@link DataSetReader} that builds a {@link DataSet} from rows of a CSV resource.
 *
 * <p>Each row is obtained as a map from {@link CSV#rowMapStream} and turned into a {@link Datum}
 * holding one observation per key of that map. When a {@link Schema} was supplied at construction
 * time it performs the value conversion; otherwise the type of each value is guessed (see
 * {@link #guess(String, String)}).
 */
public class CSVDataSetReader implements DataSetReader, Serializable {
    private static final long serialVersionUID = 1;

    /** CSV definition used to produce the row maps. */
    private final CSV csv;

    /** Optional schema used to convert values; {@code null} means value types are guessed. */
    private final Schema schema;

    /**
     * Creates a reader without a schema; every value is converted by guessing its type.
     *
     * @param csv the CSV definition used to read rows
     * @throws NullPointerException if {@code csv} is null
     */
    public CSVDataSetReader(@NonNull CSV csv) {
        if (csv == null) {
            throw new NullPointerException("csv is marked non-null but is null");
        }
        this.schema = null;
        this.csv = csv;
    }

    /**
     * Creates a reader that converts every value through the given schema.
     *
     * @param csv    the CSV definition used to read rows
     * @param schema the schema used to convert each (key, value) pair of a row
     * @throws NullPointerException     if {@code csv} or {@code schema} is null
     * @throws IllegalArgumentException if the CSV neither reports having a header nor has a
     *                                  non-empty header defined
     */
    public CSVDataSetReader(@NonNull CSV csv, @NonNull Schema schema) {
        if (csv == null) {
            throw new NullPointerException("csv is marked non-null but is null");
        }
        if (schema == null) {
            throw new NullPointerException("schema is marked non-null but is null");
        }
        // The defined header is only consulted when the CSV does not report having one itself.
        final boolean headerAvailable = csv.getHasHeader() || !csv.getHeader().isEmpty();
        if (!headerAvailable) {
            throw new IllegalArgumentException("Either the CSV must have a header or one must be defined.");
        }
        this.csv = csv;
        this.schema = schema;
    }

    /**
     * Guesses the variable type for a raw value.
     *
     * @param name  the variable name (the row-map key)
     * @param value the raw value of the cell
     * @return a real-valued variable when {@link Math2#tryParseDouble} yields a non-null result,
     *         otherwise a binary variable built from the name and the raw value
     */
    private Variable guess(String name, String value) {
        final Double parsed = Math2.tryParseDouble(value);
        if (parsed != null) {
            return Variable.real(name, parsed.doubleValue());
        }
        return Variable.binary(name, value);
    }

    /**
     * Reads the given resource as CSV and maps every row to a {@link Datum}.
     *
     * @param resource the resource holding the CSV data
     * @return the result of calling {@code probe()} on the {@link StreamingDataSet} wrapping the
     *         mapped rows
     * @throws IOException          declared by the reader contract
     * @throws NullPointerException if {@code resource} is null
     */
    @Override // com.gengoai.apollo.ml.data.DataSetReader
    public DataSet read(@NonNull Resource resource) throws IOException {
        if (resource == null) {
            throw new NullPointerException("dataResource is marked non-null but is null");
        }
        return new StreamingDataSet(
                StreamingContext.local().stream(
                        this.csv.rowMapStream(resource).map(row -> {
                            Datum datum = new Datum();
                            for (String column : row.keySet()) {
                                // Without a schema the value type is guessed from the raw text.
                                Observation observation = this.schema == null
                                        ? (Observation) guess(column, (String) row.get(column))
                                        : (Observation) this.schema.convert(column, row.get(column));
                                datum.put(column, observation);
                            }
                            return datum;
                        })))
                .probe();
    }
}
