package ai.djl.basicdataset.tabular;

import ai.djl.basicdataset.utils.DynamicBuffer;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.Shape;
import ai.djl.training.dataset.RandomAccessDataset;
import ai.djl.training.dataset.Record;
import ai.djl.util.Progress;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

/* loaded from: input_file:ai/djl/basicdataset/tabular/CsvDataset.class */
public class CsvDataset extends RandomAccessDataset {
    private static final Featurizer NUMERIC_FEATURIZER = new NumericFeaturizer();
    protected URL csvUrl;
    protected CSVFormat csvFormat;
    protected List<Feature> features;
    protected List<Feature> labels;
    protected List<CSVRecord> csvRecords;

    /* loaded from: input_file:ai/djl/basicdataset/tabular/CsvDataset$CsvBuilder.class */
    /**
     * Builder that collects the CSV location, the CSV format and the feature/label column
     * definitions used to construct a {@link CsvDataset}.
     *
     * @param <T> the concrete builder type, returned by every fluent method
     */
    public static class CsvBuilder<T extends CsvBuilder<T>> extends RandomAccessDataset.BaseBuilder<T> {
        protected URL csvUrl;
        protected CSVFormat csvFormat;
        protected List<Feature> features = new ArrayList<>();
        protected List<Feature> labels = new ArrayList<>();

        /**
         * Returns this builder typed as the concrete builder type.
         *
         * <p>The method keeps the decompiler-assigned name {@code mo20self} (originally
         * {@code self}) so that existing references to it keep resolving.
         *
         * @return this builder
         */
        @Override
        @SuppressWarnings("unchecked") // safe by the self-bounded type parameter T extends CsvBuilder<T>
        public T mo20self() {
            return (T) this;
        }

        /**
         * Sets the CSV source to a local file.
         *
         * @param path the path of the CSV file; it is converted to an absolute {@code file:} URL
         * @return this builder
         * @throws IllegalArgumentException if the path cannot be converted to a URL
         */
        public T optCsvFile(Path path) {
            try {
                this.csvUrl = path.toAbsolutePath().toUri().toURL();
                return mo20self();
            } catch (MalformedURLException e) {
                throw new IllegalArgumentException("Invalid file path: " + path, e);
            }
        }

        /**
         * Sets the CSV source to the given URL string.
         *
         * @param str the URL of the CSV file
         * @return this builder
         * @throws IllegalArgumentException if {@code str} is not a well-formed URL
         */
        public T optCsvUrl(String str) {
            try {
                this.csvUrl = new URL(str);
                return mo20self();
            } catch (MalformedURLException e) {
                throw new IllegalArgumentException("Invalid url: " + str, e);
            }
        }

        /**
         * Sets the format handed to the CSV parser.
         *
         * @param cSVFormat the CSV format
         * @return this builder
         */
        public T setCsvFormat(CSVFormat cSVFormat) {
            this.csvFormat = cSVFormat;
            return mo20self();
        }

        /**
         * Appends the given feature definitions, in order.
         *
         * @param featureArr the features to add
         * @return this builder
         */
        public T addFeature(Feature... featureArr) {
            Collections.addAll(this.features, featureArr);
            return mo20self();
        }

        /**
         * Appends a feature column that is parsed as a number.
         *
         * @param str the column name
         * @return this builder
         */
        public T addNumericFeature(String str) {
            this.features.add(new Feature(str, true));
            return mo20self();
        }

        /**
         * Appends a categorical (string) feature column.
         *
         * @param str the column name
         * @return this builder
         */
        public T addCategoricalFeature(String str) {
            this.features.add(new Feature(str, false));
            return mo20self();
        }

        /**
         * Appends a categorical feature column that uses a caller-supplied value-to-index map.
         *
         * @param str the column name
         * @param map the mapping from cell value to index
         * @param z the flag forwarded to {@link Feature#Feature(String, Map, boolean)}
         *     (one-hot encoding when {@code true})
         * @return this builder
         */
        public T addCategoricalFeature(String str, Map<String, Integer> map, boolean z) {
            this.features.add(new Feature(str, map, z));
            return mo20self();
        }

        /**
         * Appends the given label definitions, in order.
         *
         * @param featureArr the labels to add
         * @return this builder
         */
        public T addLabel(Feature... featureArr) {
            Collections.addAll(this.labels, featureArr);
            return mo20self();
        }

        /**
         * Appends a label column that is parsed as a number.
         *
         * @param str the column name
         * @return this builder
         */
        public T addNumericLabel(String str) {
            this.labels.add(new Feature(str, true));
            return mo20self();
        }

        /**
         * Appends a categorical (string) label column.
         *
         * @param str the column name
         * @return this builder
         */
        public T addCategoricalLabel(String str) {
            // 'false' selects the string featurizer, mirroring addCategoricalFeature(String);
            // passing 'true' here would try to parse category names as floats.
            this.labels.add(new Feature(str, false));
            return mo20self();
        }

        /**
         * Appends a categorical label column that uses a caller-supplied value-to-index map.
         *
         * @param str the column name
         * @param map the mapping from cell value to index
         * @param z the flag forwarded to {@link Feature#Feature(String, Map, boolean)}
         *     (one-hot encoding when {@code true})
         * @return this builder
         */
        public T addCategoricalLabel(String str, Map<String, Integer> map, boolean z) {
            this.labels.add(new Feature(str, map, z));
            return mo20self();
        }

        /**
         * Builds the dataset from the collected settings.
         *
         * @return a new {@link CsvDataset}
         * @throws IllegalArgumentException if no feature or no label has been added
         */
        public CsvDataset build() {
            if (this.features.isEmpty()) {
                throw new IllegalArgumentException("Missing features.");
            }
            if (this.labels.isEmpty()) {
                throw new IllegalArgumentException("Missing labels.");
            }
            return new CsvDataset(this);
        }
    }

    /* loaded from: input_file:ai/djl/basicdataset/tabular/CsvDataset$Feature.class */
    /**
     * Pairs a CSV column name with the {@link Featurizer} that converts that column's cell
     * text into buffer values.
     */
    public static final class Feature {
        String name;
        Featurizer featurizer;

        /**
         * Creates a feature with an explicit featurizer.
         *
         * @param str the column name
         * @param featurizer the featurizer applied to the column's values
         */
        public Feature(String str, Featurizer featurizer) {
            this.name = str;
            this.featurizer = featurizer;
        }

        /**
         * Creates a feature using one of the built-in featurizers.
         *
         * @param str the column name
         * @param z {@code true} for the shared numeric featurizer, {@code false} for a new
         *     string featurizer
         */
        public Feature(String str, boolean z) {
            this(str, z ? CsvDataset.NUMERIC_FEATURIZER : new StringFeaturizer());
        }

        /**
         * Creates a string feature backed by the supplied value-to-index map.
         *
         * @param str the column name
         * @param map the mapping from cell value to index
         * @param z whether the string featurizer should one-hot encode
         */
        public Feature(String str, Map<String, Integer> map, boolean z) {
            this(str, new StringFeaturizer(map, z));
        }

        /**
         * Returns the column name.
         *
         * @return the column name
         */
        public String getName() {
            return name;
        }

        /**
         * Returns the featurizer used for this column.
         *
         * @return the featurizer
         */
        public Featurizer getFeaturizer() {
            return featurizer;
        }
    }

    /* loaded from: input_file:ai/djl/basicdataset/tabular/CsvDataset$Featurizer.class */
    /** Converts the text of a single CSV cell into values written to a {@link DynamicBuffer}. */
    public interface Featurizer {
        /**
         * Writes the representation of one cell value into the buffer.
         *
         * @param dynamicBuffer the buffer that receives the produced value(s)
         * @param str the raw cell text read from the CSV record
         */
        void featurize(DynamicBuffer dynamicBuffer, String str);
    }

    /* loaded from: input_file:ai/djl/basicdataset/tabular/CsvDataset$NumericFeaturizer.class */
    /** Featurizer that parses the cell text as a {@code float} and writes it to the buffer. */
    private static final class NumericFeaturizer implements Featurizer {
        private NumericFeaturizer() {
        }

        /**
         * Parses {@code str} with {@link Float#parseFloat(String)} and appends the result.
         *
         * @param dynamicBuffer the buffer that receives the parsed value
         * @param str the cell text to parse
         */
        @Override // ai.djl.basicdataset.tabular.CsvDataset.Featurizer
        public void featurize(DynamicBuffer dynamicBuffer, String str) {
            float parsed = Float.parseFloat(str);
            dynamicBuffer.put(parsed);
        }
    }

    /* loaded from: input_file:ai/djl/basicdataset/tabular/CsvDataset$StringFeaturizer.class */
    /**
     * Featurizer for categorical columns: maps each distinct cell value to an integer index
     * and writes either that index or a one-hot vector to the buffer.
     */
    private static final class StringFeaturizer implements Featurizer {
        private final Map<String, Integer> map;
        private final boolean onehotEncode;
        private final boolean autoMap;

        /**
         * Creates a featurizer that starts with an empty map and assigns the next free index
         * to every previously unseen value. One-hot encoding is off.
         */
        StringFeaturizer() {
            this.map = new HashMap<>();
            this.onehotEncode = false;
            this.autoMap = true;
        }

        /**
         * Creates a featurizer backed by a caller-supplied map; unseen values are rejected.
         *
         * @param map the mapping from cell value to index
         * @param z whether to write a one-hot vector instead of the index
         */
        StringFeaturizer(Map<String, Integer> map, boolean z) {
            this.map = map;
            this.onehotEncode = z;
            this.autoMap = false;
        }

        /**
         * Encodes one cell value.
         *
         * <p>With one-hot encoding, {@code map.size()} floats are written: {@code 1.0f} at the
         * value's index and {@code 0.0f} elsewhere. Otherwise the index itself is written.
         *
         * @param dynamicBuffer the buffer that receives the encoded value
         * @param str the cell text to encode
         * @throws IllegalArgumentException if {@code str} is not in the map and automatic
         *     mapping is disabled
         */
        @Override // ai.djl.basicdataset.tabular.CsvDataset.Featurizer
        public void featurize(DynamicBuffer dynamicBuffer, String str) {
            // Single lookup shared by both encodings.
            Integer index = this.map.get(str);
            if (this.onehotEncode) {
                if (index == null) {
                    // Fail with a descriptive error rather than a NullPointerException on unboxing.
                    throw new IllegalArgumentException("Value: " + str + " not found in the map.");
                }
                int hot = index.intValue();
                int width = this.map.size();
                for (int i = 0; i < width; i++) {
                    dynamicBuffer.put(i == hot ? 1.0f : 0.0f);
                }
                return;
            }
            if (index != null) {
                dynamicBuffer.put(index.intValue());
                return;
            }
            if (!this.autoMap) {
                throw new IllegalArgumentException("Value: " + str + " not found in the map.");
            }
            // Unseen value with auto-mapping on: the current map size is the next free index.
            int next = this.map.size();
            this.map.put(str, Integer.valueOf(next));
            dynamicBuffer.put(next);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Creates a dataset from the settings collected by the builder.
     *
     * <p>The feature and label lists are taken from the builder by reference, not copied.
     *
     * @param csvBuilder the builder holding the CSV location, format, features and labels
     */
    public CsvDataset(CsvBuilder<?> csvBuilder) {
        super(csvBuilder);
        labels = csvBuilder.labels;
        features = csvBuilder.features;
        csvFormat = csvBuilder.csvFormat;
        csvUrl = csvBuilder.csvUrl;
    }

    /**
     * Builds the record for one CSV row.
     *
     * @param nDManager the manager used to create the arrays
     * @param j the zero-based row index; must fit in an {@code int}
     * @return a record whose data comes from the feature columns and whose labels come from
     *     the label columns of that row
     * @throws ArithmeticException if {@code j} does not fit in an {@code int}
     */
    public Record get(NDManager nDManager, long j) {
        int row = Math.toIntExact(j);
        CSVRecord line = this.csvRecords.get(row);
        NDList data = toNDList(nDManager, line, this.features);
        NDList target = toNDList(nDManager, line, this.labels);
        return new Record(data, target);
    }

    /**
     * Returns the number of CSV records currently loaded.
     *
     * @return the size of the parsed record list
     */
    protected long availableSize() {
        int loaded = this.csvRecords.size();
        return loaded;
    }

    /**
     * Opens the CSV source, parses it as UTF-8 with the configured format, and stores all
     * records in memory.
     *
     * @param progress not used by this implementation
     * @throws IOException if the source cannot be opened, read, or closed
     */
    public void prepare(Progress progress) throws IOException {
        // try-with-resources closes the reader on every path and keeps the original failure as
        // the primary exception (a failing close() is attached as suppressed, not substituted).
        // Closing the reader also releases the stream the parser reads from.
        try (InputStreamReader reader =
                new InputStreamReader(getCsvStream(), StandardCharsets.UTF_8)) {
            this.csvRecords = new CSVParser(reader, this.csvFormat).getRecords();
        }
    }

    /**
     * Opens the configured CSV URL, wrapping it in a gzip decoder when the URL's file part
     * ends with {@code .gz}.
     *
     * @return the stream to read CSV text from
     * @throws IOException if the URL cannot be opened or the gzip header cannot be read
     */
    private InputStream getCsvStream() throws IOException {
        boolean gzipped = this.csvUrl.getFile().endsWith(".gz");
        InputStream raw = this.csvUrl.openStream();
        if (!gzipped) {
            return raw;
        }
        try {
            return new GZIPInputStream(raw);
        } catch (IOException | RuntimeException e) {
            // The GZIPInputStream constructor reads the header and may fail; close the
            // already-opened stream so it is not leaked.
            try {
                raw.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Creates a new, empty builder for a {@link CsvDataset}.
     *
     * @return a new builder
     */
    public static CsvBuilder<?> builder() {
        CsvBuilder<?> fresh = new CsvBuilder<>();
        return fresh;
    }

    /**
     * Returns the header names reported by the parser of the first loaded record.
     *
     * @return the header names, or an empty list when no records are loaded
     */
    public List<String> getColumnNames() {
        if (this.csvRecords.isEmpty()) {
            return Collections.emptyList();
        }
        CSVRecord first = this.csvRecords.get(0);
        return first.getParser().getHeaderNames();
    }

    /**
     * Encodes the selected columns of one CSV record into a single one-dimensional array.
     *
     * @param nDManager the manager used to create the array
     * @param cSVRecord the record to read cell values from
     * @param list the columns to encode, in output order
     * @return a list holding one array whose length equals the number of values written
     */
    protected NDList toNDList(NDManager nDManager, CSVRecord cSVRecord, List<Feature> list) {
        DynamicBuffer buffer = new DynamicBuffer();
        for (Feature column : list) {
            String cell = cSVRecord.get(column.getName());
            column.getFeaturizer().featurize(buffer, cell);
        }
        NDArray flat =
                nDManager.create(buffer.getBuffer(), new Shape(new long[] {buffer.getLength()}));
        return new NDList(flat);
    }
}
