package org.maochen.nlp.ml.util.dataio;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.maochen.nlp.ml.Tuple;
import org.maochen.nlp.ml.vector.FeatNamedVector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/maochen/nlp/ml/util/dataio/CSVDataReader.class */
/**
 * Reads a delimited text source with a header row into a list of {@link Tuple}s, one per record.
 *
 * <p>One column supplies the label. Every other column that is not listed in
 * {@link #ignoredColumns} contributes one feature value: a cell that parses with
 * {@link Double#parseDouble(String)} is used as-is, any other cell becomes {@code 1.0}
 * (or {@code 0.0} when blank) and, when a header is available, the feature name is suffixed
 * with {@code "_" + cell} (lower-cased and trimmed).
 */
public class CSVDataReader {
    private static final Logger LOG = LoggerFactory.getLogger(CSVDataReader.class);

    private final String filename;
    private final String delim;

    /** Index of the label column; {@code -1} is resolved to the last column by {@link #read(InputStream)}. */
    public int labelCol;

    /** Column names ordered by column index; populated by {@link #read(InputStream)}. */
    public String[] header = null;

    /** Column indices that are neither label nor feature. Never {@code null}. */
    public Set<Integer> ignoredColumns;

    /**
     * @param str  path of the file opened by {@link #read()}
     * @param i    index of the label column, or {@code -1} for "last column"
     * @param str2 delimiter; only its first character is used
     * @param set  column indices to skip, or {@code null} for none (the set is stored, not copied)
     */
    public CSVDataReader(String str, int i, String str2, Set<Integer> set) {
        this.filename = str;
        this.labelCol = i;
        this.delim = str2;
        this.ignoredColumns = set != null ? set : new HashSet<>();
    }

    /**
     * Opens the configured file, parses it with {@link #read(InputStream)} and closes it again.
     *
     * @return one tuple per data record, in file order
     * @throws IOException if the file cannot be opened or read
     */
    public List<Tuple> read() throws IOException {
        // try-with-resources: the stream is owned here, so it must be released here.
        try (InputStream in = new FileInputStream(this.filename)) {
            return read(in);
        }
    }

    /**
     * Converts one CSV record into a labelled feature vector.
     *
     * <p>Feature values are written to consecutive slots of the vector (label and ignored
     * columns are skipped, not left as gaps), and {@code featsName[k]} names the value in
     * slot {@code k}. {@code featsName} keeps one slot per CSV column, so its trailing
     * entries stay {@code null}.
     *
     * <p>This method is invoked from a parallel stream in {@link #read(InputStream)}, so it
     * only reads instance state.
     *
     * @param cSVRecord the record to convert
     * @return a tuple whose label is the cell at {@link #labelCol}
     */
    protected Tuple extractValuedFeat(CSVRecord cSVRecord) {
        final int columnCount = cSVRecord.size();

        // Count the columns that really become features instead of assuming that the label and
        // every ignored index are distinct, in-range columns of this record.
        int featureCount = 0;
        for (int col = 0; col < columnCount; col++) {
            if (isFeatureColumn(col)) {
                featureCount++;
            }
        }

        FeatNamedVector featNamedVector = new FeatNamedVector(new double[featureCount]);
        featNamedVector.featsName = new String[columnCount];
        Tuple tuple = new Tuple(featNamedVector);
        tuple.label = cSVRecord.get(this.labelCol);

        final double[] values = tuple.vector.getVector();
        final String[] names = featNamedVector.featsName;

        // 'slot' is the position inside the compacted feature vector; it differs from 'col'
        // as soon as a label or ignored column precedes a feature column.
        int slot = 0;
        for (int col = 0; col < columnCount; col++) {
            if (!isFeatureColumn(col)) {
                continue;
            }

            final String cell = cSVRecord.get(col);
            // A record may be wider than the header, and the header may be absent altogether.
            final boolean hasName = this.header != null && col < this.header.length;
            names[slot] = hasName ? this.header[col] : null;

            try {
                values[slot] = Double.parseDouble(cell);
            } catch (NumberFormatException e) {
                // Non-numeric cell: fold its text into the feature name and store a presence flag.
                if (hasName) {
                    // Locale.ROOT keeps generated feature names identical across JVM default locales.
                    names[slot] = names[slot] + "_" + cell.toLowerCase(Locale.ROOT).trim();
                }
                values[slot] = cell.trim().isEmpty() ? 0.0d : 1.0d;
            }
            slot++;
        }
        return tuple;
    }

    /**
     * Parses RFC 4180 style content whose first record is the header.
     *
     * <p>Side effects: {@link #header} is replaced with the parsed column names, and a
     * {@link #labelCol} of {@code -1} is replaced by the last column index of the first data
     * record. The supplied stream is read to the end but not closed; that is left to the
     * caller. Text is decoded with the platform default charset.
     *
     * @param inputStream source of the delimited text
     * @return one tuple per data record, in input order; empty when there are no data records
     * @throws IOException if reading fails
     */
    public List<Tuple> read(InputStream inputStream) throws IOException {
        CSVFormat format = CSVFormat.RFC4180.withHeader().withDelimiter(this.delim.charAt(0));
        CSVParser parser = new CSVParser(new InputStreamReader(inputStream), format);
        List<CSVRecord> records = parser.getRecords();

        // The header map is name -> column index; order the names by that index.
        Map<String, Integer> headerMap = parser.getHeaderMap();
        this.header = headerMap == null
                ? null
                : headerMap.entrySet().stream()
                        .sorted(Map.Entry.comparingByValue())
                        .map(Map.Entry::getKey)
                        .toArray(String[]::new);

        // "Last column" can only be resolved when there is at least one data record.
        if (this.labelCol == -1 && !records.isEmpty()) {
            this.labelCol = records.get(0).size() - 1;
        }

        return records.parallelStream().map(this::extractValuedFeat).collect(Collectors.toList());
    }

    /** Returns whether the column at {@code col} is a feature, i.e. neither the label nor ignored. */
    private boolean isFeatureColumn(int col) {
        return col != this.labelCol && !this.ignoredColumns.contains(col);
    }
}
