package org.datavec.nlp.reader;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import org.datavec.api.conf.Configuration;
import org.datavec.api.records.Record;
import org.datavec.api.records.metadata.RecordMetaData;
import org.datavec.api.records.metadata.RecordMetaDataURI;
import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.records.reader.impl.FileRecordReader;
import org.datavec.api.split.InputSplit;
import org.datavec.api.vector.Vectorizer;
import org.datavec.api.writable.NDArrayWritable;
import org.datavec.api.writable.Writable;
import org.datavec.nlp.vectorizer.TfidfVectorizer;

/* loaded from: input_file:org/datavec/nlp/reader/TfidfRecordReader.class */
/**
 * A {@link FileRecordReader} that serves TF-IDF vectorized records.
 *
 * <p>All work happens in {@link #initialize(Configuration, InputSplit)}: the records are
 * collected into an in-memory list, and {@link #hasNext()}, {@link #nextRecord()},
 * {@link #reset()} and {@link #shuffle()} then operate on that cached list.
 *
 * <p>If no vectorizer has been supplied through {@link #setTfidfVectorizer(TfidfVectorizer)}
 * before initialization, a new one is created and fitted on the input split. Otherwise the
 * supplied vectorizer is only used to transform the records read by the parent reader.
 */
public class TfidfRecordReader extends FileRecordReader {
    private TfidfVectorizer tfidfVectorizer;
    /** Cursor over {@link #records}; {@code null} until initialization has built the cache. */
    private Iterator<Record> recordIter;
    /** Only assigned when this reader fits its own vectorizer; otherwise stays at 0. */
    private int numFeatures;
    private List<Record> records = new ArrayList<>();
    private boolean initialized = false;

    /**
     * Initializes this reader with a fresh, empty {@link Configuration}.
     *
     * @param inputSplit the split to read from
     * @throws IOException declared by the two-argument overload this delegates to
     * @throws InterruptedException declared by the two-argument overload this delegates to
     */
    public void initialize(InputSplit inputSplit) throws IOException, InterruptedException {
        initialize(new Configuration(), inputSplit);
    }

    /**
     * Initializes the parent reader and then builds the in-memory record cache.
     *
     * <p>When no vectorizer is set, a new {@link TfidfVectorizer} is created, initialized with
     * {@code configuration} and fitted using this reader as its source; the records it reports
     * through the callback become the cache. When a vectorizer is already set, every record of
     * the parent reader is transformed with it and cached.
     *
     * @param configuration configuration passed to the parent reader and to a newly created
     *                      vectorizer
     * @param inputSplit the split to read from
     * @throws IOException declared by the parent {@code initialize}
     * @throws InterruptedException declared by the parent {@code initialize}
     */
    public void initialize(Configuration configuration, InputSplit inputSplit) throws IOException, InterruptedException {
        super.initialize(configuration, inputSplit);
        if (this.tfidfVectorizer == null) {
            // No vectorizer supplied: create one and fit it on this reader's input.
            this.tfidfVectorizer = new TfidfVectorizer();
            this.tfidfVectorizer.initialize(configuration);
            this.records.clear();
            // While recordIter is null, hasNext()/nextRecord() delegate to the parent reader,
            // so the vectorizer pulls the raw records through "this".
            // NOTE(review): the "m6" prefix looks like a decompiler-assigned name for
            // fitTransform -- confirm against the TfidfVectorizer declaration before renaming.
            this.numFeatures = this.tfidfVectorizer.m6fitTransform((RecordReader) this, new Vectorizer.RecordCallBack() {
                // Cache every record the vectorizer reports back.
                public void onRecord(Record record) {
                    TfidfRecordReader.this.records.add(record);
                }
            }).columns();
            this.recordIter = this.records.iterator();
        } else {
            // Vectorizer already present: skip fitting and only transform the parent's records.
            // numFeatures is not updated on this path.
            this.records = new ArrayList<>();
            while (super.hasNext()) {
                this.records.add(toTfidfRecord(super.nextRecord()));
            }
            this.recordIter = this.records.iterator();
        }
        this.initialized = true;
    }

    /**
     * Rewinds iteration to the start of the cached records.
     *
     * @throws UnsupportedOperationException if {@code inputSplit} is still {@code null}
     */
    public void reset() {
        if (this.inputSplit == null) {
            throw new UnsupportedOperationException("Cannot reset without first initializing");
        }
        this.recordIter = this.records.iterator();
    }

    /**
     * Returns the next cached record, or the parent reader's next record while the cache
     * iterator has not been created yet.
     *
     * @return the next record
     */
    public Record nextRecord() {
        return this.recordIter == null ? super.nextRecord() : this.recordIter.next();
    }

    /**
     * Returns the writables of the next record.
     *
     * @return {@code nextRecord().getRecord()}
     */
    public List<Writable> next() {
        return nextRecord().getRecord();
    }

    /**
     * Reports whether another record is available, consulting the parent reader while the
     * cache iterator has not been created yet.
     *
     * @return {@code true} if a further record can be read
     */
    public boolean hasNext() {
        return this.recordIter == null ? super.hasNext() : this.recordIter.hasNext();
    }

    /**
     * No-op: this override performs no work.
     *
     * @throws IOException never thrown by this implementation
     */
    public void close() throws IOException {
    }

    /**
     * Stores the given configuration in the {@code conf} field.
     *
     * @param configuration the configuration to keep
     */
    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    /**
     * @return the configuration currently held in the {@code conf} field
     */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * @return the vectorizer in use, or {@code null} if none was set or created yet
     */
    public TfidfVectorizer getTfidfVectorizer() {
        return this.tfidfVectorizer;
    }

    /**
     * Supplies the vectorizer to use. Must be called before initialization, because the record
     * cache is built from the vectorizer during {@code initialize}.
     *
     * @param tfidfVectorizer the vectorizer to transform records with
     * @throws IllegalArgumentException if this reader has already been initialized
     */
    public void setTfidfVectorizer(TfidfVectorizer tfidfVectorizer) {
        if (this.initialized) {
            throw new IllegalArgumentException("Setting TfidfVectorizer after TfidfRecordReader initialization doesn't have an effect");
        }
        this.tfidfVectorizer = tfidfVectorizer;
    }

    /**
     * Returns the column count of the result produced when this reader fitted its own
     * vectorizer. The value is only assigned on that path; if a vectorizer was supplied before
     * initialization it is never assigned and remains 0.
     *
     * @return the recorded number of feature columns
     */
    public int getNumFeatures() {
        return this.numFeatures;
    }

    /**
     * Shuffles the cached records using a new {@link Random} and rewinds iteration.
     */
    public void shuffle() {
        shuffle(new Random());
    }

    /**
     * Shuffles the cached records in place with the given source of randomness and then calls
     * {@link #reset()}, so the next read starts at the beginning of the new order.
     *
     * @param random the randomness source handed to {@link Collections#shuffle(List, Random)}
     * @throws UnsupportedOperationException from {@link #reset()} if the reader is uninitialized
     */
    public void shuffle(Random random) {
        Collections.shuffle(this.records, random);
        reset();
    }

    /**
     * Loads and vectorizes the single record described by the given metadata.
     *
     * @param recordMetaData metadata identifying the record
     * @return the vectorized record
     * @throws IOException declared by the list-based overload this delegates to
     */
    public Record loadFromMetaData(RecordMetaData recordMetaData) throws IOException {
        return loadFromMetaData(Collections.singletonList(recordMetaData)).get(0);
    }

    /**
     * Loads the records described by the metadata through the parent reader and transforms
     * each one with the current vectorizer.
     *
     * @param list metadata identifying the records to load
     * @return the vectorized records, in the order the parent reader returned them
     * @throws IOException declared by the parent {@code loadFromMetaData}
     */
    public List<Record> loadFromMetaData(List<RecordMetaData> list) throws IOException {
        List<Record> vectorized = new ArrayList<>();
        for (Record source : super.loadFromMetaData(list)) {
            vectorized.add(toTfidfRecord(source));
        }
        return vectorized;
    }

    /**
     * Wraps the vectorizer's transform of {@code source} in a new record that carries the
     * source's URI, appending the source's last writable when {@code appendLabel} is set.
     */
    private Record toTfidfRecord(Record source) {
        Record result = new org.datavec.api.records.impl.Record(
                new ArrayList<Writable>(Collections.<Writable>singletonList(new NDArrayWritable(this.tfidfVectorizer.transform(source)))),
                new RecordMetaDataURI(source.getMetaData().getURI(), TfidfRecordReader.class));
        if (this.appendLabel) {
            // The label is taken from the final position of the source record's writables.
            List<Writable> sourceValues = source.getRecord();
            result.getRecord().add(sourceValues.get(sourceValues.size() - 1));
        }
        return result;
    }
}
