package org.tribuo.data.text;

import com.oracle.labs.mlrg.olcut.config.Config;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.tribuo.ConfigurableDataSource;
import org.tribuo.Example;
import org.tribuo.Output;
import org.tribuo.OutputFactory;

/* loaded from: input_file:org/tribuo/data/text/TextDataSource.class */
/**
 * Abstract base for data sources that hold a list of {@link Example}s built from text
 * located at a {@link Path}.
 * <p>
 * Subclasses implement {@link #read()}; the error message raised by {@link #iterator()}
 * indicates that {@code read()} is expected to have filled {@link #data} before iteration.
 *
 * @param <T> the output type produced by this data source.
 */
public abstract class TextDataSource<T extends Output<T>> implements ConfigurableDataSource<T> {

    @Config(description = "The document preprocessors to run on each document in the data source.")
    protected List<DocumentPreprocessor> preprocessors = new ArrayList<>();

    @Config(mandatory = true, description = "The path to read the data from.")
    protected Path path;

    @Config(mandatory = true, description = "The factory that converts a String into an Output instance.")
    protected OutputFactory<T> outputFactory;

    @Config(mandatory = true, description = "The feature extractor that generates Features from text.")
    protected TextFeatureExtractor<T> extractor;

    /** The examples served by {@link #iterator()}. */
    protected final List<Example<T>> data = new ArrayList<>();

    /**
     * Creates an instance with empty preprocessor and data lists and with
     * {@code path}, {@code outputFactory} and {@code extractor} left unset.
     * <p>
     * NOTE(review): presumably those fields are populated afterwards by the configuration
     * system via the {@link Config} annotations - confirm against the configuration library.
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public TextDataSource() {
    }

    /**
     * Creates a data source for the supplied path.
     *
     * @param path the path stored as the location of the data.
     * @param outputFactory the factory stored for producing outputs.
     * @param textFeatureExtractor the feature extractor stored for processing text.
     * @param documentPreprocessorArr the preprocessors, appended in the order given.
     */
    public TextDataSource(Path path, OutputFactory<T> outputFactory, TextFeatureExtractor<T> textFeatureExtractor, DocumentPreprocessor... documentPreprocessorArr) {
        this.path = path;
        this.outputFactory = outputFactory;
        this.extractor = textFeatureExtractor;
        this.preprocessors.addAll(Arrays.asList(documentPreprocessorArr));
    }

    /**
     * Creates a data source for the supplied file by converting it to a {@link Path}
     * and delegating to the {@code Path} based constructor.
     *
     * @param file the file whose path is stored as the location of the data.
     * @param outputFactory the factory stored for producing outputs.
     * @param textFeatureExtractor the feature extractor stored for processing text.
     * @param documentPreprocessorArr the preprocessors, appended in the order given.
     */
    public TextDataSource(File file, OutputFactory<T> outputFactory, TextFeatureExtractor<T> textFeatureExtractor, DocumentPreprocessor... documentPreprocessorArr) {
        this(file.toPath(), outputFactory, textFeatureExtractor, documentPreprocessorArr);
    }

    /**
     * Returns an iterator over the stored examples.
     *
     * @return an iterator over {@link #data}.
     * @throws IllegalStateException if {@link #data} is empty. The message attributes this
     *         to {@code read} not having been called, but the check itself only looks at
     *         whether the list is empty.
     */
    public Iterator<Example<T>> iterator() {
        if (this.data.isEmpty()) {
            throw new IllegalStateException("read was not called in " + getClass().getName());
        }
        return this.data.iterator();
    }

    /**
     * Describes this data source by its simple class name, path, extractor and preprocessors.
     * <p>
     * Plain string concatenation is used rather than explicit {@code toString()} calls so
     * that an instance whose fields are still unset renders them as {@code null} instead of
     * throwing {@link NullPointerException}.
     *
     * @return a description of this data source.
     */
    @Override
    public String toString() {
        return getClass().getSimpleName() + "(path=" + this.path + ",extractor=" + this.extractor + ",preprocessors=" + this.preprocessors + ")";
    }

    /**
     * Runs the document through every preprocessor in list order, feeding the output of
     * each preprocessor into the next one.
     *
     * @param str the document text.
     * @return the text returned by the last preprocessor, or {@code str} itself when
     *         there are no preprocessors.
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public String handleDoc(String str) {
        String processed = str;
        for (DocumentPreprocessor preprocessor : this.preprocessors) {
            processed = preprocessor.processDoc(processed);
        }
        return processed;
    }

    /**
     * Reads the data for this source.
     * <p>
     * NOTE(review): presumably implementations load from {@link #path} and fill
     * {@link #data}; nothing in this class enforces that - confirm against subclasses.
     *
     * @throws IOException declared for implementations that fail while reading.
     */
    protected abstract void read() throws IOException;

    /**
     * Returns the output factory held by this data source.
     *
     * @return the output factory.
     */
    public OutputFactory<T> getOutputFactory() {
        return this.outputFactory;
    }
}
