package org.cleartk.syntax.constituent.ptb;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.cleartk.syntax.constituent.TreebankConstants;
import org.cleartk.util.ViewURIUtil;
import org.uimafit.component.JCasCollectionReader_ImplBase;
import org.uimafit.component.ViewCreatorAnnotator;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.descriptor.SofaCapability;
import org.uimafit.factory.ConfigurationParameterFactory;

/**
 * Collection reader that walks a Penn Treebank style corpus directory and emits one CAS per
 * {@code .mrg} file.
 *
 * <p>The configured corpus directory is expected to contain numerically named section
 * subdirectories. Only sections accepted by the configured {@link ListSpecification} are read.
 * For every file, the file URI is recorded through {@link ViewURIUtil} and the raw file contents
 * are placed in the {@link TreebankConstants#TREEBANK_VIEW} view with MIME type
 * {@code text/plain}.
 */
@SofaCapability(outputSofas = {TreebankConstants.TREEBANK_VIEW, "UriView"})
public class PennTreebankReader extends JCasCollectionReader_ImplBase {
    private static final String CORPUS_DIRECTORY_DESCRIPTION = "Specifies the location of WSJ/PennTreebank treebank files.  The directory should contain subdirectories corresponding to the sections (e.g. '00', '01', etc.) That is, if a local copy of PennTreebank sits at C:/Data/PTB/wsj/mrg, then the the subdirectory C:/Data/PTB/wsj/mrg/00 should exist. There are 24 sections in PTB corresponding to the directories 00, 01, 02, ... 24. ";

    /** Path of the corpus root directory (the directory holding the section subdirectories). */
    @ConfigurationParameter(mandatory = true, description = CORPUS_DIRECTORY_DESCRIPTION)
    private String corpusDirectoryName;
    private static final String SECTIONS_DESCRIPTION = "specifies which sections of PTB to read in.  The required format for values of this parameter allows for comma-separated section numbers and section ranges, for example '02,07-12,16'.";

    /** Section selection expression, e.g. {@code 02,07-12,16}; defaults to {@code 00-24}. */
    @ConfigurationParameter(defaultValue = {"00-24"}, description = SECTIONS_DESCRIPTION)
    private String sectionsSpecifier;

    /** Corpus root directory built from {@link #corpusDirectoryName}. */
    protected File directory;

    /** Files that have not been read yet, in sorted order; consumed from the front. */
    protected LinkedList<File> files;

    /** Total number of files found during initialization; used for progress reporting. */
    protected int numberOfFiles;

    /** Parsed form of {@link #sectionsSpecifier}. */
    protected ListSpecification sections;
    public static final String PARAM_CORPUS_DIRECTORY_NAME = ConfigurationParameterFactory.createConfigurationParameterName(PennTreebankReader.class, "corpusDirectoryName");
    public static final String PARAM_SECTIONS_SPECIFIER = ConfigurationParameterFactory.createConfigurationParameterName(PennTreebankReader.class, "sectionsSpecifier");

    /**
     * Parses the section specification, collects every {@code .mrg} file below the selected
     * section directories, and sorts the result so files are read in a stable order.
     *
     * @param uimaContext the UIMA context (not used directly by this method)
     * @throws ResourceInitializationException declared by the framework contract
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        this.sections = new ListSpecification(this.sectionsSpecifier);
        this.directory = new File(this.corpusDirectoryName);
        this.files = new LinkedList<>();
        collectSections(this.directory, this.files, this.sections);
        Collections.sort(this.files);
        this.numberOfFiles = this.files.size();
    }

    /**
     * Adds to {@code list} all {@code .mrg} files found under those direct subdirectories of
     * {@code file} whose names parse as integers accepted by {@code listSpecification}.
     *
     * <p>Does nothing when {@code file} is not a directory or cannot be listed.
     *
     * @param file the corpus root directory
     * @param list the list that receives the matching files
     * @param listSpecification the set of section numbers to include
     */
    public static void collectSections(File file, List<File> list, ListSpecification listSpecification) {
        if (!file.isDirectory()) {
            return;
        }
        File[] children = file.listFiles();
        if (children == null) {
            // listFiles() returns null on an I/O error or when the directory cannot be read;
            // treat that as "no sections" instead of failing with a NullPointerException.
            return;
        }
        for (File child : children) {
            if (!child.isDirectory()) {
                continue;
            }
            int sectionNumber;
            try {
                sectionNumber = Integer.parseInt(child.getName());
            } catch (NumberFormatException ignored) {
                // Directories whose names are not integers are not sections; skip them on purpose.
                continue;
            }
            if (listSpecification.contains(sectionNumber)) {
                collectFiles(child, list);
            }
        }
    }

    /**
     * Recursively adds to {@code list} every regular file below {@code file} (or {@code file}
     * itself) whose name ends with {@code .mrg}.
     *
     * @param file a file or directory to scan
     * @param list the list that receives the matching files
     */
    static void collectFiles(File file, List<File> list) {
        if (file.isFile() && file.getName().endsWith(".mrg")) {
            list.add(file);
            return;
        }
        if (file.isDirectory()) {
            File[] children = file.listFiles();
            if (children == null) {
                // Unreadable directory or I/O error: nothing to collect from here.
                return;
            }
            for (File child : children) {
                collectFiles(child, list);
            }
        }
    }

    /**
     * Removes the next pending file, records its URI on the CAS, and stores its contents in the
     * treebank view.
     *
     * <p>Callers are expected to check {@link #hasNext()} first; when no files remain,
     * {@link LinkedList#removeFirst()} throws {@code NoSuchElementException}.
     *
     * @param jCas the CAS to populate
     * @throws IOException if the file cannot be read
     * @throws CollectionException if the treebank view cannot be created
     */
    public void getNext(JCas jCas) throws IOException, CollectionException {
        File treebankFile = this.files.removeFirst();
        getUimaContext().getLogger().log(Level.FINEST, "reading treebank file: " + treebankFile.getPath());
        ViewURIUtil.setURI(jCas, treebankFile.toURI());
        try {
            JCas treebankView = ViewCreatorAnnotator.createViewSafely(jCas, TreebankConstants.TREEBANK_VIEW);
            // NOTE(review): the single-argument file2String presumably decodes with the platform
            // default charset - confirm this is acceptable for the corpus files.
            treebankView.setSofaDataString(FileUtils.file2String(treebankFile), "text/plain");
        } catch (AnalysisEngineProcessException e) {
            throw new CollectionException(e);
        }
    }

    /** No resources are held open between calls, so there is nothing to release. */
    public void close() throws IOException {
    }

    /**
     * Reports progress as the number of files already consumed out of the total found at
     * initialization.
     *
     * @return a single-element array describing the progress in "entities"
     */
    public Progress[] getProgress() {
        int consumed = this.numberOfFiles - this.files.size();
        return new Progress[]{new ProgressImpl(consumed, this.numberOfFiles, "entities")};
    }

    /**
     * @return {@code true} while at least one file remains to be read
     */
    public boolean hasNext() throws IOException, CollectionException {
        return !this.files.isEmpty();
    }

    /**
     * Sets the corpus root directory path.
     *
     * @param str the directory path
     */
    public void setCorpusDirectoryName(String str) {
        this.corpusDirectoryName = str;
    }

    /**
     * Sets the section selection expression.
     *
     * @param str the sections specifier, e.g. {@code 02,07-12,16}
     */
    public void setSectionsSpecifier(String str) {
        this.sectionsSpecifier = str;
    }
}
