package org.cleartk.syntax.constituent.ptb.util;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.cleartk.syntax.constituent.TreebankGoldAnnotator;
import org.cleartk.syntax.constituent.ptb.PennTreebankReader;
import org.cleartk.token.type.Sentence;
import org.cleartk.util.Options_ImplBase;
import org.cleartk.util.ViewURIFileNamer;
import org.kohsuke.args4j.Option;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.component.xwriter.XWriter;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.CollectionReaderFactory;
import org.uimafit.pipeline.SimplePipeline;
import org.uimafit.util.JCasUtil;

@Deprecated
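/*
 * Command-line utility: reads a Penn Treebank corpus directory with PennTreebankReader, runs
 * TreebankGoldAnnotator and SentencePrinter over each document, and writes the results with XWriter.
 * Decompiled from: org/cleartk/syntax/constituent/ptb/util/PrepareSentenceData.class
 */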
public class PrepareSentenceData {

    /* loaded from: input_file:org/cleartk/syntax/constituent/ptb/util/PrepareSentenceData$Options.class */
    /**
     * Command-line options for {@code PrepareSentenceData}. Each public field is bound to a
     * short flag and a long alias through an args4j {@link Option} annotation, and its
     * initializer is the value used when the flag is not supplied.
     */
    public static class Options extends Options_ImplBase {

        /** Name of the input directory for the WSJ data ({@code -i}). */
        @Option(
                name = "-i",
                aliases = "--inputDirectoryName",
                usage = "specify the name of the input directory for the wsj data.")
        public String inputDirectoryName = "../ClearTK Data/data/treebank/wsj";

        /** Name of the output directory for the sentence data ({@code -o}). */
        @Option(
                name = "-o",
                aliases = "--outputDirectoryName",
                usage = "specify the name of the output directory for the sentence data.")
        public String outputDirectoryName = "../cleartk-token/src/main/resources/org/cleartk/sentence/english/ptb";

        /** Specifier of the sections that will be used ({@code -s}). */
        @Option(
                name = "-s",
                aliases = "--sectionsSpecifier",
                usage = "specify the sections that will be used.")
        public String sectionsSpecifier = "00-24";
    }

    /* loaded from: input_file:org/cleartk/syntax/constituent/ptb/util/PrepareSentenceData$SentencePrinter.class */
    /**
     * Annotator that tallies which character each {@link Sentence} ends with.
     *
     * <p>For every sentence in a processed CAS it counts the last character of the covered text,
     * and prints the whole sentence to standard output when that character is not one of
     * {@code .}, {@code "} or {@code ?}. When the collection is complete it prints the tally,
     * most frequent character first, followed by the total number of sentences.
     */
    public static class SentencePrinter extends JCasAnnotator_ImplBase {
        /** Number of sentences seen so far, keyed by the final character of their covered text. */
        Map<Character, Integer> charCounts = new HashMap<Character, Integer>();

        /** Number of sentences seen so far, including those whose covered text is empty. */
        int sentenceCount = 0;

        /**
         * Updates the final-character tally with every {@link Sentence} found in the given CAS.
         *
         * <p>A sentence whose covered text is {@code null} or empty is still counted in
         * {@link #sentenceCount}, but contributes nothing to {@link #charCounts} because it has
         * no final character.
         *
         * @param jCas the CAS whose sentence annotations are inspected
         * @throws AnalysisEngineProcessException declared by the overridden framework method
         */
        public void process(JCas jCas) throws AnalysisEngineProcessException {
            for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
                String text = sentence.getCoveredText();
                this.sentenceCount++;

                // Guard: charAt(length - 1) would throw on an empty string, aborting the pipeline.
                if (text == null || text.length() == 0) {
                    continue;
                }

                char lastChar = text.charAt(text.length() - 1);
                Character key = Character.valueOf(lastChar);
                Integer seen = this.charCounts.get(key);
                this.charCounts.put(key, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));

                // Surface sentences with an unusual terminator so they can be inspected by eye.
                if (lastChar != '.' && lastChar != '"' && lastChar != '?') {
                    System.out.println(text);
                }
            }
        }

        /**
         * Prints one {@code character<TAB>count} line per distinct final character, ordered by
         * descending count with ties broken by ascending character, then the total sentence
         * count, and finally delegates to the superclass implementation.
         *
         * @throws AnalysisEngineProcessException if the superclass implementation throws it
         */
        public void collectionProcessComplete() throws AnalysisEngineProcessException {
            ArrayList<Map.Entry<Character, Integer>> entries =
                    new ArrayList<Map.Entry<Character, Integer>>(this.charCounts.entrySet());
            Collections.sort(entries, new Comparator<Map.Entry<Character, Integer>>() {
                @Override
                public int compare(Map.Entry<Character, Integer> left, Map.Entry<Character, Integer> right) {
                    // Operands are swapped (right vs. left) to get descending order by count
                    // without negating a comparison result.
                    int byCount = right.getValue().compareTo(left.getValue());
                    return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
                }
            });
            for (Map.Entry<Character, Integer> entry : entries) {
                System.out.println(entry.getKey() + "\t" + entry.getValue());
            }
            System.out.println("total number of sentences: " + this.sentenceCount);
            super.collectionProcessComplete();
        }
    }

    /**
     * Parses the command-line arguments into {@link Options} and runs the pipeline: a
     * {@link PennTreebankReader} over the input directory and selected sections, followed by
     * {@link TreebankGoldAnnotator}, {@link SentencePrinter} and an {@link XWriter} that writes
     * to the output directory using {@link ViewURIFileNamer} for file names.
     *
     * @param strArr command-line arguments; see {@link Options} for the recognised flags
     * @throws Exception any exception raised while parsing options or while creating and
     *         running the pipeline components is propagated to the caller
     */
    public static void main(String[] strArr) throws Exception {
        Options cli = new Options();
        cli.parseOptions(strArr);
        String inputDirectory = cli.inputDirectoryName;
        String outputDirectory = cli.outputDirectoryName;

        // The components are created inline, in pipeline order: reader first, then each engine.
        SimplePipeline.runPipeline(
                CollectionReaderFactory.createCollectionReader(
                        PennTreebankReader.class,
                        PennTreebankReader.PARAM_CORPUS_DIRECTORY_NAME, inputDirectory,
                        PennTreebankReader.PARAM_SECTIONS_SPECIFIER, cli.sectionsSpecifier),
                AnalysisEngineFactory.createPrimitive(
                        TreebankGoldAnnotator.class,
                        TreebankGoldAnnotator.PARAM_POST_TREES, false),
                AnalysisEngineFactory.createPrimitive(SentencePrinter.class),
                AnalysisEngineFactory.createPrimitive(
                        XWriter.class,
                        XWriter.PARAM_OUTPUT_DIRECTORY_NAME, outputDirectory,
                        XWriter.PARAM_FILE_NAMER_CLASS_NAME, ViewURIFileNamer.class.getName()));
    }
}
