package org.cleartk.srl;

import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.CleartkAnnotator;
import org.cleartk.classifier.DataWriterFactory;
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.Instance;
import org.cleartk.classifier.feature.extractor.CleartkExtractor;
import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
import org.cleartk.classifier.feature.extractor.annotationpair.AnnotationPairFeatureExtractor;
import org.cleartk.classifier.feature.extractor.annotationpair.MatchingAnnotationPairExtractor;
import org.cleartk.classifier.feature.extractor.annotationpair.NamingAnnotationPairFeatureExtractor;
import org.cleartk.classifier.feature.extractor.annotationpair.RelativePositionExtractor;
import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
import org.cleartk.classifier.feature.extractor.simple.MatchingAnnotationExtractor;
import org.cleartk.classifier.feature.extractor.simple.NamingExtractor;
import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
import org.cleartk.classifier.jar.GenericJarClassifierFactory;
import org.cleartk.srl.feature.NamedEntityExtractor;
import org.cleartk.srl.feature.NodeTypeExtractor;
import org.cleartk.srl.feature.POSExtractor;
import org.cleartk.srl.feature.StemExtractor;
import org.cleartk.srl.type.Argument;
import org.cleartk.srl.type.Predicate;
import org.cleartk.srl.type.SemanticArgument;
import org.cleartk.syntax.constituent.type.TopTreebankNode;
import org.cleartk.syntax.constituent.type.TreebankNode;
import org.cleartk.syntax.feature.HeadWordExtractor;
import org.cleartk.syntax.feature.SubCategorizationExtractor;
import org.cleartk.syntax.feature.SyntacticPathExtractor;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.cleartk.util.AnnotationUtil;
import org.cleartk.util.UIMAUtil;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.util.JCasUtil;

/* loaded from: input_file:org/cleartk/srl/ArgumentIdentifier.class */
/**
 * Annotator that decides, for every {@link Predicate} in a sentence, which constituents of the
 * sentence's parse tree are arguments of that predicate.
 *
 * <p>One boolean-labelled {@link Instance} is built per (predicate, constituent) pair. In training
 * mode the instance is labelled from the predicate's existing arguments and written to the data
 * writer; otherwise it is classified, and every constituent classified {@code true} is attached to
 * the predicate as a {@link SemanticArgument} with the placeholder label {@code "?"}.
 */
public class ArgumentIdentifier extends CleartkAnnotator<Boolean> {
    /** Sentences longer than this many characters are truncated in the debug log. */
    private static final int SENTENCE_PREVIEW_THRESHOLD = 40;

    /** Number of leading characters kept when a sentence is truncated in the debug log. */
    private static final int SENTENCE_PREVIEW_LENGTH = 39;

    /** Placeholder label given to arguments created by this annotator. */
    private static final String UNLABELED = "?";

    /** Features that depend only on the predicate. */
    private SimpleFeatureExtractor perPredicateExtractor;

    /** Features that depend only on the candidate constituent. */
    private SimpleFeatureExtractor perConstituentExtractor;

    /** Features that depend on the (constituent, predicate) pair. */
    private AnnotationPairFeatureExtractor perPredicateAndConstituentExtractor;

    /** Per-CAS statistics; reset at the start of every {@link #process(JCas)} call. */
    private int nSentences;
    private int nPredicates;
    private int nConstituents;

    private final Logger logger = Logger.getLogger(getClass().getName());

    /**
     * Builds an engine description configured with a data writer factory and an output directory.
     *
     * @param cls the data writer factory class whose name is passed as configuration
     * @param file the output directory passed as configuration
     * @return the description created by {@link AnalysisEngineFactory}
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription getWriterDescription(Class<? extends DataWriterFactory<Boolean>> cls, File file) throws ResourceInitializationException {
        Object[] configuration = new Object[]{
            CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME, cls.getName(),
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, file.toString()
        };
        return AnalysisEngineFactory.createPrimitiveDescription(ArgumentIdentifier.class, configuration);
    }

    /**
     * Builds an engine description configured with the path of a classifier jar.
     *
     * @param file the classifier jar passed as configuration
     * @return the description created by {@link AnalysisEngineFactory}
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription getClassifierDescription(File file) throws ResourceInitializationException {
        Object[] configuration = new Object[]{
            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, file.toString()
        };
        return AnalysisEngineFactory.createPrimitiveDescription(ArgumentIdentifier.class, configuration);
    }

    /**
     * Initializes the parent annotator and wires up the three feature extractors.
     *
     * @param uimaContext the context handed to the parent initializer
     * @throws ResourceInitializationException if the parent initialization fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        // Token-level extractor (covered text, stem, POS); the same instance is shared by the
        // predicate and constituent extractors below.
        MatchingAnnotationExtractor tokenFeatures = new MatchingAnnotationExtractor(
            Token.class,
            new SimpleFeatureExtractor[]{new CoveredTextExtractor(), new StemExtractor(), new POSExtractor()});

        MatchingAnnotationExtractor predicateTokenFeatures = new MatchingAnnotationExtractor(Token.class, tokenFeatures);
        MatchingAnnotationExtractor predicateSubCategorization = new MatchingAnnotationExtractor(TreebankNode.class, new SubCategorizationExtractor());
        this.perPredicateExtractor = new NamingExtractor(
            "Predicate",
            new SimpleFeatureExtractor[]{predicateTokenFeatures, predicateSubCategorization});

        this.perConstituentExtractor = new NamingExtractor(
            "Constituent",
            new SimpleFeatureExtractor[]{
                new NodeTypeExtractor(),
                new HeadWordExtractor(tokenFeatures),
                new CleartkExtractor(Token.class, tokenFeatures, new CleartkExtractor.Context[]{new CleartkExtractor.FirstCovered(1)}),
                new CleartkExtractor(Token.class, tokenFeatures, new CleartkExtractor.Context[]{new CleartkExtractor.LastCovered(1)}),
                new NamedEntityExtractor()
            });

        MatchingAnnotationPairExtractor pairFeatures = new MatchingAnnotationPairExtractor(
            TreebankNode.class,
            TreebankNode.class,
            new AnnotationPairFeatureExtractor[]{new SyntacticPathExtractor(new NodeTypeExtractor()), new RelativePositionExtractor()});
        this.perPredicateAndConstituentExtractor = new NamingAnnotationPairFeatureExtractor("PredicateAndConstituent", pairFeatures);
    }

    /**
     * Processes every {@link Sentence} in the CAS and logs summary statistics.
     *
     * @param jCas the CAS to process
     * @throws AnalysisEngineProcessException if processing any sentence fails
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        this.nSentences = 0;
        this.nPredicates = 0;
        this.nConstituents = 0;

        for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
            processSentence(jCas, sentence);
        }

        // Integer average; guarded so a CAS without predicates does not divide by zero.
        int constituentsPerPredicate = this.nPredicates == 0 ? 0 : this.nConstituents / this.nPredicates;
        this.logger.info(String.format(
            "processed %d sentences, %d predicates, ~%d constituents per predicate",
            this.nSentences, this.nPredicates, constituentsPerPredicate));
    }

    /**
     * Extracts the constituent features of one sentence once and then processes each predicate
     * covered by the sentence.
     *
     * @param jCas the CAS containing the sentence
     * @param sentence the sentence to process
     * @throws AnalysisEngineProcessException if the sentence has no matching
     *         {@link TopTreebankNode}, or if feature extraction or predicate processing fails
     */
    void processSentence(JCas jCas, Sentence sentence) throws AnalysisEngineProcessException {
        this.nSentences++;

        String text = sentence.getCoveredText();
        if (text.length() > SENTENCE_PREVIEW_THRESHOLD) {
            this.logger.fine(String.format("process sentence \"%s ...\"", text.substring(0, SENTENCE_PREVIEW_LENGTH)));
        } else {
            this.logger.fine(String.format("process sentence \"%s\"", text));
        }

        TopTreebankNode tree = AnnotationUtil.selectFirstMatching(jCas, TopTreebankNode.class, sentence);
        if (tree == null) {
            // The factory only builds the exception; it must be thrown here, otherwise the null
            // tree falls through and fails later with an uninformative NullPointerException.
            throw CleartkExtractorException.noAnnotationInWindow(TopTreebankNode.class, sentence);
        }

        List<TreebankNode> constituents = new ArrayList<TreebankNode>(200);
        collectConstituents(tree, constituents);

        // Constituent features do not depend on the predicate, so compute them once per sentence.
        List<List<Feature>> constituentFeatures = new ArrayList<List<Feature>>(constituents.size());
        for (TreebankNode constituent : constituents) {
            constituentFeatures.add(this.perConstituentExtractor.extract(jCas, constituent));
        }

        for (Predicate predicate : JCasUtil.selectCovered(jCas, Predicate.class, sentence)) {
            processPredicate(jCas, predicate, constituents, constituentFeatures);
        }
    }

    /**
     * Builds one instance per candidate constituent for the given predicate and either writes it
     * (training) or classifies it and records positive constituents as new arguments.
     *
     * @param jCas the CAS containing the predicate
     * @param predicate the predicate whose arguments are being identified
     * @param list the candidate constituents
     * @param list2 the pre-extracted constituent features; {@code list2.get(i)} belongs to
     *        {@code list.get(i)}
     * @throws AnalysisEngineProcessException if feature extraction, writing or classification fails
     */
    public void processPredicate(JCas jCas, Predicate predicate, List<TreebankNode> list, List<List<Feature>> list2) throws AnalysisEngineProcessException {
        this.nPredicates++;

        List<Feature> predicateFeatures = new ArrayList<Feature>(this.perPredicateExtractor.extract(jCas, predicate.getAnnotation()));
        List<SemanticArgument> predictedArguments = new ArrayList<SemanticArgument>();

        for (int i = 0; i < list.size(); i++) {
            this.nConstituents++;
            TreebankNode constituent = list.get(i);

            Instance<Boolean> instance = new Instance<Boolean>();
            instance.addAll(this.perPredicateAndConstituentExtractor.extract(jCas, constituent, predicate.getAnnotation()));
            instance.addAll(list2.get(i));
            instance.addAll(predicateFeatures);

            if (isTraining()) {
                instance.setOutcome(isArgumentOf(predicate, constituent));
                this.dataWriter.write(instance);
            } else if (this.classifier.classify(instance.getFeatures())) {
                predictedArguments.add(createArgument(jCas, constituent));
            }
        }

        // Rebuild the predicate's argument array once, instead of once per positive constituent.
        if (!predictedArguments.isEmpty()) {
            List<Argument> arguments = UIMAUtil.toList(predicate.getArguments(), Argument.class);
            arguments.addAll(predictedArguments);
            predicate.setArguments(UIMAUtil.toFSArray(jCas, arguments));
        }
    }

    /**
     * Tells whether one of the predicate's existing arguments is anchored on the given constituent.
     * A predicate whose argument array is unset is treated as having no arguments.
     */
    private static boolean isArgumentOf(Predicate predicate, TreebankNode constituent) {
        if (predicate.getArguments() == null) {
            return false;
        }
        int argumentCount = predicate.getArguments().size();
        for (int i = 0; i < argumentCount; i++) {
            if (predicate.getArguments(i).getAnnotation().equals(constituent)) {
                return true;
            }
        }
        return false;
    }

    /** Creates and indexes an unlabeled semantic argument spanning the given constituent. */
    private static SemanticArgument createArgument(JCas jCas, TreebankNode constituent) {
        SemanticArgument argument = new SemanticArgument(jCas);
        argument.setAnnotation(constituent);
        argument.setBegin(constituent.getBegin());
        argument.setEnd(constituent.getEnd());
        argument.setLabel(UNLABELED);
        argument.addToIndexes();
        return argument;
    }

    /**
     * Appends the given node and all of its descendants to {@code list} in pre-order, skipping
     * nodes that are {@link TopTreebankNode} instances.
     *
     * @param treebankNode the root of the subtree to collect
     * @param list the list that receives the collected nodes
     */
    protected void collectConstituents(TreebankNode treebankNode, List<TreebankNode> list) {
        if (!(treebankNode instanceof TopTreebankNode)) {
            list.add(treebankNode);
        }
        if (treebankNode.getChildren() == null) {
            return;
        }
        int childCount = treebankNode.getChildren().size();
        for (int i = 0; i < childCount; i++) {
            collectConstituents(treebankNode.getChildren(i), list);
        }
    }
}
