package org.cleartk.ne.term;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ne.term.util.SimpleTermFinder;
import org.cleartk.ne.term.util.TermFinder;
import org.cleartk.ne.term.util.TermList;
import org.cleartk.ne.term.util.TermMatch;
import org.cleartk.token.tokenizer.PennTreebankTokenizer;
import org.cleartk.token.tokenizer.Token;
import org.cleartk.util.UIMAUtil;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.factory.initializable.InitializableFactory;

/* loaded from: input_file:org/cleartk/ne/term/TermFinderAnnotator.class */
/**
 * Annotator that loads one or more term lists at initialization time and, for each CAS it
 * processes, creates an annotation for every term match found in the token annotations.
 *
 * <p>Term lists are registered with one of two {@link TermFinder}s depending on whether the list
 * is flagged as case sensitive. Tokens are searched either all at once or one window annotation
 * (for example a sentence) at a time, depending on whether a window class name was configured.
 * Matches are turned into annotations either by a configured {@link TermMatchAnnotationCreator}
 * or by reflectively invoking the {@code (JCas, int, int)} constructor of the configured
 * annotation class.
 */
public class TermFinderAnnotator extends JCasAnnotator_ImplBase {
    public static final String TERM_LIST_FILE_NAMES_FILE_NAME_DESCRIPTION = "Provides the name of a file that contains file names of term lists that are to be loaded. Each line of the file should contain the name of a term list followed by the name of the file that contains the terms, a boolean ('true' or 'false')  that indicates whether the file should be treated as case sensitive followed optionally by separator string to be used to separate  an id from a term if the file contains ids. The values on each line should be tab delimited. ";

    /** Path of the tab-delimited file that lists the term lists to load. */
    @ConfigurationParameter(mandatory = true, description = TERM_LIST_FILE_NAMES_FILE_NAME_DESCRIPTION)
    public String termListFileNamesFileName;

    /** Class name of the window annotation type; when unset, all tokens are searched together. */
    @ConfigurationParameter(description = "names the class of the type system type from which to extract tokens. Any annotation that contains tokens can be used (e.g. sentence, paragraph, document).  If no value is given for this parameter, then all tokens will be searched. An example value might be 'org.cleartk.type.Sentence'")
    private String windowClassName;

    /** Class name of the token annotation type. */
    @ConfigurationParameter(mandatory = true, defaultValue = {"org.cleartk.type.Token"}, description = "names the class of the type system type corresponding to tokens. ")
    private String tokenClassName;

    /** Class name of a {@link TermMatchAnnotationCreator}; takes precedence over the annotation class name. */
    @ConfigurationParameter(description = "provides the class name of a class that extends org.cleartk.ne.term.TermMatchAnnotationCreator. If this parameter is not given a value, then the parameter 'termMatchAnnotationClassName'  must be given a value.")
    private String termMatchAnnotationCreatorClassName;

    /** Class name of the annotation type created per match when no creator class is configured. */
    @ConfigurationParameter(defaultValue = {"org.cleartk.ne.type.NamedEntityMention"}, description = "names the class of the type system type that specifies the annotations created of found term matches. One annotation is created for each term match found of the given type specified by this parameter. This parameter is ignored if 'termMatchAnnotationCreatorClassName' is given a value.")
    private String termMatchAnnotationClassName;

    /** Finder holding the term lists flagged as case sensitive; null until such a list is loaded. */
    TermFinder caseSensitiveTermFinder;

    /** Finder holding the term lists flagged as case insensitive; null until such a list is loaded. */
    TermFinder caseInsensitiveTermFinder;

    /** True when no window class is configured and all tokens of the CAS are searched together. */
    boolean allTokens = true;

    /** Set once the CAS types have been resolved by the first call to {@link #process(JCas)}. */
    boolean typesInitialized = false;

    /** Java class resolved from the window class name; only set when a window is configured. */
    protected Class<? extends Annotation> sentenceClass;

    /** CAS type of the window annotations; only resolved when a window is configured. */
    protected Type sentenceType;

    /** Java class resolved from the token class name. */
    protected Class<? extends Annotation> tokenClass;

    /** CAS type of the token annotations. */
    protected Type tokenType;

    /** Creator used for matches when a creator class name is configured; otherwise null. */
    TermMatchAnnotationCreator annotationCreator;

    /** {@code (JCas, int, int)} constructor used for matches when no creator is configured. */
    Constructor<? extends Annotation> annotationConstructor;

    public static final String PARAM_TERM_LIST_FILE_NAMES_FILE_NAME = ConfigurationParameterFactory.createConfigurationParameterName(TermFinderAnnotator.class, "termListFileNamesFileName");
    public static final String PARAM_WINDOW_CLASS_NAME = ConfigurationParameterFactory.createConfigurationParameterName(TermFinderAnnotator.class, "windowClassName");
    public static final String PARAM_TOKEN_CLASS_NAME = ConfigurationParameterFactory.createConfigurationParameterName(TermFinderAnnotator.class, "tokenClassName");
    public static final String PARAM_TERM_MATCH_ANNOTATION_CREATOR_CLASS_NAME = ConfigurationParameterFactory.createConfigurationParameterName(TermFinderAnnotator.class, "termMatchAnnotationCreatorClassName");
    public static final String PARAM_TERM_MATCH_ANNOTATION_CLASS_NAME = ConfigurationParameterFactory.createConfigurationParameterName(TermFinderAnnotator.class, "termMatchAnnotationClassName");

    /**
     * Loads the configured term lists and resolves the window, token and match-annotation classes.
     *
     * @param uimaContext the context passed on to the superclass and to the annotation creator
     * @throws ResourceInitializationException wrapping any exception raised while reading the
     *         term list files or resolving the configured classes
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        try {
            super.initialize(uimaContext);
            loadTermLists();

            // An empty window class name is treated like an absent one, mirroring how the
            // creator class name is handled below.
            if (isSet(this.windowClassName)) {
                this.allTokens = false;
                this.sentenceClass = InitializableFactory.getClass(this.windowClassName, Annotation.class);
            }
            this.tokenClass = InitializableFactory.getClass(this.tokenClassName, Annotation.class);

            if (isSet(this.termMatchAnnotationCreatorClassName)) {
                this.annotationCreator = (TermMatchAnnotationCreator) InitializableFactory.create(uimaContext, this.termMatchAnnotationCreatorClassName, TermMatchAnnotationCreator.class);
            } else {
                if (!isSet(this.termMatchAnnotationClassName)) {
                    throw new IllegalArgumentException("either 'termMatchAnnotationCreatorClassName' or 'termMatchAnnotationClassName' must be given a value");
                }
                this.annotationConstructor = InitializableFactory.getClass(this.termMatchAnnotationClassName, Annotation.class).getConstructor(JCas.class, Integer.TYPE, Integer.TYPE);
            }
        } catch (Exception e) {
            throw new ResourceInitializationException(e);
        }
    }

    /** Returns true when the given configuration value is neither null nor the empty string. */
    private static boolean isSet(String value) {
        return value != null && !value.equals("");
    }

    /**
     * Reads the term-list index file line by line and registers every listed term list with the
     * case-sensitive or case-insensitive finder, creating the finder on first use.
     *
     * <p>Blank lines are skipped. A non-blank line with fewer than three tab-delimited values is
     * rejected with an error that names the file and line.
     */
    private void loadTermLists() throws Exception {
        BufferedReader reader = new BufferedReader(new FileReader(this.termListFileNamesFileName));
        try {
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                String trimmed = line.trim();
                if (trimmed.length() == 0) {
                    continue;
                }
                String[] columns = trimmed.split("\t");
                if (columns.length < 3) {
                    throw new IllegalArgumentException("expected at least 3 tab-delimited values (term list name, file name, case sensitivity) in " + this.termListFileNamesFileName + " at line " + lineNumber + " but found " + columns.length + ": '" + line + "'");
                }
                String termListName = columns[0];
                String termFileName = columns[1];
                // Boolean.parseBoolean yields false for anything other than "true" (ignoring case).
                boolean caseSensitive = Boolean.parseBoolean(columns[2]);
                // The separator is only honored when the line has exactly four values.
                String separator = columns.length == 4 ? columns[3] : null;

                TermFinder finder;
                if (caseSensitive) {
                    if (this.caseSensitiveTermFinder == null) {
                        this.caseSensitiveTermFinder = new SimpleTermFinder(true, new PennTreebankTokenizer());
                    }
                    finder = this.caseSensitiveTermFinder;
                } else {
                    if (this.caseInsensitiveTermFinder == null) {
                        this.caseInsensitiveTermFinder = new SimpleTermFinder(false, new PennTreebankTokenizer());
                    }
                    finder = this.caseInsensitiveTermFinder;
                }
                finder.addTermList(TermList.loadSimpleFile(termListName, new File(termFileName), separator));
            }
        } finally {
            reader.close();
        }
    }

    /** Resolves the CAS types for the window (if configured) and token classes, once. */
    private void initializeTypes(JCas jCas) {
        if (!this.allTokens) {
            this.sentenceType = UIMAUtil.getCasType(jCas, this.sentenceClass);
        }
        this.tokenType = UIMAUtil.getCasType(jCas, this.tokenClass);
        this.typesInitialized = true;
    }

    /** Builds a tokenizer {@link Token} from an annotation's begin, end and covered text. */
    private Token createToken(Annotation annotation) {
        return new Token(annotation.getBegin(), annotation.getEnd(), annotation.getCoveredText());
    }

    /** Appends a {@link Token} to {@code tokens} for every annotation the iterator yields. */
    private void addTokens(FSIterator iterator, List<Token> tokens) {
        while (iterator.hasNext()) {
            tokens.add(createToken((Annotation) iterator.next()));
        }
    }

    /**
     * Searches the token annotations of the CAS for terms and creates an annotation per match.
     * Without a configured window, all tokens are searched in one pass; otherwise the tokens of
     * each window annotation are searched separately.
     *
     * @param jCas the CAS to search and to add match annotations to
     * @throws AnalysisEngineProcessException if a match annotation cannot be created
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        if (!this.typesInitialized) {
            initializeTypes(jCas);
        }
        List<Token> tokens = new ArrayList<Token>();
        if (this.allTokens) {
            addTokens(jCas.getAnnotationIndex(this.tokenType).iterator(), tokens);
            findTerms(jCas, tokens);
            return;
        }
        FSIterator windows = jCas.getAnnotationIndex(this.sentenceType).iterator();
        while (windows.hasNext()) {
            Annotation window = (Annotation) windows.next();
            // The same list instance is reused for every window.
            tokens.clear();
            addTokens(jCas.getAnnotationIndex(this.tokenType).subiterator(window), tokens);
            findTerms(jCas, tokens);
        }
    }

    /**
     * Runs the case-sensitive and then the case-insensitive finder, whichever exist, over the
     * given tokens.
     *
     * @param jCas the CAS that receives the match annotations
     * @param list the tokens to search
     * @throws AnalysisEngineProcessException if a match annotation cannot be created
     */
    public void findTerms(JCas jCas, List<Token> list) throws AnalysisEngineProcessException {
        if (this.caseSensitiveTermFinder != null) {
            findTerms(jCas, list, this.caseSensitiveTermFinder);
        }
        if (this.caseInsensitiveTermFinder != null) {
            findTerms(jCas, list, this.caseInsensitiveTermFinder);
        }
    }

    /**
     * Creates one annotation for every match the given finder reports for the tokens. The
     * configured annotation creator is used when present; otherwise the annotation class is
     * instantiated with the match's begin and end offsets and added to the indexes.
     *
     * @param jCas the CAS that receives the match annotations
     * @param list the tokens to search
     * @param termFinder the finder to query for matches
     * @throws AnalysisEngineProcessException wrapping any failure of the reflective construction
     */
    public void findTerms(JCas jCas, List<Token> list, TermFinder termFinder) throws AnalysisEngineProcessException {
        for (TermMatch termMatch : termFinder.getMatches(list)) {
            if (this.annotationCreator != null) {
                this.annotationCreator.createTermMatchAnnotation(jCas, termMatch);
            } else {
                try {
                    this.annotationConstructor.newInstance(jCas, Integer.valueOf(termMatch.getBegin()), Integer.valueOf(termMatch.getEnd())).addToIndexes();
                } catch (Exception e) {
                    throw new AnalysisEngineProcessException(e);
                }
            }
        }
    }

    /** Sets the path of the file that lists the term lists to load. */
    public void setTermListFileNamesFileName(String str) {
        this.termListFileNamesFileName = str;
    }

    /** Sets the class name of the window annotation type. */
    public void setWindowClassName(String str) {
        this.windowClassName = str;
    }

    /** Sets the class name of the token annotation type. */
    public void setTokenClassName(String str) {
        this.tokenClassName = str;
    }

    /** Sets the class name of the term match annotation creator. */
    public void setTermMatchAnnotationCreatorClassName(String str) {
        this.termMatchAnnotationCreatorClassName = str;
    }

    /** Sets the class name of the annotation type created for term matches. */
    public void setTermMatchAnnotationClassName(String str) {
        this.termMatchAnnotationClassName = str;
    }
}
