package org.dkpro.tc.features.pair.core.ngram.meta;

import de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.tc.api.exception.TextClassificationException;
import org.dkpro.tc.api.features.util.FeatureUtil;
import org.dkpro.tc.api.type.TextClassificationTarget;
import org.dkpro.tc.features.ngram.util.KeywordNGramUtils;
import org.dkpro.tc.features.pair.core.ngram.LuceneKeywordPFE;

/* loaded from: input_file:org/dkpro/tc/features/pair/core/ngram/meta/LuceneKeywordPMetaCollector.class */
/**
 * Meta collector that produces keyword n-gram frequency distributions for pair
 * classification: one distribution over a list of views, and one for each of
 * the two individual views.
 *
 * <p>All n-gram extraction is delegated to {@link KeywordNGramUtils}; the keyword
 * set is read once in {@link #initialize(UimaContext)} from the configured
 * keywords file.
 *
 * <p>Note: this source was recovered by a decompiler, which reported that the
 * overridden {@code getNgramsFD(List)}, {@code getNgramsFDView1/2} and
 * {@code getFieldNameView1/2} methods originally had {@code protected} access.
 */
public class LuceneKeywordPMetaCollector extends LucenePMetaCollectorBase {

    // ---- minimum n-gram sizes (view 1, view 2, combined) ----

    @ConfigurationParameter(name = LuceneKeywordPFE.PARAM_KEYWORD_NGRAM_MIN_N_VIEW1, mandatory = true, defaultValue = "1")
    private int ngramMinN1;

    @ConfigurationParameter(name = LuceneKeywordPFE.PARAM_KEYWORD_NGRAM_MIN_N_VIEW2, mandatory = true, defaultValue = "1")
    private int ngramMinN2;

    @ConfigurationParameter(name = LuceneKeywordPFE.PARAM_KEYWORD_NGRAM_MIN_N, mandatory = true, defaultValue = "1")
    private int ngramMinN;

    // ---- maximum n-gram sizes (view 1, view 2, combined) ----

    @ConfigurationParameter(name = LuceneKeywordPFE.PARAM_KEYWORD_NGRAM_MAX_N_VIEW1, mandatory = true, defaultValue = "3")
    private int ngramMaxN1;

    @ConfigurationParameter(name = LuceneKeywordPFE.PARAM_KEYWORD_NGRAM_MAX_N_VIEW2, mandatory = true, defaultValue = "3")
    private int ngramMaxN2;

    @ConfigurationParameter(name = LuceneKeywordPFE.PARAM_KEYWORD_NGRAM_MAX_N, mandatory = true, defaultValue = "3")
    private int ngramMaxN;

    /** Location of the keyword list; mandatory and without a default value. */
    @ConfigurationParameter(name = LuceneKeywordPFE.PARAM_NGRAM_KEYWORDS_FILE, mandatory = true)
    protected String keywordsFile;

    // ---- flags handed through unchanged to KeywordNGramUtils ----

    @ConfigurationParameter(name = LuceneKeywordPFE.PARAM_KEYWORD_NGRAM_MARK_SENTENCE_BOUNDARY, mandatory = false, defaultValue = "true")
    private boolean markSentenceBoundary;

    @ConfigurationParameter(name = LuceneKeywordPFE.PARAM_KEYWORD_NGRAM_MARK_SENTENCE_LOCATION, mandatory = false, defaultValue = "false")
    private boolean markSentenceLocation;

    @ConfigurationParameter(name = LuceneKeywordPFE.PARAM_KEYWORD_NGRAM_INCLUDE_COMMAS, mandatory = false, defaultValue = "false")
    private boolean includeCommas;

    /** Keyword set loaded from {@link #keywordsFile} during initialization. */
    private Set<String> keywords;

    /**
     * Runs the superclass initialization and then loads the keyword set from
     * {@link #keywordsFile} via {@code FeatureUtil.getStopwords}.
     *
     * @param uimaContext the context forwarded to the superclass
     * @throws ResourceInitializationException if the superclass initialization
     *         fails, or wrapping the {@link IOException} raised while reading
     *         the keywords file
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        final Set<String> loadedKeywords;
        try {
            // NOTE(review): the meaning of the boolean flag is defined by
            // FeatureUtil.getStopwords and is not visible here - confirm.
            loadedKeywords = FeatureUtil.getStopwords(keywordsFile, true);
        } catch (IOException ioFailure) {
            throw new ResourceInitializationException(ioFailure);
        }
        keywords = loadedKeywords;
    }

    /**
     * Builds the keyword n-gram distribution across several views, using the
     * combined min/max n-gram sizes.
     *
     * @param list the views to extract keyword n-grams from
     * @return the result of {@code KeywordNGramUtils.getMultipleViewKeywordNgrams}
     * @throws TextClassificationException if the extraction utility fails
     */
    @Override // org.dkpro.tc.features.pair.core.ngram.meta.LucenePMetaCollectorBase
    public FrequencyDistribution<String> getNgramsFD(List<JCas> list) throws TextClassificationException {
        return KeywordNGramUtils.getMultipleViewKeywordNgrams(
                list,
                ngramMinN,
                ngramMaxN,
                markSentenceBoundary,
                markSentenceLocation,
                includeCommas,
                keywords);
    }

    /**
     * Single-CAS variant; this implementation performs no extraction.
     *
     * @param jCas unused
     * @return always {@code null}
     * @throws TextClassificationException never thrown by this implementation
     */
    protected FrequencyDistribution<String> getNgramsFD(JCas jCas) throws TextClassificationException {
        return null;
    }

    /**
     * Builds the keyword n-gram distribution for view 1, using the view-1
     * min/max n-gram sizes.
     *
     * @param jCas the CAS of the first view
     * @param textClassificationTarget the target annotation passed on to the utility
     * @return the result of {@code KeywordNGramUtils.getDocumentKeywordNgrams}
     * @throws TextClassificationException if the extraction utility fails
     */
    @Override // org.dkpro.tc.features.pair.core.ngram.meta.LucenePMetaCollectorBase
    public FrequencyDistribution<String> getNgramsFDView1(JCas jCas, TextClassificationTarget textClassificationTarget) throws TextClassificationException {
        return documentKeywordNgrams(jCas, textClassificationTarget, ngramMinN1, ngramMaxN1);
    }

    /**
     * Builds the keyword n-gram distribution for view 2, using the view-2
     * min/max n-gram sizes.
     *
     * @param jCas the CAS of the second view
     * @param textClassificationTarget the target annotation passed on to the utility
     * @return the result of {@code KeywordNGramUtils.getDocumentKeywordNgrams}
     * @throws TextClassificationException if the extraction utility fails
     */
    @Override // org.dkpro.tc.features.pair.core.ngram.meta.LucenePMetaCollectorBase
    public FrequencyDistribution<String> getNgramsFDView2(JCas jCas, TextClassificationTarget textClassificationTarget) throws TextClassificationException {
        return documentKeywordNgrams(jCas, textClassificationTarget, ngramMinN2, ngramMaxN2);
    }

    /**
     * @return the field name constant for the combined keyword n-grams
     */
    protected String getFieldName() {
        return LuceneKeywordPFE.KEYWORD_NGRAM_FIELD;
    }

    /**
     * @return the field name constant for view-1 keyword n-grams
     */
    @Override // org.dkpro.tc.features.pair.core.ngram.meta.LucenePMetaCollectorBase
    public String getFieldNameView1() {
        return LuceneKeywordPFE.KEYWORD_NGRAM_FIELD1;
    }

    /**
     * @return the field name constant for view-2 keyword n-grams
     */
    @Override // org.dkpro.tc.features.pair.core.ngram.meta.LucenePMetaCollectorBase
    public String getFieldNameView2() {
        return LuceneKeywordPFE.KEYWORD_NGRAM_FIELD2;
    }

    /** Shared per-view delegation; only the min/max n-gram sizes differ between views. */
    private FrequencyDistribution<String> documentKeywordNgrams(JCas view, TextClassificationTarget target, int minN, int maxN) throws TextClassificationException {
        return KeywordNGramUtils.getDocumentKeywordNgrams(
                view,
                target,
                minN,
                maxN,
                markSentenceBoundary,
                markSentenceLocation,
                includeCommas,
                keywords);
    }
}
