package org.dkpro.tc.features.pair.core.ngram;

import de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceSpecifier;
import org.dkpro.tc.api.exception.TextClassificationException;
import org.dkpro.tc.api.features.Feature;
import org.dkpro.tc.api.features.PairFeatureExtractor;
import org.dkpro.tc.api.features.meta.MetaCollectorConfiguration;
import org.dkpro.tc.api.type.TextClassificationTarget;
import org.dkpro.tc.features.ngram.util.KeywordNGramUtils;
import org.dkpro.tc.features.pair.core.ngram.meta.ComboUtils;
import org.dkpro.tc.features.pair.core.ngram.meta.LuceneKeywordCPMetaCollector;

/* loaded from: input_file:org/dkpro/tc/features/pair/core/ngram/LuceneKeywordCPFE.class */
/**
 * Pair feature extractor that builds features from combinations of the keyword
 * n-grams found in the two views of a document pair.
 *
 * <p>The keyword n-grams of each view are extracted separately, combined through
 * {@link ComboUtils#getCombinedNgrams}, and then turned into features by the
 * inherited {@code addToFeatureArray} using the top combo n-grams loaded during
 * {@link #initialize}.
 */
public class LuceneKeywordCPFE extends LuceneKeywordPFE implements PairFeatureExtractor {
    /** Name of the parameter holding the minimum total size of a combined n-gram. */
    public static final String PARAM_KEYWORD_NGRAM_MIN_N_COMBO = "keywordNgramMinNCombo";

    /** Lower bound (inclusive) applied to the combined n-gram size; defaults to 2. */
    @ConfigurationParameter(name = PARAM_KEYWORD_NGRAM_MIN_N_COMBO, mandatory = false, defaultValue = {"2"})
    protected int ngramMinNCombo;

    /** Name of the parameter holding the maximum total size of a combined n-gram. */
    public static final String PARAM_KEYWORD_NGRAM_MAX_N_COMBO = "keywordNgramMaxNCombo";

    /** Upper bound (inclusive) applied to the combined n-gram size; defaults to 4. */
    @ConfigurationParameter(name = PARAM_KEYWORD_NGRAM_MAX_N_COMBO, mandatory = false, defaultValue = {"4"})
    protected int ngramMaxNCombo;

    /** Name of the parameter holding the number of top combo n-grams to use. */
    public static final String PARAM_KEYWORD_NGRAM_USE_TOP_K_COMBO = "keywordNgramUseTopKCombo";

    /** Value copied into {@code topNOfTheMoment} before the top combo n-grams are loaded; defaults to 500. */
    @ConfigurationParameter(name = PARAM_KEYWORD_NGRAM_USE_TOP_K_COMBO, mandatory = false, defaultValue = {"500"})
    protected int ngramUseTopKCombo;

    /** Name of the parameter controlling symmetrical combinations. */
    public static final String PARAM_KEYWORD_NGRAM_SYMMETRY_COMBO = "keywordNgramUseSymmetricalCombos";

    /** Flag handed unchanged to {@link ComboUtils#getCombinedNgrams}; defaults to {@code false}. */
    @ConfigurationParameter(name = PARAM_KEYWORD_NGRAM_SYMMETRY_COMBO, mandatory = false, defaultValue = {"false"})
    protected boolean ngramUseSymmetricalCombos;

    /** Field name assigned to {@code fieldOfTheMoment} before the top combo n-grams are loaded. */
    public static final String KEYWORD_NGRAM_FIELD_COMBO = "ngramKeywordCombo";

    /** Top combo n-grams obtained from {@code getTopNgrams()} in {@link #initialize}. */
    protected FrequencyDistribution<String> topKSetCombo;

    /** When {@code false}, {@link #passesScreening} accepts every term. */
    private boolean useNgramScreening;

    /**
     * Returns the meta collector configuration for this extractor.
     *
     * @param map parameter settings forwarded to the meta collector configuration
     * @return a single-element list configuring {@link LuceneKeywordCPMetaCollector}
     * @throws ResourceInitializationException declared by the overridden method
     */
    @Override // org.dkpro.tc.features.pair.core.ngram.LuceneKeywordPFE
    public List<MetaCollectorConfiguration> getMetaCollectorClasses(Map<String, Object> map) throws ResourceInitializationException {
        return Arrays.asList(new MetaCollectorConfiguration(LuceneKeywordCPMetaCollector.class, map).addStorageMapping("targetLocation", "sourceLocation", "lucene"));
    }

    /**
     * Initializes the parent extractor and then loads the top combo n-grams.
     *
     * @param resourceSpecifier passed through to the parent initialization
     * @param map passed through to the parent initialization
     * @return {@code false} if the parent initialization fails, {@code true} otherwise
     * @throws ResourceInitializationException if the parent initialization throws it
     */
    @Override // org.dkpro.tc.features.pair.core.ngram.LuceneKeywordPFE
    public boolean initialize(ResourceSpecifier resourceSpecifier, Map<String, Object> map) throws ResourceInitializationException {
        // Screening stays off while the parent initializes and is only enabled
        // afterwards. NOTE(review): presumably the parent's own top-n-gram loading
        // must not be filtered by passesScreening - confirm against LuceneKeywordPFE.
        this.useNgramScreening = false;
        if (!super.initialize(resourceSpecifier, map)) {
            return false;
        }
        this.useNgramScreening = true;

        // getTopNgrams() takes no arguments, so the field and count are set first.
        this.fieldOfTheMoment = KEYWORD_NGRAM_FIELD_COMBO;
        this.topNOfTheMoment = this.ngramUseTopKCombo;
        this.topKSetCombo = getTopNgrams();
        return true;
    }

    /**
     * Extracts the combined keyword n-gram features for a pair of views.
     *
     * <p>Side effect: sets the inherited {@code prefix} field to {@code "comboKNG"}.
     *
     * @param jCas the first view
     * @param jCas2 the second view
     * @return the features produced by {@code addToFeatureArray} for the combined n-grams
     * @throws TextClassificationException declared by the overridden method
     */
    @Override // org.dkpro.tc.features.pair.core.ngram.LuceneKeywordPFE
    public Set<Feature> extract(JCas jCas, JCas jCas2) throws TextClassificationException {
        // Keyword n-grams of the first view, limited to its single classification target.
        TextClassificationTarget target1 = JCasUtil.selectSingle(jCas, TextClassificationTarget.class);
        FrequencyDistribution<String> view1Ngrams = KeywordNGramUtils.getDocumentKeywordNgrams(
                jCas, target1, this.ngramMinN1, this.ngramMaxN1,
                this.markSentenceBoundary, this.markSentenceLocation, this.includeCommas, this.keywords);

        // Same for the second view, using its own min/max n-gram sizes.
        TextClassificationTarget target2 = JCasUtil.selectSingle(jCas2, TextClassificationTarget.class);
        FrequencyDistribution<String> view2Ngrams = KeywordNGramUtils.getDocumentKeywordNgrams(
                jCas2, target2, this.ngramMinN2, this.ngramMaxN2,
                this.markSentenceBoundary, this.markSentenceLocation, this.includeCommas, this.keywords);

        FrequencyDistribution<String> combinedNgrams = ComboUtils.getCombinedNgrams(
                view1Ngrams, view2Ngrams, this.ngramMinNCombo, this.ngramMaxNCombo, this.ngramUseSymmetricalCombos);

        this.prefix = "comboKNG";
        Set<Feature> features = new HashSet<Feature>();
        addToFeatureArray(combinedNgrams, this.topKSetCombo, features);
        return features;
    }

    /**
     * Decides whether a combined n-gram term is accepted.
     *
     * <p>With screening disabled every term passes. Otherwise the term is split on
     * {@link ComboUtils#JOINT}; the first part must be contained in both
     * {@code topKSetView1} and {@code topKSet}, the second part in both
     * {@code topKSetView2} and {@code topKSet}, and the summed number of
     * {@code "_"}-separated tokens of both parts must lie within
     * [{@code ngramMinNCombo}, {@code ngramMaxNCombo}].
     *
     * @param str the combined term to check
     * @return {@code true} if the term passes; {@code false} otherwise, including when
     *         the term does not split into at least two parts
     */
    protected boolean passesScreening(String str) {
        if (!this.useNgramScreening) {
            return true;
        }
        // Split once; a term without two parts cannot be a valid combination, so
        // reject it instead of failing with ArrayIndexOutOfBoundsException.
        String[] parts = str.split(ComboUtils.JOINT);
        if (parts.length < 2) {
            return false;
        }
        String view1Part = parts[0];
        String view2Part = parts[1];
        int combinedSize = view1Part.split("_").length + view2Part.split("_").length;
        return this.topKSetView1.contains(view1Part)
                && this.topKSet.contains(view1Part)
                && this.topKSetView2.contains(view2Part)
                && this.topKSet.contains(view2Part)
                && combinedSize <= this.ngramMaxNCombo
                && combinedSize >= this.ngramMinNCombo;
    }
}
