package org.dkpro.tc.features.pair.core.ngram;

import com.google.common.collect.MinMaxPriorityQueue;
import de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceSpecifier;
import org.dkpro.tc.api.exception.TextClassificationException;
import org.dkpro.tc.api.features.Feature;
import org.dkpro.tc.api.features.PairFeatureExtractor;
import org.dkpro.tc.api.features.meta.MetaCollectorConfiguration;
import org.dkpro.tc.api.type.TextClassificationTarget;
import org.dkpro.tc.features.ngram.base.LuceneFeatureExtractorBase;
import org.dkpro.tc.features.ngram.util.NGramUtils;
import org.dkpro.tc.features.ngram.util.TermFreqTuple;
import org.dkpro.tc.features.pair.core.ngram.meta.ComboUtils;
import org.dkpro.tc.features.pair.core.ngram.meta.LuceneNGramPMetaCollector;

/* loaded from: input_file:org/dkpro/tc/features/pair/core/ngram/LuceneNGramPFE.class */
/**
 * Pair feature extractor that emits n-gram features for two views (two {@link JCas} instances).
 *
 * <p>Depending on the configuration flags, features are produced from:
 * <ul>
 *   <li>the n-grams of view 1, matched against the top n-grams stored in the Lucene field
 *       {@value #LUCENE_NGRAM_FIELD1} (prefix {@code view1NG});</li>
 *   <li>the n-grams of view 2, matched against the top n-grams stored in the Lucene field
 *       {@value #LUCENE_NGRAM_FIELD2} (prefix {@code view2NG});</li>
 *   <li>"view-blind" n-grams matched against the inherited {@code topKSet}, either combined over
 *       both views (prefix {@code allNG}) or reported separately per view (prefixes
 *       {@code view1allNG} and {@code view2allNG}).</li>
 * </ul>
 *
 * <p>NOTE(review): {@link #extract(JCas, JCas)} writes the inherited {@code prefix} field while it
 * runs, so a single instance should presumably not be used from several threads at once - confirm
 * against the framework's threading model.
 */
public class LuceneNGramPFE extends LuceneFeatureExtractorBase implements PairFeatureExtractor {

    /** Minimum n-gram length used when collecting the n-grams of view 1. */
    public static final String PARAM_NGRAM_MIN_N_VIEW1 = "pairNgramMinNView1";

    @ConfigurationParameter(name = PARAM_NGRAM_MIN_N_VIEW1, mandatory = true, defaultValue = {"1"})
    protected int ngramMinN1;

    /** Minimum n-gram length used when collecting the n-grams of view 2. */
    public static final String PARAM_NGRAM_MIN_N_VIEW2 = "pairNgramMinNView2";

    @ConfigurationParameter(name = PARAM_NGRAM_MIN_N_VIEW2, mandatory = true, defaultValue = {"1"})
    protected int ngramMinN2;

    /** Maximum n-gram length used when collecting the n-grams of view 1. */
    public static final String PARAM_NGRAM_MAX_N_VIEW1 = "pairNgramMaxNView1";

    @ConfigurationParameter(name = PARAM_NGRAM_MAX_N_VIEW1, mandatory = true, defaultValue = {"3"})
    protected int ngramMaxN1;

    /** Maximum n-gram length used when collecting the n-grams of view 2. */
    public static final String PARAM_NGRAM_MAX_N_VIEW2 = "pairNgramMaxNView2";

    @ConfigurationParameter(name = PARAM_NGRAM_MAX_N_VIEW2, mandatory = true, defaultValue = {"3"})
    protected int ngramMaxN2;

    /** Upper bound on the number of n-grams loaded from the view 1 Lucene field. */
    public static final String PARAM_NGRAM_USE_TOP_K_VIEW1 = "pairNgramUseTopK1";

    @ConfigurationParameter(name = PARAM_NGRAM_USE_TOP_K_VIEW1, mandatory = true, defaultValue = {"500"})
    protected int ngramUseTopK1;

    /** Upper bound on the number of n-grams loaded from the view 2 Lucene field. */
    public static final String PARAM_NGRAM_USE_TOP_K_VIEW2 = "pairNgramUseTopK2";

    @ConfigurationParameter(name = PARAM_NGRAM_USE_TOP_K_VIEW2, mandatory = true, defaultValue = {"500"})
    protected int ngramUseTopK2;

    /** Enables the {@code view1NG} features. */
    public static final String PARAM_USE_VIEW1_NGRAMS_AS_FEATURES = "useView1NgramsAsFeatures";

    @ConfigurationParameter(name = PARAM_USE_VIEW1_NGRAMS_AS_FEATURES, mandatory = true)
    protected boolean useView1NgramsAsFeatures;

    /** Enables the {@code view2NG} features. */
    public static final String PARAM_USE_VIEW2_NGRAMS_AS_FEATURES = "useView2NgramsAsFeatures";

    @ConfigurationParameter(name = PARAM_USE_VIEW2_NGRAMS_AS_FEATURES, mandatory = true)
    protected boolean useView2NgramsAsFeatures;

    /** Enables the view-blind features (matched against the inherited {@code topKSet}). */
    public static final String PARAM_USE_VIEWBLIND_NGRAMS_AS_FEATURES = "useViewBlindNgramsAsFeatures";

    @ConfigurationParameter(name = PARAM_USE_VIEWBLIND_NGRAMS_AS_FEATURES, mandatory = true)
    protected boolean useViewBlindNgramsAsFeatures;

    /**
     * When view-blind features are enabled: if {@code true}, they are emitted once per view
     * ({@code view1allNG}, {@code view2allNG}); if {@code false}, once for both views combined
     * ({@code allNG}).
     */
    public static final String PARAM_MARK_VIEWBLIND_NGRAMS_WITH_LOCAL_VIEW = "markViewBlindNgramsWithLocalView";

    @ConfigurationParameter(name = PARAM_MARK_VIEWBLIND_NGRAMS_WITH_LOCAL_VIEW, mandatory = false, defaultValue = {"false"})
    protected boolean markViewBlindNgramsWithLocalView;

    /**
     * If {@code true}, a present n-gram gets the value 1; otherwise it gets its count in the
     * document n-gram distribution.
     */
    public static final String PARAM_NGRAM_BINARY_FEATURE_VALUES_COMBO = "ngramBinaryFeatureValuesCombos";

    @ConfigurationParameter(name = PARAM_NGRAM_BINARY_FEATURE_VALUES_COMBO, mandatory = false, defaultValue = {"true"})
    protected boolean ngramBinaryFeatureValuesCombos;

    /** Name of the Lucene field from which the view 1 n-grams are read. */
    public static final String LUCENE_NGRAM_FIELD1 = "ngram1";

    /** Name of the Lucene field from which the view 2 n-grams are read. */
    public static final String LUCENE_NGRAM_FIELD2 = "ngram2";

    /** N-grams loaded from {@link #LUCENE_NGRAM_FIELD1}; set in {@code initialize}. */
    protected FrequencyDistribution<String> topKSetView1;

    /** N-grams loaded from {@link #LUCENE_NGRAM_FIELD2}; set in {@code initialize}. */
    protected FrequencyDistribution<String> topKSetView2;

    /**
     * Declares the meta collector this extractor depends on.
     *
     * @param map parameter settings handed on to the {@link MetaCollectorConfiguration}
     * @return a single-element list configuring {@link LuceneNGramPMetaCollector} with a storage
     *         mapping built from {@code "targetLocation"}, {@code "sourceLocation"} and
     *         {@code "lucene"}
     * @throws ResourceInitializationException declared by the signature; propagated from the
     *         configuration calls if they fail
     */
    public List<MetaCollectorConfiguration> getMetaCollectorClasses(Map<String, Object> map) throws ResourceInitializationException {
        MetaCollectorConfiguration configuration = new MetaCollectorConfiguration(LuceneNGramPMetaCollector.class, map)
                .addStorageMapping("targetLocation", "sourceLocation", "lucene");
        return Arrays.asList(configuration);
    }

    /**
     * Runs the superclass initialisation and then loads the per-view top n-grams from the Lucene
     * index.
     *
     * @param resourceSpecifier passed through to the superclass
     * @param map passed through to the superclass
     * @return {@code false} if the superclass initialisation returned {@code false} (nothing is
     *         loaded in that case), {@code true} otherwise
     * @throws ResourceInitializationException if the superclass fails or the index cannot be read
     */
    public boolean initialize(ResourceSpecifier resourceSpecifier, Map<String, Object> map) throws ResourceInitializationException {
        if (!super.initialize(resourceSpecifier, map)) {
            return false;
        }
        this.topKSetView1 = getTopNgramsView1();
        this.topKSetView2 = getTopNgramsView2();
        return true;
    }

    /**
     * Extracts the configured n-gram features for a pair of views.
     *
     * <p>Each view must contain exactly one {@link TextClassificationTarget}
     * ({@code JCasUtil.selectSingle} is used to fetch it).
     *
     * @param jCas view 1
     * @param jCas2 view 2
     * @return the features of all enabled feature groups; empty if no group is enabled
     * @throws TextClassificationException if collecting the n-grams fails
     */
    public Set<Feature> extract(JCas jCas, JCas jCas2) throws TextClassificationException {
        TextClassificationTarget target1 = JCasUtil.selectSingle(jCas, TextClassificationTarget.class);
        TextClassificationTarget target2 = JCasUtil.selectSingle(jCas2, TextClassificationTarget.class);
        FrequencyDistribution<String> view1Ngrams = NGramUtils.getDocumentNgrams(jCas, target1, this.ngramLowerCase,
                this.filterPartialStopwordMatches, this.ngramMinN1, this.ngramMaxN1, this.stopwords);
        FrequencyDistribution<String> view2Ngrams = NGramUtils.getDocumentNgrams(jCas2, target2, this.ngramLowerCase,
                this.filterPartialStopwordMatches, this.ngramMinN2, this.ngramMaxN2, this.stopwords);

        // The prefix field is read by addToFeatureArray, so it has to be set before every call.
        Set<Feature> features = new HashSet<>();
        if (this.useView1NgramsAsFeatures) {
            this.prefix = "view1NG";
            features = addToFeatureArray(view1Ngrams, this.topKSetView1, features);
        }
        if (this.useView2NgramsAsFeatures) {
            this.prefix = "view2NG";
            features = addToFeatureArray(view2Ngrams, this.topKSetView2, features);
        }
        if (this.useViewBlindNgramsAsFeatures) {
            if (this.markViewBlindNgramsWithLocalView) {
                this.prefix = "view1allNG";
                features = addToFeatureArray(view1Ngrams, this.topKSet, features);
                this.prefix = "view2allNG";
                features = addToFeatureArray(view2Ngrams, this.topKSet, features);
            } else {
                // The combined n-grams are only needed in this branch, so they are computed here
                // rather than on every call.
                FrequencyDistribution<String> combinedNgrams = getViewNgrams(jCas, jCas2);
                this.prefix = "allNG";
                features = addToFeatureArray(combinedNgrams, this.topKSet, features);
            }
        }
        return features;
    }

    /**
     * Adds one feature per key of {@code frequencyDistribution2} to {@code set}. The feature name
     * is the current {@code prefix}, an underscore and the n-gram.
     *
     * <p>If the n-gram occurs in {@code frequencyDistribution}, the value is 1 when
     * {@code ngramBinaryFeatureValuesCombos} is set and the n-gram's count otherwise; if it does
     * not occur, the value is 0.
     *
     * @param frequencyDistribution n-grams found in the document/view
     * @param frequencyDistribution2 n-grams for which features are generated (the "top" n-grams)
     * @param set feature set that is modified in place
     * @return {@code set}, after the features have been added
     */
    public Set<Feature> addToFeatureArray(FrequencyDistribution<String> frequencyDistribution, FrequencyDistribution<String> frequencyDistribution2, Set<Feature> set) {
        for (String ngram : frequencyDistribution2.getKeys()) {
            String featureName = this.prefix + "_" + ngram;
            if (frequencyDistribution.contains(ngram)) {
                long value = this.ngramBinaryFeatureValuesCombos ? 1L : frequencyDistribution.getCount(ngram);
                set.add(new Feature(featureName, Long.valueOf(value)));
            } else {
                set.add(new Feature(featureName, 0));
            }
        }
        return set;
    }

    /**
     * Loads up to {@code ngramUseTopK} n-grams from the Lucene field {@code "ngram"}.
     *
     * @return the loaded n-grams with their total term frequencies
     * @throws ResourceInitializationException if the index cannot be read
     */
    protected FrequencyDistribution<String> getTopNgrams() throws ResourceInitializationException {
        return getTopNgrams(this.ngramUseTopK, "ngram");
    }

    /**
     * Loads up to {@code ngramUseTopK1} n-grams from the Lucene field {@link #LUCENE_NGRAM_FIELD1}.
     *
     * @return the loaded n-grams with their total term frequencies
     * @throws ResourceInitializationException if the index cannot be read
     */
    protected FrequencyDistribution<String> getTopNgramsView1() throws ResourceInitializationException {
        return getTopNgrams(this.ngramUseTopK1, LUCENE_NGRAM_FIELD1);
    }

    /**
     * Loads up to {@code ngramUseTopK2} n-grams from the Lucene field {@link #LUCENE_NGRAM_FIELD2}.
     *
     * @return the loaded n-grams with their total term frequencies
     * @throws ResourceInitializationException if the index cannot be read
     */
    protected FrequencyDistribution<String> getTopNgramsView2() throws ResourceInitializationException {
        return getTopNgrams(this.ngramUseTopK2, LUCENE_NGRAM_FIELD2);
    }

    /**
     * Reads all terms of one field from the Lucene index in {@code luceneDir} and keeps at most
     * {@code topK} of them.
     *
     * <p>Which terms survive is decided by the natural ordering of {@link TermFreqTuple} inside a
     * size-bounded {@link MinMaxPriorityQueue} (presumably the most frequent terms - verify
     * against {@code TermFreqTuple.compareTo}). A missing field yields an empty result.
     *
     * @param topK maximum number of terms to keep
     * @param fieldName name of the Lucene field to read
     * @return the retained terms together with their total term frequency
     * @throws ResourceInitializationException wrapping any exception raised while reading the index
     */
    private FrequencyDistribution<String> getTopNgrams(int topK, String fieldName) throws ResourceInitializationException {
        FrequencyDistribution<String> topNgrams = new FrequencyDistribution<>();
        MinMaxPriorityQueue<TermFreqTuple> queue = MinMaxPriorityQueue.maximumSize(topK).create();
        // try-with-resources: reader and directory are released even if reading fails.
        try (FSDirectory directory = FSDirectory.open(this.luceneDir);
                DirectoryReader reader = DirectoryReader.open(directory)) {
            Fields fields = MultiFields.getFields(reader);
            Terms terms = (fields == null) ? null : fields.terms(fieldName);
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator((TermsEnum) null);
                for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
                    queue.add(new TermFreqTuple(term.utf8ToString(), termsEnum.totalTermFreq()));
                }
            }
            while (!queue.isEmpty()) {
                TermFreqTuple tuple = queue.poll();
                topNgrams.addSample(tuple.getTerm(), tuple.getFreq());
            }
            return topNgrams;
        } catch (Exception e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Collects the n-grams of both views into one distribution, using the view-independent
     * {@code ngramMinN}/{@code ngramMaxN} settings.
     *
     * @param jCas view 1
     * @param jCas2 view 2
     * @return the n-grams returned by {@code ComboUtils.getMultipleViewNgrams} for the two views
     * @throws TextClassificationException if the n-grams cannot be collected
     */
    protected FrequencyDistribution<String> getViewNgrams(JCas jCas, JCas jCas2) throws TextClassificationException {
        List<JCas> views = new ArrayList<>();
        views.add(jCas);
        views.add(jCas2);
        return ComboUtils.getMultipleViewNgrams(views, null, this.ngramLowerCase, this.filterPartialStopwordMatches,
                this.ngramMinN, this.ngramMaxN, this.stopwords);
    }

    /**
     * @return the Lucene field name {@code "ngram"} (presumably consumed by the base class -
     *         not visible from this file)
     */
    protected String getFieldName() {
        return "ngram";
    }

    /**
     * @return the inherited {@code ngramUseTopK} setting
     */
    protected int getTopN() {
        return this.ngramUseTopK;
    }

    /**
     * @return the feature prefix {@code "allNG"}
     */
    protected String getFeaturePrefix() {
        return "allNG";
    }
}
