package org.opensextant.extractors.geo;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.opensextant.ConfigException;
import org.opensextant.extraction.MatchFilter;
import org.opensextant.util.LuceneStopwords;
import org.opensextant.util.TextUtils;
import org.supercsv.io.CsvMapReader;
import org.supercsv.prefs.CsvPreference;

/* loaded from: input_file:org/opensextant/extractors/geo/TagFilter.class */
/**
 * Match filter for candidate place names.
 *
 * <p>Two kinds of term lists are held:
 * <ul>
 *   <li>{@link #nonPlaceStopTerms} &mdash; exclusion terms read from the classpath CSV
 *       resources listed in {@link #EXCLUSION_RESOURCES};</li>
 *   <li>per-language stopword sets, keyed by the language codes listed in
 *       {@link #STOPWORD_LANGUAGES}, obtained from {@link LuceneStopwords}.</li>
 * </ul>
 */
public class TagFilter extends MatchFilter {
    /** Classpath CSV resources whose "exclusion" column feeds {@link #nonPlaceStopTerms}. */
    private static final String[] EXCLUSION_RESOURCES = {
        "/filters/non-placenames.csv",
        "/filters/non-placenames,spa.csv",
        "/filters/non-placenames,deu.csv",
        "/filters/non-placenames,rus,ukr.csv",
        "/filters/non-placenames,acronym.csv"
    };

    /** Language codes for which a stopword set is loaded at construction time. */
    private static final String[] STOPWORD_LANGUAGES = {
        "ja", "ko", "zh", "ar", "fa", "ur", "th", "tr", "id", "tl", "vi", "ru", "it", "pt", "de", "nl", "es", "en"
    };

    /** Exclusion terms; each term is stored both as written and lower-cased (see {@link #loadExclusions}). */
    Set<String> nonPlaceStopTerms;
    /** When true, {@link #filterOut(String)} consults {@link #nonPlaceStopTerms}. */
    boolean filter_stopwords = true;
    /** When true, {@link #filterOut(String)} rejects all-lower-case terms. */
    boolean filter_on_case = true;
    /** Stopword sets keyed by language code. */
    private final Map<String, Set<Object>> langStopFilters = new HashMap<>();

    /**
     * Loads every exclusion CSV and every language stopword set.
     *
     * @throws IOException if a resource is missing or cannot be read/parsed
     *         (reported as a {@link ConfigException} by the loaders)
     */
    public TagFilter() throws IOException {
        this.nonPlaceStopTerms = new HashSet<>();
        for (String resource : EXCLUSION_RESOURCES) {
            InputStream in = GazetteerMatcher.class.getResourceAsStream(resource);
            if (in == null) {
                // Fail with the resource name rather than an anonymous NullPointerException.
                throw new ConfigException("Could not load exclusions. Resource not found: " + resource);
            }
            this.nonPlaceStopTerms.addAll(loadExclusions(in));
        }
        loadLanguageStopwords(STOPWORD_LANGUAGES);
    }

    /**
     * Registers one stopword set per language code, as returned by
     * {@link LuceneStopwords#getStopwords}.
     *
     * @param strArr language codes to load
     */
    private void loadLanguageStopwords(String[] strArr) throws IOException, ConfigException {
        for (String lang : strArr) {
            this.langStopFilters.put(lang, LuceneStopwords.getStopwords(new ClasspathResourceLoader(TagFilter.class), lang));
        }
    }

    /**
     * Reads a UTF-8 text file of stop terms (one per line) and registers the set under the given key.
     * Lines starting with '#' and blank lines are ignored; terms are trimmed and lower-cased.
     *
     * @param url location of the stop term file
     * @param str key under which the resulting set is stored in the language stop filters
     * @throws ConfigException if the file yields no terms
     * @throws IOException if the file cannot be opened or read
     */
    private void loadStopSet(URL url, String str) throws IOException {
        try (InputStream in = url.openStream()) {
            Set<Object> terms = new HashSet<>();
            for (String line : IOUtils.readLines(in, StandardCharsets.UTF_8)) {
                String term = line.trim();
                // Skip blanks too: an empty-string "term" is meaningless and would
                // mask the empty-file check below.
                if (term.isEmpty() || term.startsWith("#")) {
                    continue;
                }
                terms.add(term.toLowerCase());
            }
            if (terms.isEmpty()) {
                throw new ConfigException("No terms found in stop filter file " + url);
            }
            this.langStopFilters.put(str, terms);
        }
    }

    /**
     * Turns the exclusion-term check in {@link #filterOut(String)} on or off.
     *
     * @param z true to enable the check
     */
    public void enableStopwordFilter(boolean z) {
        this.filter_stopwords = z;
    }

    /**
     * Turns the all-lower-case rejection in {@link #filterOut(String)} on or off.
     *
     * @param z true to enable the check
     */
    public void enableCaseSensitive(boolean z) {
        this.filter_on_case = z;
    }

    /**
     * Decides whether a term should be filtered out.
     *
     * @param str the term; {@code null} is never filtered
     * @return true if case filtering is enabled and the term is entirely lower case, or if
     *         stopword filtering is enabled and the lower-cased term (hyphens replaced by
     *         spaces) is an exclusion term; false otherwise
     */
    public boolean filterOut(String str) {
        if (str == null) {
            return false;
        }
        if (this.filter_on_case && StringUtils.isAllLowerCase(str)) {
            return true;
        }
        if (!this.filter_stopwords) {
            return false;
        }
        String normalized = str.toLowerCase().replace('-', ' ');
        return this.nonPlaceStopTerms.contains(normalized);
    }

    /**
     * Decides whether a place candidate should be filtered out, using language stopword sets
     * and letter-case heuristics.
     *
     * @param placeCandidate the candidate match
     * @param str key into the language stop filters (a language code), or {@code null}
     * @param z case flag; when true the leading-upper-case exemption is skipped and the final
     *        lower-case rule never fires (presumably "surrounding text is upper case" - confirm with callers)
     * @param z2 case flag; when true the final lower-case rule never fires
     *        (presumably "surrounding text is lower case" - confirm with callers)
     * @return true if the candidate should be discarded
     */
    public boolean filterOut(PlaceCandidate placeCandidate, String str, boolean z, boolean z2) {
        if (str == null) {
            if (placeCandidate.isASCII()) {
                return false;
            }
            // No language key: short non-ASCII text is checked against every stopword set.
            if (placeCandidate.getLength() < 4) {
                return assessAllFilters(placeCandidate.getText().toLowerCase());
            }
        }
        // A stopword set exists for this key: it alone decides.
        if (this.langStopFilters.containsKey(str)) {
            return this.langStopFilters.get(str).contains(placeCandidate.getText().toLowerCase());
        }
        // Text starting with an upper-case letter, but not entirely upper case, is kept.
        boolean startsUpper = Character.isUpperCase(placeCandidate.getText().charAt(0));
        if (!z && startsUpper && !placeCandidate.isUpper()) {
            return false;
        }
        boolean isCJK = TextUtils.isCJK(str);
        if (isCJK && filterOutCJK(placeCandidate)) {
            return true;
        }
        // Remaining rule: short (< 10) lower-case candidates are dropped when neither flag is set.
        return !isCJK && !z2 && !z && placeCandidate.isLower() && placeCandidate.getLength() < 10;
    }

    /**
     * Checks a term against the stopword set of one language.
     *
     * @param str language key; {@code null} is treated as "en"
     * @param str2 term to look up, used as given (no case normalization is applied here)
     * @return true if a stopword set exists for the language and contains the term
     */
    public boolean filterOut(String str, String str2) {
        String lang = (str == null) ? "en" : str;
        if (!this.langStopFilters.containsKey(lang)) {
            return false;
        }
        return this.langStopFilters.get(lang).contains(str2);
    }

    /**
     * CJK rule: a candidate shorter than 5 that contains whitespace is filtered out.
     */
    private boolean filterOutCJK(PlaceCandidate placeCandidate) {
        return placeCandidate.getLength() < 5 && TextUtils.count_ws(placeCandidate.getText()) > 0;
    }

    /**
     * Checks a term against every loaded language stopword set.
     *
     * @param str term to look up, used as given
     * @return true if any stopword set contains the term
     */
    public boolean assessAllFilters(String str) {
        for (Set<Object> terms : this.langStopFilters.values()) {
            if (terms.contains(str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads exclusion terms from a CSV stream that has a header row with an "exclusion" column.
     * Blank values and values starting with '#' are skipped. Each remaining value is trimmed
     * and added twice: as written and lower-cased.
     *
     * <p>The stream is decoded as UTF-8 and is always closed before this method returns.
     *
     * @param inputStream CSV content; must not be null
     * @return the set of exclusion terms
     * @throws ConfigException if the stream is null or the CSV cannot be read or parsed
     */
    public static Set<String> loadExclusions(InputStream inputStream) throws ConfigException {
        if (inputStream == null) {
            throw new ConfigException("Could not load exclusions. Input stream is null.");
        }
        // Explicit UTF-8: the lists contain non-ASCII terms and must not depend on the platform charset.
        try (InputStreamReader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
                CsvMapReader csv = new CsvMapReader(reader, CsvPreference.EXCEL_PREFERENCE)) {
            String[] header = csv.getHeader(true);
            Set<String> terms = new HashSet<>();
            Map<String, String> row;
            while ((row = csv.read(header)) != null) {
                String value = row.get("exclusion");
                if (StringUtils.isBlank(value) || value.startsWith("#")) {
                    continue;
                }
                String term = value.trim();
                terms.add(term);
                terms.add(term.toLowerCase());
            }
            return terms;
        } catch (Exception e) {
            // Broad catch kept on purpose: parser runtime errors are also reported as ConfigException.
            throw new ConfigException("Could not load exclusions.", e);
        }
    }
}
