package org.opensextant.extractors.geo.rules;

import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.opensextant.data.Place;
import org.opensextant.extractors.geo.PlaceCandidate;
import org.opensextant.extractors.geo.ScoredPlace;
import org.opensextant.util.TextUtils;

/* loaded from: input_file:org/opensextant/extractors/geo/rules/NonsenseFilter.class */
/**
 * Geocoding rule that filters out place candidates whose text looks like nonsense.
 *
 * <p>A candidate that is not already valid is filtered out when any of the following holds:
 * <ul>
 *   <li>its text contains irregular punctuation ({@code "Nonsense,Punct"});</li>
 *   <li>it is short and consists of a word followed by digits ({@code "Nonsense,Numbers"});</li>
 *   <li>it is lower case and repeats a token ({@code "Nonsense,Repeated,Lower"});</li>
 *   <li>it is short and none of its candidate locations agrees with it with respect to
 *       diacritics, containment or reduced ("phonetic") form
 *       ({@code "Nonsense,Mismatched,Diacritic"}).</li>
 * </ul>
 */
public class NonsenseFilter extends GeocodeRule {
    /**
     * Length threshold, compared against {@code PlaceCandidate.getLength()}, under which a
     * candidate is subject to the trivial-numeric and phonetic checks.
     */
    public static final int GENERIC_ONE_WORD = 10;

    /** Splits normalized text on runs of whitespace and punctuation. Left non-final for compatibility. */
    public static Pattern tokenizer = Pattern.compile("[\\s+\\p{Punct}]+");

    /** Candidates longer than this are only subject to the punctuation check. */
    private static final int MAX_NONSENSE_PHRASE_LEN = 20;

    /** Containment and phonetic comparisons only apply to candidates longer than this. */
    private static final int MIN_PHONETIC_MATCH_LEN = 4;

    /** Single characters stripped by {@link #phoneticRedux(String)}: hyphen, whitespace, plus, backtick. */
    private static final Pattern wsRedux = Pattern.compile("[-\\s+`]");

    /** A word ending in a period, a single space, then non-whitespace. */
    static Pattern validAbbrev = Pattern.compile("\\w+[.] \\S+");

    /** Punctuation (other than apostrophe/backtick) followed by whitespace, or any quote/dash-like mark. */
    static Pattern invalidPunct = Pattern.compile("[\\p{Punct}&&[^'`]]+\\s+|[\"—―“”″]");

    /** Word characters, then punctuation/whitespace, then digits; applied with a full match. */
    static Pattern trivialNumerics = Pattern.compile("\\w+[\\p{Punct}\\s]+\\d+");

    /** Any punctuation or quote/dash-like mark except hyphen, underscore, period, apostrophe and backtick. */
    static Pattern anyInvalidPunct = Pattern.compile("[[\\p{Punct}—―“”″]&&[^-_.'`]]+");

    /**
     * Reduces a phrase by deleting every hyphen, whitespace character, plus sign and backtick.
     *
     * @param str phrase to reduce
     * @return the phrase with those characters removed
     */
    protected static final String phoneticRedux(String str) {
        return wsRedux.matcher(str).replaceAll("");
    }

    /**
     * Tests whether a name, once reduced, equals an already reduced phrase, ignoring case.
     *
     * @param str  phrase that the caller has already reduced with {@link #phoneticRedux(String)}
     * @param str2 name to reduce and compare against {@code str}
     * @return {@code true} if the reduced {@code str2} equals {@code str} ignoring case
     */
    protected static final boolean isPhoneticMatch(String str, String str2) {
        return phoneticRedux(str2).equalsIgnoreCase(str);
    }

    /**
     * Applies the nonsense checks to every candidate that is not already valid, marking
     * offenders as filtered out and recording the rule that fired.
     *
     * @param list candidates to evaluate; each may be modified in place
     */
    @Override
    public void evaluate(List<PlaceCandidate> list) {
        for (PlaceCandidate placeCandidate : list) {
            if (placeCandidate.isValid()) {
                continue;
            }
            if (irregularPunctPatterns(placeCandidate.getText())) {
                placeCandidate.setFilteredOut(true);
                placeCandidate.addRule("Nonsense,Punct");
                continue;
            }
            // Longer phrases are not examined any further.
            if (placeCandidate.getLength() > MAX_NONSENSE_PHRASE_LEN) {
                continue;
            }
            if (placeCandidate.getLength() < GENERIC_ONE_WORD
                    && trivialNumerics.matcher(placeCandidate.getText()).matches()) {
                placeCandidate.setFilteredOut(true);
                placeCandidate.addRule("Nonsense,Numbers");
                continue;
            }
            if (placeCandidate.isLower() && hasRepeatedToken(placeCandidate.getTextnorm())) {
                placeCandidate.setFilteredOut(true);
                placeCandidate.addRule("Nonsense,Repeated,Lower");
            }
            if (!placeCandidate.isFilteredOut() && placeCandidate.getLength() <= GENERIC_ONE_WORD) {
                assessPhoneticMatch(placeCandidate);
            }
        }
    }

    /** Returns {@code true} if any token produced by {@link #tokenizer} occurs more than once. */
    private static boolean hasRepeatedToken(String text) {
        HashSet<String> seen = new HashSet<>();
        for (String token : tokenizer.split(text)) {
            // add() returns false when the token was already present.
            if (!seen.add(token)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether at least one location of the candidate plausibly matches its text, and
     * filters the candidate out with {@code "Nonsense,Mismatched,Diacritic"} when none does
     * (which includes the case of a candidate without any locations).
     *
     * <p>Locations are examined in iteration order and the first acceptable one ends the search:
     * <ol>
     *   <li>location and candidate both have diacritics and the location name equals the
     *       candidate text ignoring case: rule {@code "Matched-Diacritics"};</li>
     *   <li>neither has diacritics: accepted without adding a rule;</li>
     *   <li>otherwise, only for candidates longer than {@code MIN_PHONETIC_MATCH_LEN}: the
     *       location's normalized name contains the candidate's normalized text
     *       ({@code "Location-Contains-Name"}), or the two agree in reduced form
     *       ({@code "Matched-Phonetic"}).</li>
     * </ol>
     *
     * @param placeCandidate candidate to assess; may be marked filtered out or given a rule
     */
    public void assessPhoneticMatch(PlaceCandidate placeCandidate) {
        String reducedText = phoneticRedux(placeCandidate.getTextnorm());
        this.log.debug("Testing phrase {} phonetic:{}", placeCandidate.getTextnorm(), reducedText);

        boolean hasValidGeo = false;
        String matchRule = null;
        for (ScoredPlace geo : placeCandidate.getPlaces()) {
            this.log.debug("\tPLACE={}, {}", geo, geo.getNamenorm());
            boolean geoDiacritics = TextUtils.hasDiacritics(geo.getPlaceName());

            if (geoDiacritics && placeCandidate.hasDiacritics
                    && geo.getName().equalsIgnoreCase(placeCandidate.getText())) {
                hasValidGeo = true;
                matchRule = "Matched-Diacritics";
                break;
            }
            if (!geoDiacritics && !placeCandidate.hasDiacritics) {
                hasValidGeo = true;
                break;
            }
            // Reaching this point means the diacritics did not line up for this location.
            if (placeCandidate.getLength() > MIN_PHONETIC_MATCH_LEN) {
                if (geo.getNamenorm().contains(placeCandidate.getTextnorm())) {
                    hasValidGeo = true;
                    matchRule = "Location-Contains-Name";
                    break;
                }
                if (isPhoneticMatch(reducedText, geo.getNamenorm())) {
                    hasValidGeo = true;
                    matchRule = "Matched-Phonetic";
                    break;
                }
            }
            this.log.debug("\t{} !~ {}", placeCandidate.getText(), geo.getNamenorm());
        }

        if (!hasValidGeo) {
            placeCandidate.setFilteredOut(true);
            placeCandidate.addRule("Nonsense,Mismatched,Diacritic");
        } else if (matchRule != null) {
            placeCandidate.addRule(matchRule);
        }
    }

    /**
     * Tests whether the text contains any punctuation or quote/dash-like mark other than
     * hyphen, underscore, period, apostrophe and backtick.
     *
     * @param str text to inspect
     * @return {@code true} if such a character occurs anywhere in {@code str}
     */
    public static boolean irregularPunctPatterns(String str) {
        return anyInvalidPunct.matcher(str).find();
    }

    /**
     * Alternative punctuation test that weighs invalid punctuation against valid abbreviations.
     *
     * <p>Text containing {@code '<'} or {@code '>'} is always irregular. Otherwise the text is
     * irregular when the number of {@code invalidPunct} matches exceeds the number of
     * {@code validAbbrev} matches, where a trailing period counts as one more valid abbreviation.
     *
     * @param str text to inspect
     * @return {@code true} if the text is considered to have irregular punctuation
     */
    public static boolean irregularPunctPatternsComplicated(String str) {
        if (str.indexOf('<') >= 0 || str.indexOf('>') >= 0) {
            return true;
        }
        int validCount = 0;
        Matcher abbrevMatcher = validAbbrev.matcher(str);
        while (abbrevMatcher.find()) {
            validCount++;
        }
        if (str.endsWith(".")) {
            validCount++;
        }
        int invalidCount = 0;
        Matcher punctMatcher = invalidPunct.matcher(str);
        while (punctMatcher.find()) {
            invalidCount++;
        }
        // validCount is never negative, so a larger invalidCount is necessarily positive.
        return invalidCount > validCount;
    }

    /**
     * Counts whitespace and consecutive non-alphanumeric characters in the text.
     *
     * <p>A character counts as irregular when it is not a letter or digit and the character
     * immediately before it is also not a letter or digit. The first character never counts,
     * nor does a character that follows a NUL ({@code '\0'}) character.
     *
     * @param str text to inspect
     * @return a two-element array: {@code [0]} the number of whitespace characters,
     *         {@code [1]} the number of irregular characters
     */
    public static int[] irregularPunct(String str) {
        int irregularCount = 0;
        int whitespaceCount = 0;
        char previous = 0;
        for (char current : str.toCharArray()) {
            if (Character.isWhitespace(current)) {
                whitespaceCount++;
            }
            // Whitespace is never a letter or digit, so a single test covers both cases.
            if (!Character.isLetterOrDigit(current) && !Character.isLetterOrDigit(previous) && previous != 0) {
                irregularCount++;
            }
            previous = current;
        }
        return new int[]{whitespaceCount, irregularCount};
    }

    /**
     * Per-location evaluation is not used by this rule; all work happens in
     * {@link #evaluate(List)}.
     *
     * @param placeCandidate ignored
     * @param place          ignored
     */
    @Override
    public void evaluate(PlaceCandidate placeCandidate, Place place) {
    }
}
