package org.opensextant.extractors.geo.social;

import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.solr.client.solrj.SolrServerException;
import org.opensextant.ConfigException;
import org.opensextant.data.Country;
import org.opensextant.data.Geocoding;
import org.opensextant.data.Language;
import org.opensextant.data.Place;
import org.opensextant.data.TextInput;
import org.opensextant.data.social.Message;
import org.opensextant.data.social.MessageParseException;
import org.opensextant.data.social.Tweet;
import org.opensextant.extraction.ExtractionException;
import org.opensextant.extraction.MatchFilter;
import org.opensextant.extraction.TextMatch;
import org.opensextant.extractors.geo.PlaceCandidate;
import org.opensextant.extractors.geo.PlaceGeocoder;
import org.opensextant.extractors.geo.ScoredPlace;
import org.opensextant.extractors.geo.SolrGazetteer;
import org.opensextant.extractors.geo.rules.GeocodeRule;
import org.opensextant.extractors.xcoord.GeocoordMatch;
import org.opensextant.extractors.xcoord.XCoord;
import org.opensextant.processing.Parameters;
import org.opensextant.util.GeodeticUtility;
import org.opensextant.util.GeonamesUtility;
import org.opensextant.util.TextUtils;

/* loaded from: input_file:org/opensextant/extractors/geo/social/XponentGeocoder.class */
public class XponentGeocoder extends GeoInferencer {
    /** Gazetteer handle; obtained from the tagger in {@link #configure()}. */
    protected SolrGazetteer gazetteer = null;
    /** Coordinate extractor applied to free-text place names; created in {@link #configure()}. */
    protected XCoord userlocX = null;
    /** Place tagger used for free-text and time-zone geotagging; created in {@link #configure()}. */
    protected PlaceGeocoder tagger = null;
    /** Count of processed locations that carried (or yielded) a coordinate. */
    protected long recordsWithCoord = 0;
    /** Count of processed records for which {@code Message.validTZ} was true. */
    protected long recordsWithTZ = 0;
    /** Count of processed locations resolved to a place via name/hierarchy inference. */
    protected long recordsWithPlace = 0;
    /** Exclusion filter for profile place names; loaded in {@link #configure()}. */
    protected MatchFilter profilePlaceFilter = null;
    /** Scoring rule registered with the tagger; holds per-tweet state. */
    protected UserProfileLocationRule profileRule = null;
    /** Feature class codes accepted by {@link #isUsableFeature(String)}; filled in the static initializer. */
    private static final Set<String> usableFeatures = new HashSet<>();
    /** Short country code names ("us", "uk", ...); filled in the static initializer. */
    static Set<String> allowedCountryCodeNames;
    /** Lower-case names that must not be resolved as countries by {@link #inferCountryName(Geocoding)}. */
    static Set<String> disallowedCountryNames;
    /** Confidence assigned when a location resolves to a country. */
    public static final int DEFAULT_COUNTRY_CONF = 75;
    /** Pattern matching a literal period; compiled in the static initializer. */
    private static final Pattern remove_punct;

    /**
     * One country candidate produced by {@link #getInferredCountry(Tweet)}:
     * the country, its prediction score, and whether it is among the
     * top-scoring candidates.
     */
    public static class InferredCountry {
        /** Country code of {@link #country}. */
        String id;
        /** The candidate country. */
        Country country;
        /** Prediction score assigned to this candidate. */
        int score;
        /** True when this candidate's score equals the best score of its batch. */
        boolean validMatch;

        InferredCountry() {
        }

        /** Renders as "id score validMatch". */
        @Override
        public String toString() {
            return String.format("%s %d %s", id, score, validMatch);
        }
    }

    /**
     * Geocoding rule that adds evidence to gazetteer candidates using the
     * metadata of the tweet currently being processed (time zone, given
     * geo, inferred countries). Call {@link #resetBefore(Tweet, Place)}
     * before each tagging pass; the rule holds per-tweet state in its fields.
     */
    public class UserProfileLocationRule extends GeocodeRule {
        /** Countries inferred from tweet metadata, keyed by country code; null when none. */
        public Map<String, InferredCountry> inferredCountries = null;
        public String timezone = null;
        public int utcOffset = 999999;
        /** Tweet passed to the last {@link #resetBefore(Tweet, Place)} call. */
        public Tweet currentTweet = null;
        /** Country code of the given geo, if any. */
        public String cc = null;
        /** ADM1 code of the given geo, if any. */
        public String adm1 = null;
        /** Result of {@code Message.validTZ} for the current tweet. */
        public boolean validTZ = false;
        /** Geo passed to the last {@link #resetBefore(Tweet, Place)} call; may be null. */
        public Place currentGeo = null;
        /** Maximum longitude difference (degrees) for the time-zone/longitude evidence. */
        final int UTC_LON_WINDOW = 5;

        public UserProfileLocationRule() {
        }

        /**
         * Resets the per-tweet state of this rule.
         *
         * @param tweet tweet about to be processed
         * @param place geo given with the tweet, or null when there is none
         */
        public void resetBefore(Tweet tweet, Place place) {
            currentTweet = tweet;
            inferredCountries = null;
            timezone = null;
            validTZ = Message.validTZ(tweet);
            // Always replace the previous geo, including with null; otherwise
            // evaluate() would compare candidates against the prior tweet's geo.
            currentGeo = place;
            if (place == null) {
                cc = null;
                adm1 = null;
                return;
            }
            cc = place.getCountryCode();
            adm1 = place.getAdmin1();
            if (cc == null && validTZ) {
                inferredCountries = XponentGeocoder.this.getInferredCountry(tweet);
            }
        }

        /**
         * Increments the score of {@code place} on {@code placeCandidate} for
         * each piece of matching tweet evidence. Places whose feature class is
         * not usable are ignored entirely.
         */
        @Override
        public void evaluate(PlaceCandidate placeCandidate, Place place) {
            if (!XponentGeocoder.isUsableFeature(place.getFeatureClass())) {
                return;
            }
            placeCandidate.incrementPlaceScore(place, Double.valueOf(5.0d), "SocGeo:Feature");
            this.log.debug("Place {} ", place);

            if (validTZ) {
                double lonDelta = Math.abs(place.getLongitude()
                        - GeonamesUtility.approximateLongitudeForUTCOffset(currentTweet.utcOffsetHours));
                if (lonDelta < UTC_LON_WINDOW) {
                    placeCandidate.incrementPlaceScore(place, Double.valueOf(3.0d), "SocGeo:TZ+Lon");
                }
            }

            if (currentGeo != null) {
                String givenPath = currentGeo.getHierarchicalPath();
                if (givenPath != null && givenPath.equals(place.getHierarchicalPath())) {
                    placeCandidate.incrementPlaceScore(place, Double.valueOf(15.0d), "SocGeo:Country+Admin1");
                    this.log.debug("\tadd HASC evidence");
                }
            } else if (cc != null && cc.equals(place.getCountryCode())) {
                // NOTE(review): resetBefore() derives cc from the same Place as
                // currentGeo, so cc is non-null only when currentGeo is non-null;
                // this country-only branch looks unreachable -- confirm intent.
                placeCandidate.incrementPlaceScore(place, Double.valueOf(5.0d), "SocGeo:Country");
                this.log.debug("\tadd CC");
            }

            if (inferredCountries == null) {
                return;
            }
            InferredCountry inferred = inferredCountries.get(place.getCountryCode());
            if (inferred != null && inferred.validMatch) {
                placeCandidate.incrementPlaceScore(place, Double.valueOf(inferred.score), "SocGeo:TZ+Lang");
                this.log.debug("\tadd TZ+Lang evidence");
            }
        }
    }

    /**
     * Creates the inferencer and declares its identity and capabilities:
     * it infers author and status locations, not place mentions.
     */
    public XponentGeocoder() {
        inferencerID = "XpMeta";
        inferencerDescription = "Geoparsing, Geocoding and ProvinceID of user and messages";
        infersPlaces = false;
        infersAuthors = true;
        infersStatus = true;
    }

    /**
     * Place-mention inference is not performed by this inferencer.
     *
     * @param tweet ignored
     * @return always {@code null}
     */
    @Override // org.opensextant.extractors.geo.social.GeoInferencer
    public Collection<GeoInference> geoinferencePlaceMentions(Tweet tweet) throws MessageParseException, ExtractionException {
        return null;
    }

    /**
     * Summarizes processing counters as one line: total records and the
     * percentage of records with coordinates, with a valid time zone, and
     * with a resolved place.
     *
     * @return formatted statistics line prefixed with the inferencer ID
     */
    @Override
    public String report() {
        long total = totalRecords;
        double coordPct = pct(totalRecords, recordsWithCoord);
        double tzPct = pct(totalRecords, recordsWithTZ);
        double placePct = pct(totalRecords, recordsWithPlace);
        return String.format(
                "%s Total Records:%d, Has Coords:%2.0f%%, Has TZ:%2.0f%%, Has Place: %2.0f%%",
                inferencerID, total, coordPct, tzPct, placePct);
    }

    /**
     * Builds and configures the tagger, the coordinate extractor, the
     * profile place-name filter, and the country/gazetteer metadata.
     *
     * <p>The place-name filter is created <em>before</em> it is handed to the
     * tagger; previously the tagger was given the still-null field.
     *
     * @throws ConfigException if a component fails to configure or a
     *         resource file cannot be read
     */
    @Override
    public void configure() throws ConfigException {
        try {
            // Must exist before tagger.setMatchFilter(...) below.
            profilePlaceFilter = new MatchFilter("/twitter/exclude-tweet-profile-placenames.txt");

            tagger = new PlaceGeocoder(true);
            tagger.enablePersonNameMatching(true);
            Parameters parameters = new Parameters();
            parameters.tag_coordinates = false;
            parameters.resolve_localities = true;
            tagger.setParameters(parameters);
            tagger.setMatchFilter(profilePlaceFilter);
            tagger.configure();
            profileRule = new UserProfileLocationRule();
            tagger.addRule(profileRule);

            userlocX = new XCoord();
            userlocX.configure(getClass().getResource("/twitter/tweet-xcoord.cfg"));
            userlocX.disableAll();
            userlocX.match_DD(true);
            // NOTE(review): XOR toggles bits on a static, process-wide field, so
            // configuring a second instance flips these flags back -- confirm
            // whether a clearing operation was intended.
            XCoord.RUNTIME_FLAGS ^= 2;
            XCoord.RUNTIME_FLAGS ^= 16;

            countries = new GeonamesUtility();
            countries.loadCountryLanguages();
            countries.loadWorldAdmin1Metadata();
            gazetteer = tagger.getGazetteer();
            populateAllCountries(gazetteer);
        } catch (IOException e) {
            throw new ConfigException("IO Problems, possibly missing resource files.", e);
        }
    }

    /**
     * Releases the tagger. Safe to call when {@link #configure()} was never
     * run or failed before the tagger was created.
     */
    @Override
    public void close() {
        if (tagger != null) {
            tagger.close();
        }
    }

    /**
     * Infers the author's location from {@code tweet.authorGeo}.
     *
     * @param tweet tweet whose author geo is processed
     * @return result of {@link #processLocation(Tweet, Place, String, String)} with inference name "ugeo"
     */
    @Override // org.opensextant.extractors.geo.social.GeoInferencer
    public GeoInference geoinferenceTweetAuthor(Tweet tweet) throws MessageParseException, ExtractionException {
        return processLocation(tweet, (Place) tweet.authorGeo, tweet.id, "ugeo");
    }

    /**
     * Infers the message's location from {@code tweet.statusGeo}.
     *
     * @param tweet tweet whose status geo is processed
     * @return result of {@link #processLocation(Tweet, Place, String, String)} with inference name "geo"
     */
    @Override // org.opensextant.extractors.geo.social.GeoInferencer
    public GeoInference geoinferenceTweetStatus(Tweet tweet) throws MessageParseException, ExtractionException {
        return processLocation(tweet, (Place) tweet.statusGeo, tweet.id, "geo");
    }

    /**
     * Looks for a coordinate in the place's free-text name and, if found,
     * copies the first non-submatch coordinate (method, lat/lon, flattened
     * precision) onto the place. Does nothing when the name is null or
     * contains no coordinate.
     *
     * @param place place to update in place
     */
    public void parseFreeTextCoordinates(Place place) {
        if (place.getPlaceName() == null) {
            return;
        }
        List<GeocoordMatch> coords = this.userlocX.extract(place.getPlaceName());
        if (coords == null || coords.isEmpty()) {
            return;
        }
        if (coords.size() > 1) {
            log.error("Incorrect assumption: Found multiple coordinates; Using first.");
        }
        for (GeocoordMatch candidate : coords) {
            if (((TextMatch) candidate).is_submatch) {
                continue;
            }
            GeocoordMatch xy = candidate;
            place.setMethod(xy.getMethod());
            place.setLatLon(xy);
            flattenPrecision(place, xy.precision);
            return;
        }
    }

    /**
     * Resolves the province (ADM1) of a place via a recursive gazetteer
     * lookup and writes it, plus a missing country code, back onto the place.
     *
     * @param place place to resolve; updated in place on success
     * @return true when a province was found; false when nothing was found
     *         or the lookup threw (the error is logged)
     */
    public boolean provinceID(Place place) {
        try {
            Place nearest = inferPlaceRecursively(gazetteer, place, true);
            if (nearest == null) {
                log.debug("Location not found {}", place);
                return false;
            }
            place.setAdmin1(nearest.getAdmin1());
            place.setMethod("proximity-provinceID");
            if (place.getCountryCode() == null) {
                // The coordinate carried no country; take it from the match.
                place.setCountryCode(nearest.getCountryCode());
                log.debug("Back fill empty CC on coordinate. {}", place);
            }
            return true;
        } catch (Exception e) {
            log.error("Geocoding Bug! " + place, e);
            return false;
        }
    }

    /**
     * Infers a location from the tweet's time zone label, falling back to
     * the countries previously inferred by the profile rule.
     *
     * <p>The time zone text is tagged first; the first unfiltered place
     * candidate with a chosen place is geocoded onto {@code place}. If that
     * yields nothing, the first inferred country flagged as a valid match is used.
     *
     * @param tweet tweet supplying the time zone
     * @param place place to update in place
     * @return confidence of the inference, or -1 when nothing was inferred
     */
    public int inferCountryTimezone(Tweet tweet, Place place) throws ExtractionException {
        if (tweet.timezone != null) {
            List<TextMatch> matches = this.tagger.extract(tweet.timezone);
            if (matches != null) {
                for (TextMatch match : matches) {
                    if (match.isFilteredOut() || !(match instanceof PlaceCandidate)) {
                        continue;
                    }
                    PlaceCandidate candidate = (PlaceCandidate) match;
                    Place chosen = candidate.getChosenPlace();
                    if (chosen == null) {
                        continue;
                    }
                    int confidence;
                    if (chosen.isCountry()) {
                        confidence = 75;
                        place.setMethod("geotag/tz/country");
                    } else {
                        confidence = candidate.getConfidence();
                        place.setMethod("geotag/tz");
                    }
                    geocode(place, chosen);
                    return confidence;
                }
            }
        }

        Map<String, InferredCountry> inferred = this.profileRule.inferredCountries;
        if (inferred != null) {
            for (InferredCountry candidate : inferred.values()) {
                if (candidate.validMatch) {
                    place.setMethod("tz/lang");
                    place.setCountry(candidate.country);
                    return 25 + candidate.score;
                }
            }
        }
        return -1;
    }

    /**
     * Infers candidate countries for a tweet from its metadata and scores them.
     *
     * <p>The candidate list comes from the first available source: a valid
     * UTC offset (DST-aware), else the time zone label, else the user
     * language. Each candidate with a positive prediction score is kept;
     * those whose score equals the best score are flagged {@code validMatch}.
     *
     * @param tweet tweet supplying UTC offset, time zone and language
     * @return candidates keyed by country code, or null when there are none
     */
    public Map<String, InferredCountry> getInferredCountry(Tweet tweet) {
        Collection<?> candidateCodes;
        if (Message.validateUTCOffset(tweet.utcOffset)) {
            candidateCodes = tweet.isDST
                    ? this.countries.countriesInDSTOffset(tweet.utcOffset)
                    : this.countries.countriesInUTCOffset(tweet.utcOffset);
        } else if (tweet.timezone != null) {
            candidateCodes = this.countries.countriesInTimezone(tweet.timezone);
        } else {
            // The former lang.equals(userLang) ? lang : userLang selection always
            // yields userLang, but threw NullPointerException when lang was null.
            candidateCodes = this.countries.countriesSpeaking(tweet.userLang);
        }
        if (candidateCodes == null) {
            return null;
        }

        Map<String, InferredCountry> inferred = new HashMap<>();
        int bestScore = 0;
        for (Object code : candidateCodes) {
            Country country = this.countries.getCountry((String) code);
            if (country == null) {
                // Unknown code: nothing to score or report.
                continue;
            }
            int score = scoreCountryPrediction(country, tweet);
            if (score <= 0) {
                continue;
            }
            InferredCountry candidate = new InferredCountry();
            candidate.id = country.getCountryCode();
            candidate.score = score;
            candidate.country = country;
            inferred.put(candidate.id, candidate);
            bestScore = Math.max(bestScore, score);
        }
        if (inferred.isEmpty()) {
            return null;
        }

        // Only the top-scoring candidates count as valid matches.
        for (InferredCountry candidate : inferred.values()) {
            candidate.validMatch = candidate.score == bestScore;
        }
        return inferred;
    }

    /**
     * Resolves a free-text place description to a gazetteer place and
     * geocodes it onto {@code place}.
     *
     * <p>Steps: (1) reject names that are missing or filtered out;
     * (2) detect CJK or Arabic text to pick the gazetteer name field;
     * (3) if only a province name is given, look up its ADM1 code;
     * (4) reset the profile rule for this tweet; (5) for names without
     * digits, try a direct gazetteer lookup; (6) otherwise run the tagger
     * and take the first unfiltered candidate that has a chosen place.
     *
     * @param tweet tweet being processed (ID and metadata are used)
     * @param place given place; updated in place on success
     * @return confidence of the inference; 0 when the name is unusable,
     *         -1 when nothing was found
     */
    public int inferProvinceByHierarchy(Tweet tweet, Place place) throws ExtractionException {
        String countryCode = place.getCountryCode();
        String admin1 = place.getAdmin1();
        String adminName = place.getAdminName();
        if (adminName == null) {
            adminName = place.getAdmin1Name();
        }

        // A usable place name is mandatory. The filtered form is only a gate;
        // the raw name is what gets searched and tagged below.
        String rawName = place.getPlaceName();
        if (rawName == null || filterOutName(rawName.toLowerCase()) == null) {
            return 0;
        }

        boolean hasDigits = TextUtils.hasDigits(rawName);
        boolean isCJK = false;
        boolean isArabic = false;
        if (!TextUtils.isASCII(rawName)) {
            isCJK = TextUtils.hasCJKText(rawName);
            if (!isCJK) {
                Language detected = this.langidTool.detectSocialMediaLang((String) null, rawName, true);
                isArabic = detected != null && "ar".equalsIgnoreCase(detected.getCode());
            }
        }

        if (adminName != null && admin1 == null) {
            try {
                String provinceCode = inferProvinceByName(adminName, countryCode);
                if (provinceCode != null) {
                    place.setAdmin1(provinceCode);
                }
            } catch (Exception e) {
                this.log.error("Failed to find province ID given metadata", e);
            }
        }

        this.profileRule.resetBefore(tweet, place);

        String cleaned = TextUtils.fast_replace(rawName, ".", ". ").replace('\\', ' ');
        TextInput textInput = new TextInput(tweet.id, cleaned);
        String nameField = "name";
        if (isCJK) {
            textInput.langid = "zh";
            nameField = "name_cjk";
        } else if (isArabic) {
            textInput.langid = "ar";
            nameField = "name_ar";
        }

        if (!hasDigits) {
            int lookupConfidence = inferPlaceByName(cleaned, place, nameField);
            if (lookupConfidence > 0) {
                return lookupConfidence;
            }
        }

        Place chosenPlace = null;
        int confidence = -1;
        try {
            List<TextMatch> matches = this.tagger.extract(textInput);
            if (matches != null) {
                for (TextMatch match : matches) {
                    if (match.isFilteredOut() || !(match instanceof PlaceCandidate)) {
                        continue;
                    }
                    PlaceCandidate candidate = (PlaceCandidate) match;
                    if (chosenPlace != null) {
                        this.log.debug("Ignored 2nd place in free text:{}@{}", match.getText(), candidate.getChosen());
                        continue;
                    }
                    chosenPlace = candidate.getChosenPlace();
                    if (chosenPlace != null) {
                        boolean isCountry = chosenPlace.isCountry();
                        confidence = isCountry ? 75 : candidate.getConfidence();
                        place.setMethod(isCountry ? "geotag/freetext/country" : "geotag/freetext");
                    }
                }
            }
        } catch (Exception e2) {
            this.log.error(String.format("Unable to parse %s / name=%s", place, rawName), e2);
        }
        if (chosenPlace == null) {
            return -1;
        }

        this.log.debug("Search for {};  inferred {} with rules {}", new Object[]{rawName, chosenPlace, place.getMethod()});
        geocode(place, chosenPlace);

        // Diagnostics only: compare the chosen country with the inferred ones.
        if (this.profileRule.inferredCountries != null) {
            InferredCountry inferred = this.profileRule.inferredCountries.get(chosenPlace.getCountryCode());
            if (inferred == null) {
                // Previously unreachable: this case fell into the mismatch branch.
                this.log.info("\tOdd! Chosen R={}, geo={} was not inferred at all.", tweet.id, chosenPlace);
            } else if (!inferred.validMatch) {
                if (!"US".equalsIgnoreCase(chosenPlace.getCountryCode())) {
                    this.log.debug("\t Stop here, tweet={}", tweet);
                }
                this.log.debug("\tSurprise! R={} (lang={}, tz={}) Chosen country does not match inferred {}", new Object[]{tweet.id, tweet.lang, tweet.timezone, chosenPlace.getCountryCode()});
            }
        }
        return confidence;
    }

    /**
     * Looks the name up directly in the gazetteer (feature classes P, A, L;
     * restricted to the place's country when known), scores the hits with
     * the profile rule, and geocodes the best one onto {@code place}.
     *
     * @param str  place name to search
     * @param place given place; updated in place on success
     * @param str2 gazetteer name field to query
     * @return confidence of the lookup, or -1 when nothing was chosen or the
     *         query failed (failure is logged at debug level)
     */
    private int inferPlaceByName(String str, Place place, String str2) {
        try {
            String phrase = TextUtils.fast_replace(str, "\\\"", ";");
            String query;
            if (place.getCountryCode() != null) {
                query = String.format("%s:\"%s\" AND feat_class:(P A L) AND cc:%s", str2, phrase, place.getCountryCode());
            } else {
                query = String.format("%s:\"%s\" AND feat_class:(P A L)", str2, phrase);
            }
            List<Place> hits = this.tagger.searchAdvanced(query, true, phrase.length() + 3);
            if (hits.isEmpty()) {
                return -1;
            }

            PlaceCandidate candidate = new PlaceCandidate(0, str.length() - 1);
            candidate.setText(phrase);
            candidate.inferTextSense(false, false);
            for (Place hit : hits) {
                ScoredPlace scored = new ScoredPlace(hit.getPlaceID(), hit.getName());
                scored.setPlace(hit);
                candidate.addPlace(scored);
                this.profileRule.evaluate(candidate, hit);
            }
            candidate.choose();

            ScoredPlace best = candidate.getChosen();
            if (best == null) {
                return -1;
            }
            geocode(place, best.getPlace());
            place.setMethod("geotag/lookup");

            // The country code is read after geocode(), which may have set it.
            int base = place.getCountryCode() != null ? 75 : 50;
            base -= (int) (0.1d * hits.size());
            if (this.profileRule.inferredCountries != null) {
                base -= (int) (0.1d * this.profileRule.inferredCountries.size());
            }
            return (int) (base + best.getScore());
        } catch (Exception e) {
            this.log.debug("Query Parsing Error", e);
            return -1;
        }
    }

    /**
     * Copies the geographic identity of {@code place2} onto {@code place}:
     * country, ADM1, feature class/code, place ID and the precision derived
     * from the feature type. The coordinate is copied only when
     * {@code place2} has one and is not a country.
     */
    private void geocode(Place place, Place place2) {
        place.setCountryCode(place2.getCountryCode());
        place.setCountry(place2.getCountry());
        place.setAdmin1(place2.getAdmin1());
        place.setFeatureClass(place2.getFeatureClass());
        place.setFeatureCode(place2.getFeatureCode());
        place.setPlaceID(place2.getPlaceID());

        int precision = GeodeticUtility.getFeaturePrecision(place.getFeatureClass(), place.getFeatureCode());
        place.setPrecision(precision);

        if (GeodeticUtility.isCoord(place2) && !place2.isCountry()) {
            place.setLatLon(place2);
        }
    }

    /**
     * Screens a free-text place name. Names are rejected when they contain
     * "http", are excluded by the profile place filter, contain ':', have
     * more than five digits, or shrink to fewer than two characters after
     * clean-up.
     *
     * @param str name to screen
     * @return the cleaned name, or null when the name is rejected
     */
    private String filterOutName(String str) {
        if (str.contains("http")) {
            return null;
        }
        if (this.profilePlaceFilter.filterOut(str)) {
            this.log.debug("Excluded place {}", str);
            return null;
        }
        boolean looksLikeNonPlace = str.contains(":") || TextUtils.countDigits(str) > 5;
        if (looksLikeNonPlace) {
            return null;
        }
        String despaced = TextUtils.fast_replace(str, "/\\#;", " ");
        String cleaned = TextUtils.squeeze_whitespace(despaced).trim();
        return cleaned.length() < 2 ? null : cleaned;
    }

    /**
     * Finds the ADM1 code of a province by exact (case-insensitive) name
     * match in the gazetteer, optionally restricted to a country.
     *
     * @param str  province name; searched in the CJK name field when it contains CJK text
     * @param str2 country code to restrict the search, or null
     * @return ADM1 code of the first hit whose name equals {@code str}
     *         ignoring case, or null when none matches
     */
    private String inferProvinceByName(String str, String str2) throws SolrServerException, IOException {
        String nameField = TextUtils.hasCJKText(str) ? "name_cjk" : "name";
        StringBuilder query = new StringBuilder(" +feat_class:A AND +feat_code:ADM1");
        query.append(" AND +").append(nameField).append(":\"").append(str).append("\"");
        if (str2 != null) {
            query.append(" AND +cc:").append(str2);
        }

        for (Place province : this.gazetteer.search(query.toString(), true)) {
            this.log.debug("Matched PROV {} =? {}", str, province);
            if (province.getName().equalsIgnoreCase(str)) {
                return province.getAdmin1();
            }
        }
        return null;
    }

    /**
     * Tells whether a feature class code is one this geocoder scores.
     *
     * @param str feature class code to test
     * @return true when {@code str} is in the {@code usableFeatures} set
     */
    public static boolean isUsableFeature(String str) {
        return usableFeatures.contains(str);
    }

    /**
     * Checks whether the place name is simply a country name and, if so,
     * sets the country and the method "name/country" on the geocoding.
     * The name is tried as given (trimmed, lower-cased) and then with
     * periods removed.
     *
     * @param geocoding geocoding whose place name is tested; updated on a match
     * @return 75 on a match; 0 when the name is disallowed or matches no
     *         country; -1 when there is no place name value
     */
    public int inferCountryName(Geocoding geocoding) {
        if (!isValue(geocoding.getPlaceName())) {
            return -1;
        }
        String name = geocoding.getPlaceName().trim().toLowerCase();
        if (disallowedCountryNames.contains(name)) {
            return 0;
        }

        Country match = getCountryNamed(name);
        if (match == null) {
            // Retry without periods.
            match = getCountryNamed(removePunct(name));
        }
        if (match == null) {
            return 0;
        }
        geocoding.setMethod("name/country");
        geocoding.setCountry(match);
        return 75;
    }

    /**
     * Removes every match of the {@code remove_punct} pattern (a literal
     * period) from the input.
     *
     * @param str text to clean
     * @return {@code str} with all periods removed
     */
    public static final String removePunct(String str) {
        return remove_punct.matcher(str).replaceAll("");
    }

    /**
     * Resolves one location attached to a tweet into a geo-inference.
     *
     * <p>Order of attempts: (1) the place name is a country name;
     * (2) a coordinate is present or found in the free text, resolved to a
     * province; (3) name/hierarchy inference; (4) time-zone inference when
     * the tweet has a valid time zone and nothing else resolved.
     *
     * <p>When a country name matches and a coordinate is also present, the
     * confidence now starts from the country-name confidence and adds 10 for
     * the coordinate and 10 more for a resolved province. Previously the
     * country confidence was dropped, leaving only 10 or 20.
     *
     * @param tweet tweet being processed
     * @param place location to resolve; updated in place; may be null
     * @param str   record ID stored on the inference
     * @param str2  inference name to report (replaced by "country" when the
     *              resolving method ends with "country")
     * @return the inference, or null when {@code place} is null or unresolved
     */
    public GeoInference processLocation(Tweet tweet, Place place, String str, String str2) throws ExtractionException {
        if (place == null) {
            return null;
        }
        place.setMethod("given");

        int confidence = inferCountryName(place);
        if (confidence > 0) {
            GeoInference countryInference = new GeoInference();
            countryInference.recordId = str;
            countryInference.contributor = this.inferencerID;
            if (!GeodeticUtility.isCoord(place)) {
                countryInference.confidence = confidence;
                countryInference.inferenceName = "country";
                countryInference.geocode = place;
                this.log.debug("Lookup Chooser: Chose country {} ", place);
                return countryInference;
            }
            this.recordsWithCoord++;
            // Keep the country-name confidence and add the coordinate evidence.
            countryInference.confidence = confidence + 10;
            if (provinceID(place)) {
                countryInference.confidence += 10;
                setProvinceName(place);
            }
            countryInference.inferenceName = str2;
            countryInference.geocode = place;
            return countryInference;
        }

        boolean resolved = false;
        parseFreeTextCoordinates(place);
        if (GeodeticUtility.isCoord(place)) {
            this.recordsWithCoord++;
            if (provinceID(place)) {
                confidence = 90;
                resolved = true;
            }
        } else {
            confidence = inferProvinceByHierarchy(tweet, place);
            resolved = confidence > 0;
            if (resolved) {
                this.recordsWithPlace++;
            }
        }

        if (Message.validTZ(tweet)) {
            this.recordsWithTZ++;
            if (!resolved) {
                confidence = inferCountryTimezone(tweet, place);
                resolved = confidence > 0;
            }
        }

        String inferenceName = place.getMethod().endsWith("country") ? "country" : str2;
        if (!resolved) {
            this.log.debug("\tUnresolved geo for: R={}, geo={}, tz={}", new Object[]{tweet.id, place, tweet.timezone});
            return null;
        }

        setProvinceName(place);
        GeoInference inference = new GeoInference();
        inference.contributor = this.inferencerID;
        inference.recordId = str;
        inference.inferenceName = inferenceName;
        inference.confidence = confidence;
        inference.geocode = place;
        return inference;
    }

    /**
     * This inferencer produces no additional matches.
     *
     * @return always {@code null}
     */
    @Override // org.opensextant.extractors.geo.social.GeoInferencer
    public Collection<TextMatch> getAdditionalMatches() {
        return null;
    }

    // Populates the class-level lookup sets and compiles the period pattern.
    static {
        // Feature class codes accepted by isUsableFeature().
        usableFeatures.add("A");
        usableFeatures.add("P");
        usableFeatures.add("S");
        usableFeatures.add("L");

        allowedCountryCodeNames = new HashSet<>();
        disallowedCountryNames = new HashSet<>();

        allowedCountryCodeNames.add("us");
        allowedCountryCodeNames.add("usa");
        allowedCountryCodeNames.add("uae");
        allowedCountryCodeNames.add("uk");
        allowedCountryCodeNames.add("gb");
        allowedCountryCodeNames.add("gbr");

        // Names that inferCountryName() refuses to resolve as countries.
        disallowedCountryNames.add("georgia");
        disallowedCountryNames.add("jersey");

        // Matches a literal period; used by removePunct().
        remove_punct = Pattern.compile("\\.");
    }
}
