package org.opensextant.extractors.geo;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.solr.client.solrj.SolrServerException;
import org.opensextant.ConfigException;
import org.opensextant.data.Country;
import org.opensextant.data.Geocoding;
import org.opensextant.data.Place;
import org.opensextant.data.Taxon;
import org.opensextant.data.TextInput;
import org.opensextant.extraction.ExtractionException;
import org.opensextant.extraction.ExtractionMetrics;
import org.opensextant.extraction.Extractor;
import org.opensextant.extraction.TextMatch;
import org.opensextant.extractors.geo.rules.ContextualOrganizationRule;
import org.opensextant.extractors.geo.rules.CoordinateAssociationRule;
import org.opensextant.extractors.geo.rules.CountryRule;
import org.opensextant.extractors.geo.rules.FeatureRule;
import org.opensextant.extractors.geo.rules.GeocodeRule;
import org.opensextant.extractors.geo.rules.LocationChooserRule;
import org.opensextant.extractors.geo.rules.MajorPlaceRule;
import org.opensextant.extractors.geo.rules.NameCodeRule;
import org.opensextant.extractors.geo.rules.NameRule;
import org.opensextant.extractors.geo.rules.NonsenseFilter;
import org.opensextant.extractors.geo.rules.PersonNameFilter;
import org.opensextant.extractors.geo.rules.ProvinceAssociationRule;
import org.opensextant.extractors.geo.rules.ProvinceNameSetter;
import org.opensextant.extractors.xcoord.GeocoordMatch;
import org.opensextant.extractors.xcoord.XCoord;
import org.opensextant.extractors.xtax.TaxonMatch;
import org.opensextant.extractors.xtax.TaxonMatcher;
import org.opensextant.processing.Parameters;
import org.opensextant.util.GeonamesUtility;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/opensextant/extractors/geo/PlaceGeocoder.class */
public class PlaceGeocoder extends GazetteerMatcher implements Extractor, CountryObserver, BoundaryObserver, LocationObserver {
    // Coordinate extractor; created in configure() only when coordinate extraction is enabled.
    private XCoord xcoord;
    // Person-name filter rule; built in configure() and also appended to the rule list.
    private PersonNameFilter personNameRule;
    // Taxon matcher used by parseKnownNonPlaces(); created in configure() only when
    // person-name matching is enabled, and closed in close().
    private TaxonMatcher personMatcher;
    // Created in configure() only when taggingParams.resolve_localities is set; read by getProvinceFor().
    private GeonamesUtility nameHelper;
    // Timing metrics updated by extract(TextInput) and printed by reportMetrics().
    private final ExtractionMetrics taggingTimes;
    private final ExtractionMetrics matcherTotalTimes;
    // countryRule and nameWithAdminRule are evaluated directly in extract(TextInput), before the rule list.
    private CountryRule countryRule;
    // coordRule and adm1Rule are added to the rule list only when coordinate extraction is enabled.
    private CoordinateAssociationRule coordRule;
    private ProvinceAssociationRule adm1Rule;
    private NameCodeRule nameWithAdminRule;
    private MajorPlaceRule majorPlaceRule;
    // Evaluated in extract(TextInput) after every rule in the rule list.
    private LocationChooserRule chooser;
    private ContextualOrganizationRule placeInOrgRule;
    private NonsenseFilter nonsenseFilter;
    // Optional final step in extract(TextInput); stays null unless resolve_localities is set.
    private ProvinceNameSetter provinceNameSetter;
    // Rules evaluated, in list order, by extract(TextInput).
    private List<GeocodeRule> rules;
    private Parameters taggingParams;
    // Per-document evidence, all cleared by reset():
    // keyed by country code (see countryInScope).
    private Map<String, CountryCount> relevantCountries;
    // keyed by hierarchical path (see boundaryLevel1InScope).
    private Map<String, PlaceCount> relevantProvinces;
    // keyed by place ID (see locationInScope).
    private Map<String, Place> relevantLocations;
    // matched text -> country code taken from a "cc+" tag (see parseKnownNonPlaces).
    private Map<String, String> nationalities;
    // geocode is set to true in both constructors; tagOnly is its negation and is passed to tagText().
    private boolean geocode;
    private boolean tagOnly;
    // Initialised to an empty map in the constructors; not referenced by any other visible code in this class.
    Map<String, Integer> locationBias;
    // The values 25 and 50 match the radius arguments passed to the gazetteer lookups in
    // evaluateCoordinate(); the unit is not shown in this file — TODO confirm.
    public static final int COORDINATE_PROXIMITY_CITY_THRESHOLD = 25;
    public static final int COORDINATE_PROXIMITY_ADM1_THRESHOLD = 50;
    public static final String VERSION = "3.3";
    public static final String METHOD_DEFAULT = String.format("PlaceGeocoder v%s", VERSION);
    // Static, so shared by all instances; assigned in configure() from getGazetteer().getCountries()
    // and read by countryInScope(String). Null until some instance has been configured.
    private static Map<String, Country> countryCatalog = null;

    /**
     * Creates a geocoder via the superclass's no-argument constructor.
     *
     * <p>Only the metrics, parameters and per-document collections are created here.
     * Every rule and helper reference keeps its default {@code null} value until
     * {@link #configure()} is called.
     *
     * @throws ConfigException declared on the constructor; nothing in this body throws it directly
     */
    public PlaceGeocoder() throws ConfigException {
        this.taggingTimes = new ExtractionMetrics("tagging");
        this.matcherTotalTimes = new ExtractionMetrics("matcher-total");
        this.rules = new ArrayList<>();
        this.taggingParams = new Parameters();
        this.relevantCountries = new HashMap<>();
        this.relevantProvinces = new HashMap<>();
        this.relevantLocations = new HashMap<>();
        this.nationalities = new HashMap<>();
        this.geocode = true;
        this.tagOnly = !this.geocode;
        this.locationBias = new HashMap<>();
        this.log = LoggerFactory.getLogger(getClass());
    }

    /**
     * Creates a geocoder, forwarding {@code z} to the superclass constructor.
     *
     * <p>Only the metrics, parameters and per-document collections are created here.
     * Every rule and helper reference keeps its default {@code null} value until
     * {@link #configure()} is called.
     *
     * @param z flag passed unchanged to the superclass constructor; its meaning is defined there
     * @throws ConfigException declared on the constructor; nothing in this body throws it directly
     */
    public PlaceGeocoder(boolean z) throws ConfigException {
        super(z);
        this.taggingTimes = new ExtractionMetrics("tagging");
        this.matcherTotalTimes = new ExtractionMetrics("matcher-total");
        this.rules = new ArrayList<>();
        this.taggingParams = new Parameters();
        this.relevantCountries = new HashMap<>();
        this.relevantProvinces = new HashMap<>();
        this.relevantLocations = new HashMap<>();
        this.nationalities = new HashMap<>();
        this.geocode = true;
        this.tagOnly = !this.geocode;
        this.locationBias = new HashMap<>();
        this.log = LoggerFactory.getLogger(getClass());
    }

    /**
     * Returns the display name of this extractor.
     *
     * @return the constant string {@code "Advanced PlaceGeocoder"}
     */
    public String getName() {
        final String extractorName = "Advanced PlaceGeocoder";
        return extractorName;
    }

    /**
     * Unsupported: configuring from a path always fails.
     *
     * @param str ignored
     * @throws ConfigException always
     */
    public void configure(String str) throws ConfigException {
        final String reason = "Configure by path Not available";
        throw new ConfigException(reason);
    }

    /**
     * Unsupported: configuring from a URL always fails.
     *
     * @param url ignored
     * @throws ConfigException always
     */
    public void configure(URL url) throws ConfigException {
        final String reason = "Configure by URL Not available";
        throw new ConfigException(reason);
    }

    /**
     * Logs, at INFO level, the JVM memory currently in use
     * (total minus free), expressed in units of 1024 bytes.
     */
    public void reportMemory() {
        Runtime jvm = Runtime.getRuntime();
        long usedBytes = jvm.totalMemory() - jvm.freeMemory();
        int usedKilobytes = (int) (usedBytes / 1024);
        this.log.info("CURRENT MEM USAGE(K)=" + usedKilobytes);
    }

    /**
     * Logs a banner followed by the string form of the tagging metrics and
     * then the overall matcher metrics, all at INFO level.
     */
    public void reportMetrics() {
        this.log.info("=======================\nTAGGING METRICS");
        ExtractionMetrics[] reported = {this.taggingTimes, this.matcherTotalTimes};
        for (ExtractionMetrics metrics : reported) {
            this.log.info(metrics.toString());
        }
    }

    /**
     * Builds the rule pipeline and loads supporting resources.
     *
     * <p>Rules are appended to the rule list in this order: nonsense filter,
     * coordinate association and province association (only when coordinate
     * extraction is enabled and no coordinate extractor exists yet), major-place
     * rule, person-name filter, contextual-organization rule, name rule and
     * feature rule. The country rule, name-with-admin rule and location chooser
     * are created here but are invoked separately by {@code extract}.
     *
     * <p>Each call appends to the existing rule list, so calling this twice on
     * the same instance registers most rules twice.
     *
     * @throws ConfigException if the XTax person matcher cannot be created or
     *         the ADM1 boundary names cannot be loaded; the underlying exception
     *         is attached as the cause
     */
    public void configure() throws ConfigException {
        this.countryRule = new CountryRule();
        this.countryRule.setCountryObserver(this);

        this.nameWithAdminRule = new NameCodeRule();
        this.nameWithAdminRule.setBoundaryObserver(this);

        this.nonsenseFilter = new NonsenseFilter();
        this.rules.add(this.nonsenseFilter);

        // Created here, but added to the rule list only after the major-place rule below.
        this.personNameRule = new PersonNameFilter("/filters/person-name-filter.txt", "/filters/person-title-filter.txt", "/filters/person-suffix-filter.txt");

        this.coordRule = new CoordinateAssociationRule();
        this.coordRule.setCountryObserver(this);
        this.coordRule.setLocationObserver(this);
        if (this.xcoord == null && isCoordExtractionEnabled()) {
            this.xcoord = new XCoord();
            this.xcoord.configure();
            this.adm1Rule = new ProvinceAssociationRule();
            this.adm1Rule.setCountryObserver(this);
            this.rules.add(this.coordRule);
            this.rules.add(this.adm1Rule);
        }

        try {
            this.majorPlaceRule = new MajorPlaceRule(GeonamesUtility.mapPopulationByLocation(GeonamesUtility.loadMajorCities("/geonames.org/cities15000.txt")));
        } catch (IOException e) {
            // Best effort: population data is optional, but keep the cause so the failure can be diagnosed.
            this.log.error("Xponents 2.8: cities population data is used for geocoding. Will continue without it.", e);
            this.majorPlaceRule = new MajorPlaceRule(null);
        }
        this.majorPlaceRule.setCountryObserver(this);
        this.majorPlaceRule.setBoundaryObserver(this);
        this.rules.add(this.majorPlaceRule);
        this.rules.add(this.personNameRule);

        if (isPersonNameMatchingEnabled()) {
            try {
                this.personMatcher = new TaxonMatcher();
                this.personMatcher.excludeTaxons("place.");
                this.personMatcher.addCatalogFilter("JRC");
                this.personMatcher.addCatalogFilter("nationality");
                this.personMatcher.addCatalogFilter("person_names");
            } catch (IOException e) {
                throw new ConfigException("XTax resource not available.", e);
            }
        }

        this.placeInOrgRule = new ContextualOrganizationRule();
        this.placeInOrgRule.setBoundaryObserver(this);
        this.rules.add(this.placeInOrgRule);
        this.rules.add(new NameRule());
        this.rules.add(new FeatureRule());

        this.chooser = new LocationChooserRule();
        this.chooser.setCountryObserver(this);
        this.chooser.setBoundaryObserver(this);
        this.chooser.setLocationObserver(this);

        // Static catalog: this assignment is visible to every PlaceGeocoder instance.
        countryCatalog = getGazetteer().getCountries();

        if (this.taggingParams.resolve_localities) {
            try {
                this.nameHelper = new GeonamesUtility();
                this.nameHelper.loadWorldAdmin1Metadata();
                this.provinceNameSetter = new ProvinceNameSetter(this.nameHelper);
            } catch (Exception e) {
                throw new ConfigException("Failed to load names of ADM1 boundaries", e);
            }
        }
    }

    /**
     * Appends a rule to the end of the rule list evaluated by {@code extract}.
     *
     * @param geocodeRule the rule to append
     */
    public void addRule(GeocodeRule geocodeRule) {
        List<GeocodeRule> pipeline = this.rules;
        pipeline.add(geocodeRule);
    }

    /**
     * Replaces the contents of the rule list with the given rules.
     * The existing list object is kept; it is emptied and then refilled.
     *
     * @param list the rules to install, in evaluation order
     */
    public void setRules(List<GeocodeRule> list) {
        List<GeocodeRule> pipeline = this.rules;
        pipeline.clear();
        pipeline.addAll(list);
    }

    /**
     * Shuts this extractor down: logs the collected metrics, then closes it.
     */
    public void cleanup() {
        this.reportMetrics();
        this.close();
    }

    /**
     * Closes the superclass resources and then the person matcher, if one was created.
     * The person matcher is closed even when the superclass {@code close()} throws.
     */
    @Override // org.opensextant.extraction.SolrMatcherSupport, java.io.Closeable, java.lang.AutoCloseable
    public void close() {
        try {
            super.close();
        } finally {
            if (this.personMatcher != null) {
                this.personMatcher.close();
            }
        }
    }

    /**
     * Installs the tagging parameters and marks them as non-default.
     *
     * <p>The supplied object itself is stored and modified ({@code isdefault} is
     * set to {@code false}); no copy is made. The argument is validated before
     * the field is touched, so a failed call leaves the current parameters in place.
     *
     * @param parameters the parameters to use; must not be {@code null}
     * @throws NullPointerException if {@code parameters} is {@code null}
     */
    public void setParameters(Parameters parameters) {
        if (parameters == null) {
            throw new NullPointerException("parameters must not be null");
        }
        parameters.isdefault = false;
        this.taggingParams = parameters;
    }

    /**
     * Reports whether coordinate extraction is switched on.
     *
     * @return the current value of {@code tag_coordinates} in the tagging parameters
     */
    public boolean isCoordExtractionEnabled() {
        final boolean enabled = this.taggingParams.tag_coordinates;
        return enabled;
    }

    /**
     * Reports whether person-name matching is switched on.
     *
     * @return the current value of {@code tag_names} in the tagging parameters
     */
    public boolean isPersonNameMatchingEnabled() {
        final boolean enabled = this.taggingParams.tag_names;
        return enabled;
    }

    /**
     * Switches person-name matching on or off by writing {@code tag_names}
     * in the tagging parameters.
     *
     * <p>This only flips the flag. The person matcher itself is created in
     * {@code configure()}, and only if the flag is set at that time.
     *
     * @param z {@code true} to enable, {@code false} to disable
     */
    public void enablePersonNameMatching(boolean z) {
        Parameters params = this.taggingParams;
        params.tag_names = z;
    }

    /**
     * Clears all per-document state before a new extraction.
     *
     * <p>Empties the country, province, location and nationality maps, resets
     * the person-name, country, major-place and chooser rules explicitly, and
     * then resets every rule in the rule list (so rules that are also in the
     * list are reset a second time).
     */
    private void reset() {
        // Per-document evidence.
        this.relevantCountries.clear();
        this.relevantProvinces.clear();
        this.relevantLocations.clear();
        this.nationalities.clear();

        // Rules held in dedicated fields.
        this.personNameRule.reset();
        this.countryRule.reset();
        this.majorPlaceRule.reset();
        this.chooser.reset();

        // Everything registered in the pipeline.
        for (GeocodeRule rule : this.rules) {
            rule.reset();
        }
    }

    /**
     * Tags and geocodes place mentions in the given text.
     *
     * <p>Steps, in order: reset per-document state; tag place candidates; extract
     * coordinates (if enabled); evaluate the country rule and name-with-admin rule;
     * match known non-places (persons, organizations, nationalities); evaluate every
     * rule in the rule list; run the location chooser; and, if configured, set
     * province names.
     *
     * @param textInput the text to process
     * @return coordinate matches, then non-place matches, then place candidates;
     *         when tagging yields {@code null}, only the coordinate matches (possibly none)
     * @throws ExtractionException declared on the method; presumably raised by the tagging step
     */
    public List<TextMatch> extract(TextInput textInput) throws ExtractionException {
        long startedAt = System.currentTimeMillis();
        reset();

        List<TextMatch> results = new ArrayList<>();
        List<PlaceCandidate> candidates = tagText(textInput, this.tagOnly);
        List<TextMatch> coordinates = parseGeoCoordinates(textInput);
        if (coordinates != null) {
            results.addAll(coordinates);
        }
        if (candidates == null) {
            return results;
        }

        this.countryRule.evaluate(candidates);
        this.nameWithAdminRule.evaluate(candidates);
        parseKnownNonPlaces(textInput, candidates, results);
        // The tagging metric covers everything up to this point, not only the tagText() call.
        this.taggingTimes.addTimeSince(startedAt);

        for (GeocodeRule rule : this.rules) {
            rule.evaluate(candidates);
        }

        this.chooser.setTextCase(textInput.isLower ? 1 : 0);
        this.chooser.evaluate(candidates);
        if (this.provinceNameSetter != null) {
            this.provinceNameSetter.evaluate(candidates);
        }

        results.addAll(candidates);
        this.matcherTotalTimes.addBytes(textInput.buffer.length());
        this.matcherTotalTimes.addTimeSince(startedAt);
        return results;
    }

    /**
     * Finds person, organization and nationality mentions so that overlapping
     * place candidates can be evaluated against them.
     *
     * <p>Does nothing when person-name matching is disabled or no matcher was
     * configured. Matches that are entirely lower case in a document that is not
     * lower case are marked filtered-out and skipped. The remaining taxon matches
     * are classified, handed to the person-name rule together with the place
     * candidates, and appended to {@code list2}.
     *
     * <p>This step is best effort: any exception is logged and swallowed so that
     * place tagging can continue.
     *
     * @param textInput the text being processed
     * @param list place candidates tagged for this text
     * @param list2 output list that receives the person-like and organization matches
     */
    private void parseKnownNonPlaces(TextInput textInput, List<PlaceCandidate> list, List<TextMatch> list2) {
        if (!isPersonNameMatchingEnabled()) {
            return;
        }
        if (this.personMatcher == null) {
            // The flag can be switched on after configure(); in that case there is no matcher to run.
            this.log.error("Person name matching is enabled but no matcher was configured");
            return;
        }
        try {
            List<TextMatch> nonPlaces = this.personMatcher.extract(textInput.buffer);
            if (nonPlaces == null || nonPlaces.isEmpty()) {
                return;
            }
            List<TaxonMatch> persons = new ArrayList<>();
            List<TaxonMatch> orgs = new ArrayList<>();
            this.log.debug("Matched {}", nonPlaces.size());
            for (TextMatch match : nonPlaces) {
                if (!(match instanceof TaxonMatch)) {
                    continue;
                }
                if (match.isLower() && !textInput.isLower) {
                    match.setFilteredOut(true);
                    continue;
                }
                classifyTaxonMatch(textInput, (TaxonMatch) match, persons, orgs);
            }
            this.personNameRule.evaluateNamedEntities(textInput, list, persons, orgs);
            list2.addAll(persons);
            list2.addAll(orgs);
        } catch (Exception e) {
            this.log.error("Person/organization name matching failed", e);
        }
    }

    /**
     * Sorts one taxon match into the person-like or organization list by walking its taxons.
     *
     * <p>A {@code person.} taxon adds the match to {@code persons} and stops. An
     * {@code org.} taxon adds it to {@code orgs} and stops, unless the taxon is an
     * acronym and the matched text is not upper case, in which case that taxon is
     * skipped. {@code nationality.} and {@code person_name.} taxons may add the
     * match to {@code persons} and the walk continues. The walk ends when the
     * taxons run out.
     *
     * NOTE(review): a {@code person.} hit is recorded in {@code persons} only, so
     * it is emitted once by the caller — confirm the person-name rule does not
     * expect such hits in the second list as well.
     */
    private void classifyTaxonMatch(TextInput textInput, TaxonMatch taxonMatch, List<TaxonMatch> persons, List<TaxonMatch> orgs) {
        for (Taxon taxon : taxonMatch.getTaxons()) {
            String node = taxon.name.toLowerCase();
            if (node.startsWith("person.")) {
                persons.add(taxonMatch);
                return;
            }
            if (node.startsWith("org.")) {
                if (taxon.isAcronym && !taxonMatch.isUpper()) {
                    // Acronym taxon on text that is not upper case: try the next taxon.
                    continue;
                }
                orgs.add(taxonMatch);
                return;
            }
            if (node.startsWith("nationality.")) {
                persons.add(taxonMatch);
                if (taxon.hasTags()) {
                    for (String tag : taxon.tagset) {
                        int marker = tag.indexOf("cc+");
                        if (marker >= 0) {
                            // Everything after "cc+" is used as the country code.
                            String countryCode = tag.substring(marker + 3);
                            countryInScope(countryCode);
                            this.nationalities.put(taxonMatch.getText(), countryCode);
                        }
                    }
                } else {
                    this.log.debug("Taxon has not tags {}", taxon);
                }
            } else if (node.startsWith("person_name.")) {
                boolean weakEvidence = taxonMatch.isLower() || taxonMatch.getLength() < 4;
                boolean filtered = this.filter.filterOut(textInput.langid, taxonMatch.getText().toLowerCase());
                // Dropped only when the name is both filtered and weak (lower case or shorter than 4).
                if (!filtered || !weakEvidence) {
                    persons.add(taxonMatch);
                }
            }
        }
    }

    /**
     * Extracts coordinates from the text and feeds them to the coordinate and
     * province association rules.
     *
     * @param textInput the text being processed
     * @return the coordinate matches, or {@code null} when coordinate extraction
     *         is disabled, was never configured, or found nothing
     */
    private List<TextMatch> parseGeoCoordinates(TextInput textInput) {
        if (!isCoordExtractionEnabled()) {
            return null;
        }
        if (this.xcoord == null || this.coordRule == null || this.adm1Rule == null) {
            // The flag can be switched on after configure(); in that case the extractor and rules do not exist.
            this.log.debug("Coordinate extraction is enabled but was not configured; skipping");
            return null;
        }
        List<TextMatch> coordinates = this.xcoord.extract(textInput);
        if (coordinates == null || coordinates.isEmpty()) {
            return null;
        }
        this.coordRule.addCoordinates(coordinates);
        this.adm1Rule.setProvinces(this.relevantProvinces.values());
        return coordinates;
    }

    /**
     * Records a mention of the given country for the current document.
     * A country already seen has its count incremented; otherwise a new
     * tally is stored under the country's code. {@code null} is ignored.
     *
     * @param country the country that was mentioned, may be {@code null}
     */
    @Override // org.opensextant.extractors.geo.CountryObserver
    public void countryInScope(Country country) {
        if (country != null) {
            String code = country.getCountryCode();
            CountryCount tally = this.relevantCountries.get(code);
            if (tally == null) {
                tally = new CountryCount();
                tally.country = country;
                this.relevantCountries.put(code, tally);
            } else {
                tally.count++;
            }
        }
    }

    /**
     * Returns how many distinct countries have been recorded for the current document.
     *
     * @return the number of entries in the country tally map
     */
    @Override // org.opensextant.extractors.geo.CountryObserver
    public int countryCount() {
        Map<String, CountryCount> seen = this.relevantCountries;
        return seen.size();
    }

    /**
     * Returns the country tallies after stamping each one with the sum of all
     * country counts.
     *
     * @return the internal country tally map itself (not a copy), keyed by country code
     */
    @Override // org.opensextant.extractors.geo.CountryObserver
    public Map<String, CountryCount> countryMentionCount() {
        int mentions = 0;
        for (CountryCount tally : this.relevantCountries.values()) {
            mentions += tally.count;
        }
        for (CountryCount tally : this.relevantCountries.values()) {
            tally.total = mentions;
        }
        return this.relevantCountries;
    }

    /**
     * Returns the province tallies after stamping each one with the sum of all
     * province counts.
     *
     * @return the internal province tally map itself (not a copy), keyed by hierarchical path
     */
    @Override // org.opensextant.extractors.geo.BoundaryObserver
    public Map<String, PlaceCount> placeMentionCount() {
        int mentions = 0;
        for (PlaceCount tally : this.relevantProvinces.values()) {
            mentions += tally.count;
        }
        for (PlaceCount tally : this.relevantProvinces.values()) {
            tally.total = mentions;
        }
        return this.relevantProvinces;
    }

    /**
     * Records a mention of the country with the given code for the current document.
     *
     * <p>The code is looked up in the country catalog loaded by {@code configure()}.
     * If the catalog has not been loaded yet, or the code is not in it, the call
     * logs at debug level and records nothing. Otherwise the tally is updated by
     * {@link #countryInScope(Country)}.
     *
     * @param str the country code to look up
     */
    @Override // org.opensextant.extractors.geo.CountryObserver
    public void countryInScope(String str) {
        // Read the static once; it stays null until some instance has been configured.
        Map<String, Country> catalog = countryCatalog;
        Country country = (catalog == null) ? null : catalog.get(str);
        if (country == null) {
            this.log.debug("Unknown country code {}", str);
            return;
        }
        countryInScope(country);
    }

    /**
     * Tells whether a country with the given code has been recorded for the current document.
     *
     * @param str the country code, may be {@code null}
     * @return {@code true} if the code is a key in the country tally map; {@code false} for {@code null}
     */
    @Override // org.opensextant.extractors.geo.CountryObserver
    public boolean countryObserved(String str) {
        return str != null && this.relevantCountries.containsKey(str);
    }

    /**
     * Tells whether the given country has been recorded for the current document.
     *
     * @param country the country, may be {@code null}
     * @return {@code true} if the country's code is a key in the country tally map;
     *         {@code false} for {@code null}
     */
    @Override // org.opensextant.extractors.geo.CountryObserver
    public boolean countryObserved(Country country) {
        return country != null && this.relevantCountries.containsKey(country.getCountryCode());
    }

    /**
     * Resolves a coordinate to a place and records that place, its hierarchical
     * path and its country as evidence for the current document.
     *
     * <p>Nothing is recorded when no place is found. Any exception raised during
     * the lookup or recording is logged and swallowed.
     *
     * @param geocoding the coordinate to resolve
     */
    @Override // org.opensextant.extractors.geo.LocationObserver
    public void locationInScope(Geocoding geocoding) {
        try {
            Place resolved = evaluateCoordinate(geocoding);
            if (resolved != null) {
                resolved.defaultHierarchicalPath();
                this.relevantLocations.put(resolved.getPlaceID(), resolved);
                boundaryLevel1InScope(resolved);
                countryInScope(resolved.getCountryCode());
            }
        } catch (Exception e) {
            this.log.error("Spatial search error", e);
        }
    }

    /**
     * Records a mention of the boundary identified by the place's hierarchical path.
     * A path already seen has its count incremented; otherwise a new tally is
     * stored under that path. Places without a hierarchical path are ignored.
     *
     * @param place the place whose hierarchical path identifies the boundary
     */
    @Override // org.opensextant.extractors.geo.BoundaryObserver
    public void boundaryLevel1InScope(Place place) {
        String path = place.getHierarchicalPath();
        if (path != null) {
            PlaceCount tally = this.relevantProvinces.get(path);
            if (tally == null) {
                tally = new PlaceCount();
                tally.place = place;
                this.relevantProvinces.put(path, tally);
            } else {
                tally.count++;
            }
        }
    }

    /**
     * Does nothing in this class; the argument is ignored.
     *
     * @param place unused
     */
    @Override // org.opensextant.extractors.geo.BoundaryObserver
    public void boundaryLevel2InScope(Place place) {
    }

    /**
     * Convenience overload: wraps the raw text in a {@code TextInput} whose first
     * constructor argument is {@code null} and runs the full extraction on it.
     *
     * @param str the text to process
     * @return the matches produced by {@link #extract(TextInput)}
     * @throws ExtractionException if the underlying extraction fails
     */
    public List<TextMatch> extract(String str) throws ExtractionException {
        TextInput wrapped = new TextInput((String) null, str);
        return extract(wrapped);
    }

    /**
     * Looks up the province for a place using its country code and ADM1 code.
     *
     * @param place the place whose province is wanted
     * @return whatever the name helper returns for that country/ADM1 pair
     * @throws IOException if the name helper was never initialized
     */
    private Place getProvinceFor(Place place) throws IOException {
        if (this.nameHelper != null) {
            return this.nameHelper.getProvince(place.getCountryCode(), place.getAdmin1());
        }
        throw new IOException("GeonamesUtility was not initialized");
    }

    /**
     * Finds the gazetteer place that best describes a coordinate and attaches
     * nearby-place information to the coordinate match.
     *
     * <p>Only {@code GeocoordMatch} inputs are handled; anything else returns
     * {@code null} without querying the gazetteer. Resolution order:
     * <ol>
     *   <li>Walk the places within {@link #COORDINATE_PROXIMITY_CITY_THRESHOLD},
     *       remembering the first five as nearby places. The first populated
     *       place becomes the related place and is returned.</li>
     *   <li>Otherwise, if province names are available and the first place
     *       resolves to a province, that first place becomes the related place
     *       and the <em>province</em> is returned.</li>
     *   <li>Otherwise, query a single place of class {@code "P"} within the city
     *       threshold, falling back to class {@code "A"} within
     *       {@link #COORDINATE_PROXIMITY_ADM1_THRESHOLD}.</li>
     * </ol>
     *
     * <p>Province names are filled in only when the name helper was loaded; when
     * it was not, places are still resolved, just without an ADM1 name.
     *
     * @param geocoding the coordinate to resolve
     * @return the resolved place or province, or {@code null} if nothing was found
     * @throws SolrServerException if a gazetteer query fails
     * @throws IOException if a gazetteer query or province lookup fails
     */
    public Place evaluateCoordinate(Geocoding geocoding) throws SolrServerException, IOException {
        if (!(geocoding instanceof GeocoordMatch)) {
            return null;
        }
        GeocoordMatch coordinate = (GeocoordMatch) geocoding;

        List<Place> nearby = getGazetteer().placesAt(geocoding, COORDINATE_PROXIMITY_CITY_THRESHOLD);
        Place first = null;
        if (nearby != null) {
            int rank = 0;
            for (Place candidate : nearby) {
                rank++;
                if (rank <= 5) {
                    coordinate.addNearByPlace(candidate);
                }
                if (rank == 1) {
                    first = candidate;
                }
                if (candidate.isPopulated()) {
                    assignProvinceName(candidate);
                    coordinate.setRelatedPlace(candidate);
                    return candidate;
                }
            }
        }

        if (first != null && this.nameHelper != null) {
            Place province = getProvinceFor(first);
            if (province != null) {
                first.setAdmin1Name(province.getName());
                coordinate.setRelatedPlace(first);
                // This branch returns the province, not the nearby place.
                return province;
            }
            // Provisional: may be replaced by the fallback lookup below.
            coordinate.setRelatedPlace(first);
        }

        Place fallback = getGazetteer().placeAt(geocoding, COORDINATE_PROXIMITY_CITY_THRESHOLD, "P");
        if (fallback == null) {
            fallback = getGazetteer().placeAt(geocoding, COORDINATE_PROXIMITY_ADM1_THRESHOLD, "A");
        }
        if (fallback != null) {
            assignProvinceName(fallback);
            coordinate.setRelatedPlace(fallback);
        }
        return fallback;
    }

    /**
     * Sets the ADM1 name on a place from its province, when the name helper is
     * loaded and a province is found; otherwise leaves the place untouched.
     */
    private void assignProvinceName(Place place) throws IOException {
        if (this.nameHelper == null) {
            return;
        }
        Place province = getProvinceFor(place);
        if (province != null) {
            place.setAdmin1Name(province.getName());
        }
    }

    /**
     * Tells whether the given place has been recorded as a relevant location
     * for the current document.
     *
     * NOTE(review): the lookup uses {@code getKey()}, while {@code locationInScope}
     * stores entries under {@code getPlaceID()} — confirm the two return the same
     * value, otherwise this can never match.
     *
     * @param place the place to test, may be {@code null}
     * @return {@code true} if the place's key is present in the relevant-location map;
     *         {@code false} for {@code null}
     */
    @Override // org.opensextant.extractors.geo.LocationObserver
    public boolean placeObserved(Place place) {
        if (place == null) {
            return false;
        }
        return this.relevantLocations.containsKey(place.getKey());
    }
}
