package com.bericotech.clavin.index;

import com.bericotech.clavin.gazetteer.BasicGeoName;
import com.bericotech.clavin.gazetteer.CountryCode;
import com.bericotech.clavin.gazetteer.FeatureClass;
import com.bericotech.clavin.gazetteer.FeatureCode;
import com.bericotech.clavin.gazetteer.GeoName;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import shaded.org.apache.lucene.document.Document;
import shaded.org.apache.lucene.document.Field;
import shaded.org.apache.lucene.document.IntPoint;
import shaded.org.apache.lucene.document.LongPoint;
import shaded.org.apache.lucene.document.NumericDocValuesField;
import shaded.org.apache.lucene.document.StoredField;
import shaded.org.apache.lucene.document.StringField;
import shaded.org.apache.lucene.document.TextField;
import shaded.org.apache.lucene.index.IndexWriter;
import shaded.org.apache.lucene.index.IndexWriterConfig;
import shaded.org.apache.lucene.store.FSDirectory;

/* loaded from: input_file:com/bericotech/clavin/index/IndexDirectoryBuilder.class */
public class IndexDirectoryBuilder {
    // Long names of the command-line options registered in getOptions().
    private static final String HELP_OPTION = "help";
    private static final String FULL_ANCESTRY_OPTION = "with-full-ancestry";
    private static final String GAZETTEER_FILES_OPTION = "gazetteer-files";
    private static final String INDEX_PATH_OPTION = "index-path";
    private static final String REPLACE_INDEX_OPTION = "replace-index";
    private static final String ALTERNATE_NAMES_OPTION = "alt-names-file";
    /** Index directory used when the index-path option is not supplied. */
    private static final String DEFAULT_INDEX_DIRECTORY = "./IndexDirectory";
    /** GeoNames that expose an ancestry key, stored under that key. */
    private final Map<String, GeoName> adminMap = new TreeMap<>();
    /** GeoNames whose ancestry is not yet resolved, grouped by their parent ancestry key. */
    private final Map<String, Set<GeoName>> unresolvedMap = new TreeMap<>();
    /** Best English preferred/short alternate name selected so far for each GeoName ID. */
    private final Map<Integer, AlternateName> alternateNameMap = new HashMap<>();
    /** Selects which gazetteer record form (with or without ancestry) is stored in the index. */
    private final boolean fullAncestry;
    /** Writer for the index being built; assigned in buildIndex. */
    private IndexWriter indexWriter;
    /** Number of indexGeoName calls made during the current build. */
    private int indexCount;
    // Zero-based column positions within a tab-separated alternate names record.
    private static final int ALT_NAMES_ID_FIELD = 1;
    private static final int ALT_NAMES_LANG_FIELD = 2;
    private static final int ALT_NAMES_NAME_FIELD = 3;
    private static final int ALT_NAMES_PREFERRED_FIELD = 4;
    private static final int ALT_NAMES_SHORT_FIELD = 5;
    /** Column value that marks the preferred/short flag as set. */
    private static final String ALT_NAMES_TRUE = "1";
    // Language codes accepted by AlternateName.isEnglish().
    private static final String ISO2_ENGLISH = "en";
    private static final String ISO3_ENGLISH = "eng";
    private static final Logger LOG = LoggerFactory.getLogger(IndexDirectoryBuilder.class);
    /** Gazetteer files used when the gazetteer-files option is not supplied. */
    private static final String[] DEFAULT_GAZETTEER_FILES = {"./allCountries.txt", "./src/main/resources/SupplementaryGazetteer.txt"};

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/bericotech/clavin/index/IndexDirectoryBuilder$AlternateName.class */
    public static class AlternateName implements Comparable<AlternateName> {
        private final int geonameId;
        private final String name;
        private final String lang;
        private final boolean preferredName;
        private final boolean shortName;

        public AlternateName(String str) {
            String[] split = str.split("\t");
            this.geonameId = Integer.parseInt(split[1]);
            this.lang = split[2];
            this.name = split[3];
            this.preferredName = split.length > 4 && IndexDirectoryBuilder.ALT_NAMES_TRUE.equals(split[4].trim());
            this.shortName = split.length > 5 && IndexDirectoryBuilder.ALT_NAMES_TRUE.equals(split[5].trim());
        }

        public boolean isEnglish() {
            return IndexDirectoryBuilder.ISO2_ENGLISH.equalsIgnoreCase(this.lang) || IndexDirectoryBuilder.ISO3_ENGLISH.equalsIgnoreCase(this.lang);
        }

        public boolean isPrefOrShort() {
            return this.preferredName || this.shortName;
        }

        @Override // java.lang.Comparable
        public int compareTo(AlternateName alternateName) {
            int i = this.geonameId - alternateName.geonameId;
            int compare = i == 0 ? Boolean.compare(this.preferredName, alternateName.preferredName) : i;
            int compare2 = compare == 0 ? Boolean.compare(this.shortName, alternateName.shortName) : compare;
            return compare2 == 0 ? this.name.compareTo(alternateName.name) : compare2;
        }

        public AlternateName bestName(AlternateName alternateName) {
            if (alternateName == null) {
                return this;
            }
            int compare = Boolean.compare(this.preferredName, alternateName.preferredName);
            return (compare != 0 ? Boolean.compare(this.shortName, alternateName.shortName) : compare) >= 0 ? this : alternateName;
        }
    }

    /**
     * Creates a builder.
     *
     * @param z when true, indexed documents store the gazetteer record with
     *          ancestry; otherwise the plain gazetteer record is stored
     */
    private IndexDirectoryBuilder(boolean z) {
        fullAncestry = z;
    }

    /**
     * Builds the index: optionally loads alternate names, reads every
     * gazetteer file line by line, resolves ancestry, indexes the records and
     * finally closes the index writer.
     *
     * @param file  the directory the index is written to
     * @param list  the gazetteer files to read, in order
     * @param file2 the alternate names file, or null to skip alternate names
     * @throws IOException if the index cannot be opened or closed, or a
     *         gazetteer or alternate names file cannot be read
     */
    public void buildIndex(File file, List<File> list, File file2) throws IOException {
        LOG.info("Indexing... please wait.");
        this.indexCount = 0;
        FSDirectory indexDirectory = FSDirectory.open(file.toPath());
        this.indexWriter = new IndexWriter(indexDirectory, new IndexWriterConfig(new WhitespaceLowerCaseAnalyzer()));
        Date start = new Date();
        if (file2 != null) {
            loadAlternateNames(file2);
        }
        int rowCount = 0;
        for (File gazetteer : list) {
            LOG.info("Processing Gazetteer: {}", gazetteer.getAbsolutePath());
            // try-with-resources closes the reader even when reading fails.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(gazetteer), "UTF-8"))) {
                String line;
                // Stop at end of file; a null line must terminate the loop.
                while ((line = reader.readLine()) != null) {
                    try {
                        rowCount++;
                        if (rowCount % 100000 == 0) {
                            LOG.info("rowcount: {}", rowCount);
                        }
                        resolveAncestry(BasicGeoName.parseFromGeoNamesRecord(line));
                    } catch (IOException | RuntimeException e) {
                        // Best effort: a record that fails to parse or index is skipped.
                        LOG.info("Skipping... Error on line: {}", line);
                    }
                }
            }
        }
        // NOTE(review): the end timestamp is taken here, before unresolved
        // names are resolved and the writer is closed, so the reported elapsed
        // time excludes those steps.
        Date stop = new Date();
        LOG.info("Unresolved GeoNames (Pre-resolution)");
        logUnresolved();
        resolveUnresolved();
        LOG.info("Unresolved GeoNames (Post-resolution)");
        logUnresolved();
        LOG.info("Indexing unresolved GeoNames.");
        for (Set<GeoName> unresolved : this.unresolvedMap.values()) {
            for (GeoName geoName : unresolved) {
                indexGeoName(geoName);
            }
        }
        LOG.info("[DONE]");
        LOG.info("{} geonames added to index. ({} records)", this.indexWriter.getDocStats().maxDoc, this.indexCount);
        LOG.info("Merging indices... please wait.");
        this.indexWriter.close();
        indexDirectory.close();
        LOG.info("[DONE]");
        SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm:ss");
        long elapsedSeconds = TimeUnit.MILLISECONDS.toSeconds(stop.getTime() - start.getTime());
        LOG.info("Process started: " + timeFormat.format(start) + ", ended: " + timeFormat.format(stop) + "; elapsed time: " + elapsedSeconds + " seconds.");
    }

    /**
     * Reads the alternate names file and keeps, per GeoName ID, the best
     * English name that is flagged as preferred or short.
     *
     * @param file the alternate names file to read
     * @throws IOException if the file cannot be opened or read
     */
    private void loadAlternateNames(File file) throws IOException {
        LOG.info("Reading alternate names file: {}", file.getAbsolutePath());
        int lineCount = 0;
        // try-with-resources closes the reader even if a record fails to parse.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lineCount++;
                AlternateName candidate = new AlternateName(line);
                if (candidate.isEnglish() && candidate.isPrefOrShort()) {
                    Integer id = Integer.valueOf(candidate.geonameId);
                    // bestName accepts null, so the first candidate for an ID is stored as-is.
                    this.alternateNameMap.put(id, candidate.bestName(this.alternateNameMap.get(id)));
                }
            }
        }
        LOG.info("Processed {} alternate names.  Found {} names.", Integer.valueOf(lineCount), Integer.valueOf(this.alternateNameMap.size()));
    }

    /**
     * Attaches a freshly parsed GeoName to its parent when the parent is
     * already known, indexes it once its ancestry is resolved, and registers
     * it under its own ancestry key so waiting descendants can be resolved.
     *
     * @param geoName the record to process
     * @throws IOException if indexing the record or a descendant fails
     */
    private void resolveAncestry(GeoName geoName) throws IOException {
        String parentKey = geoName.getParentAncestryKey();
        if (parentKey != null) {
            boolean parentAccepted = geoName.setParent(this.adminMap.get(parentKey));
            if (!parentAccepted || !geoName.isAncestryResolved()) {
                // Park the record until its parent chain becomes available.
                Set<GeoName> waiting = this.unresolvedMap.get(parentKey);
                if (waiting == null) {
                    waiting = new HashSet<>();
                    this.unresolvedMap.put(parentKey, waiting);
                }
                waiting.add(geoName);
            }
        }

        if (geoName.isAncestryResolved()) {
            indexGeoName(geoName);
        }

        String ownKey = geoName.getAncestryKey();
        if (ownKey == null) {
            return;
        }
        GeoName previous = this.adminMap.get(ownKey);
        if (previous != null) {
            LOG.error(String.format("Resolved duplicate admin key [%s] for GeoNames (%d %s:%s %s) and (%d %s:%s %s)",
                    ownKey,
                    Integer.valueOf(previous.getGeonameID()), previous.getFeatureClass(), previous.getFeatureCode(), previous.getName(),
                    Integer.valueOf(geoName.getGeonameID()), geoName.getFeatureClass(), geoName.getFeatureCode(), geoName.getName()));
        }
        // The newest record replaces any earlier one stored under the same key.
        this.adminMap.put(ownKey, geoName);
        checkDescendantsResolved(geoName, true);
    }

    /**
     * Walks the records waiting on the given GeoName's ancestry key; each one
     * whose ancestry is now resolved has its own waiting descendants
     * processed recursively, is indexed, and is removed from the waiting set.
     *
     * @param geoName the record whose waiting descendants are examined
     * @param z       when true, the record is first set as the parent of each
     *                waiting descendant
     * @throws IOException if indexing a descendant fails
     */
    private void checkDescendantsResolved(GeoName geoName, boolean z) throws IOException {
        String key = geoName.getAncestryKey();
        if (key == null) {
            return;
        }
        Set<GeoName> waiting = this.unresolvedMap.get(key);
        if (waiting == null) {
            return;
        }
        for (Iterator<GeoName> iter = waiting.iterator(); iter.hasNext();) {
            GeoName descendant = iter.next();
            if (z && !descendant.setParent(geoName)) {
                LOG.error("Error setting parent [{}] of GeoName [{}].", geoName, descendant);
            }
            if (!descendant.isAncestryResolved()) {
                continue;
            }
            // Resolve deeper levels before indexing and dropping this record.
            checkDescendantsResolved(descendant, false);
            indexGeoName(descendant);
            iter.remove();
        }
        if (waiting.isEmpty()) {
            this.unresolvedMap.remove(key);
        }
    }

    /**
     * Makes a final attempt to resolve parked records by attaching them to
     * the nearest known ancestor: for each unresolved parent key, trailing
     * dot-separated segments are stripped until a key present in the admin
     * map is found. Keys with fewer segments are processed first.
     *
     * @throws IOException if indexing a resolved record fails
     */
    private void resolveUnresolved() throws IOException {
        // Order keys by number of dot-separated segments, then alphabetically.
        TreeSet<String> keys = new TreeSet<String>(new Comparator<String>() { // from class: com.bericotech.clavin.index.IndexDirectoryBuilder.1
            @Override // java.util.Comparator
            public int compare(String str, String str2) {
                int length = str.split("\\.").length;
                int length2 = str2.split("\\.").length;
                return length != length2 ? length - length2 : str.compareTo(str2);
            }
        });
        // Snapshot the keys so the map can be modified while iterating.
        keys.addAll(this.unresolvedMap.keySet());

        for (String key : keys) {
            GeoName ancestor = null;
            String candidateKey = key;
            int lastDot;
            while (ancestor == null && (lastDot = candidateKey.lastIndexOf(".")) > 0) {
                candidateKey = key.substring(0, lastDot);
                ancestor = this.adminMap.get(candidateKey);
            }
            if (ancestor == null) {
                LOG.error("Unable to resolve parent for GeoName key: {}", key);
                continue;
            }

            Set<GeoName> pending = this.unresolvedMap.get(key);
            if (pending == null) {
                // Nothing is waiting on this key any more; move on to the next
                // key instead of abandoning the remaining ones.
                continue;
            }
            Iterator<GeoName> iter = pending.iterator();
            while (iter.hasNext()) {
                GeoName geoName = iter.next();
                if (geoName.isAncestryResolved()) {
                    indexGeoName(geoName);
                    iter.remove();
                } else if (!geoName.setParent(ancestor)) {
                    LOG.error("Unable to set parent of {} to {}", geoName, ancestor);
                } else if (geoName.isAncestryResolved()) {
                    indexGeoName(geoName);
                    iter.remove();
                } else {
                    LOG.error("GeoName [{}] should be fully resolved. (parent: {})", geoName, ancestor);
                }
            }
            if (pending.isEmpty()) {
                this.unresolvedMap.remove(key);
            }
        }
    }

    /**
     * Writes one index document per distinct, non-empty name of the given
     * GeoName. All documents share the same stored record, IDs, ancestry,
     * population and feature fields; only the indexed name differs.
     *
     * @param geoName the record to index
     * @throws IOException if the index writer fails
     */
    private void indexGeoName(GeoName geoName) throws IOException {
        this.indexCount++;
        int geonameId = geoName.getGeonameID();
        AlternateName preferred = this.alternateNameMap.get(Integer.valueOf(geonameId));

        // Gather every name the record should be findable by.
        Set<String> names = new HashSet<>();
        names.add(geoName.getName());
        names.add(geoName.getAsciiName());
        names.addAll(geoName.getAlternateNames());
        if (geoName.isTopLevelAdminDivision()) {
            if (geoName.getPrimaryCountryCode() != null) {
                names.add(geoName.getPrimaryCountryCode().name());
            }
            for (CountryCode code : geoName.getAlternateCountryCodes()) {
                names.add(code.name());
            }
        }
        if (preferred != null) {
            names.add(preferred.name);
        }
        names.remove(null);
        names.remove("");

        Document document = new Document();
        String record = this.fullAncestry ? geoName.getGazetteerRecordWithAncestry() : geoName.getGazetteerRecord();
        document.add(new StoredField(IndexField.GEONAME.key(), record));
        document.add(new StoredField(IndexField.GEONAME_ID.key(), geonameId));
        document.add(new IntPoint(IndexField.GEONAME_ID.key(), geonameId));
        if (preferred != null) {
            document.add(new StoredField(IndexField.PREFERRED_NAME.key(), preferred.name));
        }

        GeoName ancestor = geoName.getParent();
        if (ancestor != null) {
            document.add(new StoredField(IndexField.PARENT_ID.key(), ancestor.getGeonameID()));
            document.add(new IntPoint(IndexField.PARENT_ID.key(), ancestor.getGeonameID()));
        }
        // The direct parent is recorded again here, followed by each higher ancestor.
        for (; ancestor != null; ancestor = ancestor.getParent()) {
            document.add(new StoredField(IndexField.ANCESTOR_IDS.key(), ancestor.getGeonameID()));
            document.add(new IntPoint(IndexField.ANCESTOR_IDS.key(), ancestor.getGeonameID()));
        }

        document.add(new StoredField(IndexField.POPULATION.key(), geoName.getPopulation()));
        document.add(new LongPoint(IndexField.POPULATION.key(), geoName.getPopulation()));
        boolean boosted = geoName.getFeatureClass().equals(FeatureClass.P) || geoName.getFeatureCode().name().startsWith("PCL");
        if (boosted) {
            // Class P and PCL* records sort by population * 11; GeoName
            // 2643741 is excluded and gets no sort value at all.
            if (geonameId != 2643741) {
                document.add(new NumericDocValuesField(IndexField.SORT_POP.key(), geoName.getPopulation() * 11));
            }
        } else {
            document.add(new NumericDocValuesField(IndexField.SORT_POP.key(), geoName.getPopulation()));
        }

        document.add(new StoredField(IndexField.HISTORICAL.key(), IndexField.getBooleanIndexValue(geoName.getFeatureCode().isHistorical())));
        document.add(new IntPoint(IndexField.HISTORICAL.key(), IndexField.getBooleanIndexValue(geoName.getFeatureCode().isHistorical())));
        document.add(new StringField(IndexField.FEATURE_CODE.key(), geoName.getFeatureCode().name(), Field.Store.NO));

        // One Document instance is reused; only the name field changes per write.
        TextField nameField = new TextField(IndexField.INDEX_NAME.key(), "", Field.Store.YES);
        document.add(nameField);
        for (String indexName : names) {
            nameField.setStringValue(indexName);
            this.indexWriter.addDocument(document);
        }
    }

    /**
     * Logs a summary of the unresolved records: the number of missing parent
     * keys per administrative level (derived from the number of dot-separated
     * key segments), the total number of unresolved records, and at trace
     * level a breakdown by feature class and code.
     */
    private void logUnresolved() {
        int totalUnresolved = 0;
        Map<String, Integer> countsByFeature = new TreeMap<>();
        Map<String, Integer> missingKeysByLevel = new TreeMap<>();

        for (Map.Entry<String, Set<GeoName>> entry : this.unresolvedMap.entrySet()) {
            String key = entry.getKey();
            Set<GeoName> waiting = entry.getValue();
            LOG.trace("{}: {} unresolved GeoNames", key, Integer.valueOf(waiting.size()));
            totalUnresolved += waiting.size();

            FeatureCode level;
            int segments = key.split("\\.").length;
            if (segments == 1) {
                level = FeatureCode.PCL;
            } else if (segments == 2) {
                level = FeatureCode.ADM1;
            } else if (segments == 3) {
                level = FeatureCode.ADM2;
            } else if (segments == 4) {
                level = FeatureCode.ADM3;
            } else if (segments == 5) {
                level = FeatureCode.ADM4;
            } else {
                LOG.error("Unexpected ancestry key: {}", key);
                level = FeatureCode.NULL;
            }

            Integer keysAtLevel = missingKeysByLevel.get(level.name());
            missingKeysByLevel.put(level.name(), Integer.valueOf(keysAtLevel == null ? 1 : keysAtLevel.intValue() + 1));

            for (GeoName geoName : waiting) {
                String feature = String.format("%s:%s", geoName.getFeatureClass(), geoName.getFeatureCode());
                Integer seen = countsByFeature.get(feature);
                countsByFeature.put(feature, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
            }
        }

        LOG.info("Found {} administrative divisions.", Integer.valueOf(this.adminMap.size()));
        LOG.info("Found {} missing administrative keys.", Integer.valueOf(this.unresolvedMap.size()));
        for (Map.Entry<String, Integer> levelCount : missingKeysByLevel.entrySet()) {
            LOG.info("{}: {}", levelCount.getKey(), levelCount.getValue());
        }
        LOG.info("{} total unresolved GeoNames", Integer.valueOf(totalUnresolved));
        for (Map.Entry<String, Integer> featureCount : countsByFeature.entrySet()) {
            LOG.trace("{}: {}", featureCount.getKey(), featureCount.getValue());
        }
    }

    /**
     * Command-line entry point: parses the options, validates the index
     * directory and input files, then builds the index.
     *
     * @param strArr the command-line arguments
     * @throws IOException if deleting an existing index or building the new one fails
     */
    public static void main(String[] strArr) throws IOException {
        Options options = getOptions();
        CommandLine cmd = null;
        try {
            cmd = new GnuParser().parse(options, strArr);
        } catch (ParseException e) {
            LOG.error(e.getMessage());
            printHelp(options);
            System.exit(-1);
        }
        if (cmd.hasOption(HELP_OPTION)) {
            printHelp(options);
            System.exit(0);
        }

        String indexPath = cmd.getOptionValue(INDEX_PATH_OPTION, DEFAULT_INDEX_DIRECTORY);
        String[] gazetteerPaths = cmd.getOptionValues(GAZETTEER_FILES_OPTION);
        if (gazetteerPaths == null || gazetteerPaths.length == 0) {
            gazetteerPaths = DEFAULT_GAZETTEER_FILES;
        }
        boolean replaceIndex = cmd.hasOption(REPLACE_INDEX_OPTION);
        boolean fullAncestry = cmd.hasOption(FULL_ANCESTRY_OPTION);

        // An existing index is only overwritten when explicitly requested.
        File indexDir = new File(indexPath);
        if (indexDir.exists()) {
            if (!replaceIndex) {
                LOG.info("{} exists. Remove the directory and try again.", indexDir.getAbsolutePath());
                System.exit(-1);
            } else {
                LOG.info("Replacing index: {}", indexDir.getAbsolutePath());
                FileUtils.deleteDirectory(indexDir);
            }
        }

        // Unreadable gazetteer files are reported and skipped.
        List<File> gazetteers = new ArrayList<>();
        for (String path : gazetteerPaths) {
            File gazetteer = new File(path);
            if (!gazetteer.isFile() || !gazetteer.canRead()) {
                LOG.info("Unable to read Gazetteer file: {}", gazetteer.getAbsolutePath());
            } else {
                gazetteers.add(gazetteer);
            }
        }
        if (gazetteers.isEmpty()) {
            LOG.error("No Gazetteer files found.");
            System.exit(-1);
        }

        String altNamesPath = cmd.getOptionValue(ALTERNATE_NAMES_OPTION);
        File altNamesFile = null;
        if (altNamesPath != null) {
            altNamesFile = new File(altNamesPath);
            if (!(altNamesFile.isFile() && altNamesFile.canRead())) {
                LOG.error("Unable to read alternate names file: {}", altNamesPath);
                System.exit(-1);
            }
        }

        new IndexDirectoryBuilder(fullAncestry).buildIndex(indexDir, gazetteers, altNamesFile);
    }

    /**
     * Declares the command-line options understood by {@code main}.
     *
     * @return the populated option set
     */
    private static Options getOptions() {
        Options options = new Options();

        // OptionBuilder methods are static and return the shared builder, so
        // each chain below issues the same calls in the same order as
        // separate statements would.
        options.addOption(OptionBuilder
                .withLongOpt(HELP_OPTION)
                .withDescription("Print help")
                .create('?'));

        options.addOption(OptionBuilder
                .withLongOpt(FULL_ANCESTRY_OPTION)
                .withDescription("Store the gazetteer records for the full ancestry tree of each element. This will increase performance at the expense of a larger index.")
                .create());

        options.addOption(OptionBuilder
                .withLongOpt(GAZETTEER_FILES_OPTION)
                .withDescription(String.format("The ':'-separated list of input Gazetteer files to parse.  Default: %s", StringUtils.join(DEFAULT_GAZETTEER_FILES, ':')))
                .hasArgs()
                .withValueSeparator(':')
                .create('i'));

        options.addOption(OptionBuilder
                .withLongOpt(ALTERNATE_NAMES_OPTION)
                .withDescription("When provided, the path to the GeoNames.org alternate names file for resolution of common and short names for each location. If not provided, the default name for each location will be used.")
                .hasArg()
                .create());

        options.addOption(OptionBuilder
                .withLongOpt(INDEX_PATH_OPTION)
                .withDescription(String.format("The path to the output index directory. Default: %s", DEFAULT_INDEX_DIRECTORY))
                .hasArg()
                .create('o'));

        options.addOption(OptionBuilder
                .withLongOpt(REPLACE_INDEX_OPTION)
                .withDescription("Replace an existing index if it exists. If this option is not specified,index processing will fail if an index already exists at the specified location.")
                .create('r'));

        return options;
    }

    /**
     * Prints the usage text for the given options, using "run" as the
     * command name and including an auto-generated usage line.
     *
     * @param options the options to describe
     */
    private static void printHelp(Options options) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("run", options, true);
    }
}
