package org.fbk.cit.hlt.thewikimachine.xmldump;

import info.bliki.api.Connector;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilderFactory;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.OptionBuilder;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.thewikimachine.index.BigCrossLanguageSearcher;
import org.fbk.cit.hlt.thewikimachine.index.QIDPageSearcher;
import org.fbk.cit.hlt.thewikimachine.util.CommandLineWithLogger;
import org.fbk.cit.hlt.thewikimachine.util.ExtendedProperties;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.wikipedia.StatisticsIndexer;
import org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExampleExtractor;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/xmldump/ExternalTypesLoader.class */
public class ExternalTypesLoader {
    /** Class-wide log4j logger. */
    static Logger logger = Logger.getLogger(ExternalTypesLoader.class.getName());
    /** Captures the trailing run of digits of a string; applied to rdf:about / rdf:resource attribute values. */
    static Pattern bncfPattern = Pattern.compile("([0-9]+)$");
    /** BNCF id -> ids referenced by its skos:broader elements. */
    HashMap<Integer, HashSet<Integer>> bncfBroaders = new HashMap<>();
    /** Entity id -> property name -> set of values, as read from the WikiData properties file. */
    HashMap<Integer, HashMap<String, HashSet<String>>> wikidataProperties = new HashMap<>();
    /** Optional searcher used only to fetch the "en" label for debug output; may be null. */
    BigCrossLanguageSearcher clSearcher = null;
    /** WikiData id -> mapped type; an empty value stops the traversal. Null until init() is called. */
    ExtendedProperties wikidataMappings = null;
    /** BNCF id -> mapped type; an empty value stops the traversal. Null until init() is called. */
    ExtendedProperties bncfMappings = null;
    /** Depth limit (10) corresponding to the bound checked by the recursive class look-ups. */
    private static final int MAX_DEPTH = 10;

    /**
     * Repeats a string a given number of times.
     *
     * @param str the string to repeat
     * @param i   the number of repetitions; zero or a negative value yields the empty string
     * @return {@code str} concatenated {@code i} times
     */
    public static String multiply(String str, int i) {
        // A negative count used to reach the StringBuilder constructor with a
        // negative capacity (NegativeArraySizeException); treat it like zero.
        if (i <= 0) {
            return "";
        }
        StringBuilder repeated = new StringBuilder(i * str.length());
        for (int n = 0; n < i; n++) {
            repeated.append(str);
        }
        return repeated.toString();
    }

    /**
     * Repeats a character a given number of times.
     *
     * @param c the character to repeat
     * @param i the number of repetitions; zero or a negative value yields the empty string
     * @return a string made of {@code i} copies of {@code c}
     */
    public static String multiply(char c, int i) {
        // A negative count used to fail with NegativeArraySizeException when
        // sizing the buffer; treat it like zero.
        if (i <= 0) {
            return "";
        }
        StringBuilder repeated = new StringBuilder(i);
        for (int n = 0; n < i; n++) {
            repeated.append(c);
        }
        return repeated.toString();
    }

    /**
     * Adds to {@code hashSet} the types reachable through the "bncf" property
     * values recorded for the given entity.
     *
     * <p>Each value is parsed as a BNCF id and resolved with
     * {@link #getBncfClassInside(Integer, HashSet, int)} starting at depth 1.
     * Any exception raised while handling one value is logged at debug level
     * and the remaining values are still processed.
     *
     * @param num     the entity id used as key into the loaded WikiData properties
     * @param hashSet the set that receives the mapped types
     */
    public void getBncfClass(Integer num, HashSet<String> hashSet) {
        if (logger.isDebugEnabled()) {
            // The English label is only looked up for the debug line.
            Object label = null;
            if (this.clSearcher != null) {
                label = this.clSearcher.search(QIDPageSearcher.QID_LABEL, num.toString()).get("en");
            }
            logger.debug(String.format("BNCF %s [%d]", label, num));
        }

        HashMap<String, HashSet<String>> properties = this.wikidataProperties.get(num);
        if (properties == null) {
            logger.debug(String.format("No BNCF/WikiData properties for [%d]", num));
            return;
        }
        if (!properties.containsKey("bncf")) {
            return;
        }

        for (String bncfId : properties.get("bncf")) {
            try {
                getBncfClassInside(Integer.valueOf(Integer.parseInt(bncfId)), hashSet, 1);
            } catch (Exception e) {
                logger.debug("ERROR: " + e.getMessage());
            }
        }
    }

    /**
     * Resolves a BNCF id to a mapped type, climbing the broader-term links
     * when the id itself has no mapping.
     *
     * <p>If {@code bncfMappings} holds a value for the id, the traversal of
     * this branch ends there: a non-empty value is added to {@code hashSet},
     * an empty value simply stops the branch. Otherwise every id listed in
     * {@code bncfBroaders} for this id is visited recursively.
     *
     * @param num     the BNCF id to resolve
     * @param hashSet the set that receives the mapped types
     * @param i       the current depth; also used as the number of tabs that
     *                indent the debug output. Branches deeper than
     *                {@code MAX_DEPTH} are abandoned.
     */
    public void getBncfClassInside(Integer num, HashSet<String> hashSet, int i) {
        // Use the declared constant rather than a repeated literal.
        if (i > MAX_DEPTH) {
            logger.debug("--- Max depth reached");
            return;
        }
        String indent = multiply('\t', i);

        String mapped = this.bncfMappings.getProperty(num.toString());
        if (mapped != null) {
            if (mapped.length() == 0) {
                // An empty mapping is an explicit "do not go further" marker.
                logger.debug(String.format("%sSTOPPED!", indent));
            } else {
                hashSet.add(mapped);
                logger.debug(String.format("%sMap to: %s", indent, mapped));
            }
            return;
        }
        logger.debug(String.format("%sNo BNCF mappings for [%d]", indent, num));

        HashSet<Integer> broaders = this.bncfBroaders.get(num);
        if (broaders == null) {
            logger.debug(String.format("%sNo more superclasses", indent));
            return;
        }
        logger.debug(String.format("%sSuperclasses: %d", indent, Integer.valueOf(broaders.size())));
        for (Integer broader : broaders) {
            logger.debug(String.format("%ssuper-class %d", indent, broader));
            getBncfClassInside(broader, hashSet, i + 1);
        }
    }

    /**
     * Collects the mapped types of an entity by following its "subclass_of"
     * values, starting the traversal at depth 0.
     *
     * @param num     the entity id to start from
     * @param hashSet the set that receives the mapped types
     */
    public void getWikidataClass(Integer num, HashSet<String> hashSet) {
        final int startDepth = 0;
        getWikidataClass(num, hashSet, startDepth);
    }

    /**
     * Recursive step of the WikiData class look-up.
     *
     * <p>At depth 0 the entity's own mapping is ignored and only its
     * "subclass_of" values are followed. At any deeper level a value in
     * {@code wikidataMappings} ends the branch: a non-empty value is added to
     * {@code hashSet}, an empty value simply stops the branch. Ids without a
     * mapping are expanded through their own "subclass_of" values.
     *
     * @param num     the entity id to resolve
     * @param hashSet the set that receives the mapped types
     * @param i       the current depth; also used as the number of tabs that
     *                indent the debug output. Branches deeper than
     *                {@code MAX_DEPTH} are abandoned.
     */
    public void getWikidataClass(Integer num, HashSet<String> hashSet, int i) {
        // Use the declared constant rather than a repeated literal.
        if (i > MAX_DEPTH) {
            logger.debug("--- Max depth reached");
            return;
        }
        String indent = multiply('\t', i);
        if (logger.isDebugEnabled()) {
            // The English label is only looked up for the debug line.
            logger.debug(String.format("%sWIKIDATA %s [%d]", indent, this.clSearcher != null ? this.clSearcher.search(QIDPageSearcher.QID_LABEL, num.toString()).get("en") : null, num));
        }

        if (i > 0) {
            String mapped = this.wikidataMappings.getProperty(num.toString());
            if (mapped != null) {
                if (mapped.length() == 0) {
                    // An empty mapping is an explicit "do not go further" marker.
                    logger.debug(String.format("%sSTOPPED!", indent));
                } else {
                    hashSet.add(mapped);
                    logger.debug(String.format("%sMap to: %s", indent, mapped));
                }
                return;
            }
        }

        HashMap<String, HashSet<String>> properties = this.wikidataProperties.get(num);
        if (properties == null) {
            logger.debug(String.format("%swP null", indent));
            return;
        }

        // Direct lookup instead of scanning every property name.
        HashSet<String> parents = properties.get("subclass_of");
        if (parents == null) {
            return;
        }
        logger.debug(String.format("%s%s", indent, "subclass_of"));
        for (String parent : parents) {
            Integer parentId;
            try {
                parentId = Integer.valueOf(Integer.parseInt(parent));
            } catch (NumberFormatException e) {
                // A malformed id must not abort the whole look-up; log it the
                // same way getBncfClass does and continue with the other parents.
                logger.debug("ERROR: " + e.getMessage());
                continue;
            }
            getWikidataClass(parentId, hashSet, i + 1);
        }
    }

    /**
     * Reads every {@code *.xml} file directly inside a folder and returns the
     * broader-term links found in them.
     *
     * <p>Failures are logged and not propagated; whatever was read before the
     * failure is still returned.
     *
     * @param str path of the folder containing the BNCF RDF/XML files
     * @return BNCF id -> ids referenced by its {@code skos:broader} elements;
     *         every described id gets an entry, possibly with an empty set
     */
    public static HashMap<Integer, HashSet<Integer>> getBncf(String str) {
        HashMap<Integer, HashSet<Integer>> broaders = new HashMap<>();
        // Collection members are parsed (and traced) but are not part of the result.
        loadBncfFolder(str, broaders, new HashMap<Integer, HashSet<Integer>>());
        return broaders;
    }

    /**
     * Initialises the loader without a limit on the number of WikiData entities.
     *
     * @param str                      WikiData mappings file
     * @param bigCrossLanguageSearcher optional searcher used for debug labels; may be null
     * @param str2                     WikiData properties file
     * @param str3                     BNCF mappings file
     * @param str4                     folder containing the BNCF RDF/XML files
     */
    public void init(String str, BigCrossLanguageSearcher bigCrossLanguageSearcher, String str2, String str3, String str4) {
        init(str, bigCrossLanguageSearcher, str2, str3, str4, null);
    }

    /**
     * Initialises the loader: reads both mapping files, the BNCF folder and
     * the WikiData properties file, replacing any previously loaded state.
     *
     * <p>No step propagates its failure: each problem is logged as an error
     * and the affected structure is left empty or partially filled.
     *
     * <p>After the BNCF files are read, the mapping of every collection is
     * copied to those of its members that have no mapping of their own.
     *
     * @param str                      WikiData mappings file
     * @param bigCrossLanguageSearcher optional searcher used for debug labels; may be null
     * @param str2                     WikiData properties file
     * @param str3                     BNCF mappings file
     * @param str4                     folder containing the BNCF RDF/XML files
     * @param num                      maximum number of WikiData entities to load, or null for no limit
     */
    public void init(String str, BigCrossLanguageSearcher bigCrossLanguageSearcher, String str2, String str3, String str4, Integer num) {
        this.bncfBroaders = new HashMap<>();
        this.wikidataProperties = new HashMap<>();

        // Start from empty mappings so a failed load still leaves a usable object.
        this.wikidataMappings = new ExtendedProperties();
        try {
            this.wikidataMappings = new ExtendedProperties(str);
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
        logger.debug(this.wikidataMappings);

        this.bncfMappings = new ExtendedProperties();
        try {
            this.bncfMappings = new ExtendedProperties(str3);
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }

        this.clSearcher = bigCrossLanguageSearcher;

        HashMap<Integer, HashSet<Integer>> collections = new HashMap<>();
        loadBncfFolder(str4, this.bncfBroaders, collections);

        // Members of a mapped collection inherit its mapping unless they have their own.
        for (Integer collectionId : collections.keySet()) {
            String collectionType = (String) this.bncfMappings.get(collectionId.toString());
            if (collectionType == null) {
                continue;
            }
            for (Integer memberId : collections.get(collectionId)) {
                if (this.bncfMappings.get(memberId.toString()) == null) {
                    this.bncfMappings.put(memberId.toString(), collectionType);
                    logger.trace(String.format("Mapping added: %d --> %s", memberId, collectionType));
                }
            }
        }
        logger.info(String.format("BNCF file(s) loaded: %d entities", Integer.valueOf(this.bncfBroaders.size())));

        logger.info("Loading WikiData file " + str2);
        try {
            this.wikidataProperties = loadWikiDataProperties(str2, num);
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
        logger.info(String.format("WikiData file loaded: %d entities", Integer.valueOf(this.wikidataProperties.size())));
    }

    /**
     * Parses every {@code *.xml} file of a folder and fills the two maps.
     * The first failure stops the loading and is logged; entries added up to
     * that point are kept.
     */
    private static void loadBncfFolder(String folder, HashMap<Integer, HashSet<Integer>> broaders, HashMap<Integer, HashSet<Integer>> collections) {
        try {
            File[] files = new File(folder).listFiles();
            if (files == null) {
                // listFiles() returns null when the path is not a directory or cannot be read.
                logger.error("Cannot list BNCF folder: " + folder);
                return;
            }
            // The factory does not depend on the file, so create it once.
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            for (File file : files) {
                if (!file.getName().endsWith(".xml")) {
                    continue;
                }
                logger.info("Loading file " + file.getName());
                Document document = factory.newDocumentBuilder().parse(file);
                document.getDocumentElement().normalize();
                NodeList descriptions = document.getElementsByTagName("rdf:Description");
                for (int d = 0; d < descriptions.getLength(); d++) {
                    Node description = descriptions.item(d);
                    if (description.getNodeType() == Node.ELEMENT_NODE) {
                        readBncfDescription((Element) description, broaders, collections);
                    }
                }
            }
        } catch (Exception e) {
            // Keep the throwable so the stack trace is not lost.
            logger.error(e.getMessage(), e);
        }
    }

    /**
     * Records one rdf:Description: its members when it is a SKOS collection,
     * its broader terms otherwise. Descriptions whose rdf:about does not end
     * in digits are ignored.
     */
    private static void readBncfDescription(Element description, HashMap<Integer, HashSet<Integer>> broaders, HashMap<Integer, HashSet<Integer>> collections) {
        Matcher about = bncfPattern.matcher(description.getAttribute("rdf:about"));
        if (!about.find()) {
            return;
        }
        Integer id = Integer.valueOf(Integer.parseInt(about.group(1)));
        logger.trace(String.format("About: %d", id));

        // Every described id gets a broaders entry, collections included.
        if (broaders.get(id) == null) {
            broaders.put(id, new HashSet<Integer>());
        }

        if (isSkosCollection(description)) {
            logger.trace("It is a collection");
            if (collections.get(id) == null) {
                collections.put(id, new HashSet<Integer>());
            }
            collectReferencedIds(description, "skos:member", "Member: %d", collections.get(id));
        } else {
            collectReferencedIds(description, "skos:broader", "Broader: %d", broaders.get(id));
        }
    }

    /**
     * Tells whether any rdf:type of the description is skos:Collection.
     * Checking every rdf:type (rather than letting the last one win) keeps
     * the answer independent of the order of the type elements.
     */
    private static boolean isSkosCollection(Element description) {
        NodeList types = description.getElementsByTagName("rdf:type");
        for (int t = 0; t < types.getLength(); t++) {
            Node type = types.item(t);
            if (type.getNodeType() == Node.ELEMENT_NODE
                    && ((Element) type).getAttribute("rdf:resource").equals("http://www.w3.org/2004/02/skos/core#Collection")) {
                return true;
            }
        }
        return false;
    }

    /**
     * Adds to {@code target} the trailing numeric id of the rdf:resource of
     * every {@code tagName} element under the description, tracing each one
     * with {@code traceFormat}.
     */
    private static void collectReferencedIds(Element description, String tagName, String traceFormat, HashSet<Integer> target) {
        NodeList references = description.getElementsByTagName(tagName);
        for (int r = 0; r < references.getLength(); r++) {
            Node reference = references.item(r);
            if (reference.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Matcher resource = bncfPattern.matcher(((Element) reference).getAttribute("rdf:resource"));
            if (resource.find()) {
                Integer referencedId = Integer.valueOf(Integer.parseInt(resource.group(1)));
                target.add(referencedId);
                logger.trace(String.format(traceFormat, referencedId));
            }
        }
    }

    /**
     * Loads the whole WikiData properties file, without a limit on the
     * number of entities.
     *
     * @param str path of the properties file
     * @return entity id -> property name -> set of values
     * @throws IOException if the file cannot be opened or read
     */
    public static HashMap<Integer, HashMap<String, HashSet<String>>> loadWikiDataProperties(String str) throws IOException {
        final Integer noLimit = null;
        return loadWikiDataProperties(str, noLimit);
    }

    /**
     * Loads the WikiData properties file into memory.
     *
     * <p>Each line is split on {@code StringTable.HORIZONTAL_TABULATION};
     * lines with fewer than three fields are skipped. The first field is
     * parsed as the integer entity id, the second is the property name and
     * the third its value.
     *
     * @param str path of the properties file
     * @param num stop reading once this many distinct entities have been
     *            collected, or null to read the whole file
     * @return entity id -> property name -> set of values
     * @throws IOException           if the file cannot be opened or read
     * @throws NumberFormatException if the first field of a line is not an integer
     */
    public static HashMap<Integer, HashMap<String, HashSet<String>>> loadWikiDataProperties(String str, Integer num) throws IOException {
        HashMap<Integer, HashMap<String, HashSet<String>>> entities = new HashMap<>();
        // try-with-resources: the stream is released even when a read or a
        // parse error interrupts the loop.
        try (FileInputStream input = new FileInputStream(str);
             BufferedReader reader = new BufferedReader(new InputStreamReader(input, Connector.UTF8_CHARSET))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (num != null && entities.size() >= num.intValue()) {
                    break;
                }
                String[] fields = line.split(StringTable.HORIZONTAL_TABULATION);
                if (fields.length < 3) {
                    continue;
                }
                Integer entityId = Integer.valueOf(Integer.parseInt(fields[0]));
                String property = fields[1];
                String value = fields[2];

                HashMap<String, HashSet<String>> properties = entities.get(entityId);
                if (properties == null) {
                    properties = new HashMap<>();
                    entities.put(entityId, properties);
                }
                HashSet<String> values = properties.get(property);
                if (values == null) {
                    values = new HashSet<>();
                    properties.put(property, values);
                }
                values.add(value);
            }
        }
        return entities;
    }

    /**
     * Collects every type found for an entity, first through the WikiData
     * look-up and then through the BNCF look-up.
     *
     * @param num the entity id
     * @return a new set with the types found; empty when none was found
     */
    public HashSet<String> search(Integer num) {
        HashSet<String> types = new HashSet<>();

        getWikidataClass(num, types);
        getBncfClass(num, types);

        String summary = String.format("Found %d --> %s", num, types);
        logger.debug(summary);
        return types;
    }

    /**
     * Runs {@link #search(Integer)} on every entity of the loaded WikiData
     * properties and writes a debug line for each entity that has at least
     * one type.
     */
    public void runForAll() {
        for (Integer entityId : this.wikidataProperties.keySet()) {
            HashSet<String> types = search(entityId);
            // Guard the label lookup and the formatting like the other debug
            // lines of this class, so they are skipped when debug is off.
            if (!types.isEmpty() && logger.isDebugEnabled()) {
                logger.debug(String.format("%s [%s] --> %s", this.clSearcher != null ? this.clSearcher.search(QIDPageSearcher.QID_LABEL, entityId.toString()).get("en") : null, entityId, types));
            }
        }
    }

    /**
     * Command-line entry point: loads the BNCF and WikiData resources given
     * as options and runs the type look-up on every loaded entity.
     *
     * <p>Required options: {@code --bncf}, {@code --bncf-map},
     * {@code --wikidata}, {@code --wikidata-map}. Optional:
     * {@code --wikidata-cl} (cross-language index) and {@code --pages}
     * (maximum number of entities to load).
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger();
        OptionBuilder.withDescription("BNCF folder");
        OptionBuilder.isRequired();
        OptionBuilder.hasArgs();
        OptionBuilder.withArgName("folder");
        OptionBuilder.withLongOpt("bncf");
        commandLineWithLogger.addOption(OptionBuilder.create("b"));
        OptionBuilder.withDescription("BNCF mappings file");
        OptionBuilder.isRequired();
        OptionBuilder.hasArgs();
        OptionBuilder.withArgName("file");
        OptionBuilder.withLongOpt("bncf-map");
        commandLineWithLogger.addOption(OptionBuilder.create("n"));
        OptionBuilder.withDescription("WikiData properties file");
        OptionBuilder.isRequired();
        OptionBuilder.hasArgs();
        OptionBuilder.withArgName("file");
        OptionBuilder.withLongOpt("wikidata");
        commandLineWithLogger.addOption(OptionBuilder.create("w"));
        OptionBuilder.withDescription("WikiData mappings file");
        OptionBuilder.isRequired();
        OptionBuilder.hasArgs();
        OptionBuilder.withArgName("file");
        OptionBuilder.withLongOpt("wikidata-map");
        commandLineWithLogger.addOption(OptionBuilder.create("m"));
        OptionBuilder.withDescription("WikiData cross-language index");
        OptionBuilder.hasArgs();
        OptionBuilder.withArgName("folder");
        OptionBuilder.withLongOpt("wikidata-cl");
        // NOTE(review): the short names and the arg name below come from
        // unrelated constants; presumably plain literals in the original
        // source - verify their values before changing them.
        commandLineWithLogger.addOption(OptionBuilder.create(AbstractBottomUpParser.COMPLETE));
        OptionBuilder.withDescription("WikiData number of pages");
        OptionBuilder.hasArgs();
        OptionBuilder.withArgName(StatisticsIndexer.TRAFFIC_FIELD_NAME);
        OptionBuilder.withLongOpt("pages");
        commandLineWithLogger.addOption(OptionBuilder.create(WikipediaExampleExtractor.Example.CONTENT_FROM_NOMINAL));

        CommandLine commandLine = null;
        try {
            commandLine = commandLineWithLogger.getCommandLine(strArr);
            PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps());
        } catch (Exception e) {
            // NOTE(review): nothing is reported here; presumably
            // CommandLineWithLogger prints the usage itself - confirm.
            System.exit(1);
        }

        String bncfFolder = commandLine.getOptionValue("bncf");
        String wikidataFile = commandLine.getOptionValue("wikidata");
        String wikidataMapFile = commandLine.getOptionValue("wikidata-map");
        String bncfMapFile = commandLine.getOptionValue("bncf-map");
        String crossLanguageIndex = commandLine.getOptionValue("wikidata-cl");

        Integer maxPages = null;
        if (commandLine.hasOption("pages")) {
            String pages = commandLine.getOptionValue("pages");
            try {
                maxPages = Integer.valueOf(Integer.parseInt(pages));
            } catch (NumberFormatException e) {
                // Report a bad value instead of dying with a raw stack trace.
                logger.error("Invalid number of pages: " + pages);
                System.exit(1);
            }
        }

        BigCrossLanguageSearcher searcher = null;
        if (crossLanguageIndex != null) {
            try {
                searcher = new BigCrossLanguageSearcher(crossLanguageIndex);
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
                return;
            }
        }

        try {
            ExternalTypesLoader loader = new ExternalTypesLoader();
            loader.init(wikidataMapFile, searcher, wikidataFile, bncfMapFile, bncfFolder, maxPages);
            loader.runForAll();
        } finally {
            // Close the index even when loading or the look-up fails.
            if (searcher != null) {
                searcher.close();
            }
        }
    }
}
