package org.fbk.cit.hlt.thewikimachine.xmldump;

import info.bliki.api.Connector;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.OptionBuilder;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.thewikimachine.ExtractorParameters;
import org.fbk.cit.hlt.thewikimachine.util.CommandLineWithLogger;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.WikiTemplate;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.WikiTemplateParser;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/xmldump/DBpediaAllMappingsExtractor.class */
/**
 * Extracts infobox-to-class mappings from the pages of a DBpedia mappings dump.
 *
 * <p>Pages whose (URL-decoded) title matches {@code Mapping<sep><lang>:<infobox>} are parsed for
 * {@code TemplateMapping} and {@code ConditionalMapping} templates. For each of them one
 * tab-separated line {@code lang, infobox, mapping} is written to the output file. Redirect pages
 * are collected while parsing and resolved against the collected mappings in {@link #endProcess()}.
 *
 * <p>An optional "manual mappings" file can be supplied: lines starting with {@code -} name a
 * {@code lang<TAB>infobox} pair to ignore, lines starting with {@code #} are comments, and every
 * other non-empty line is copied verbatim to the end of the output.
 */
public class DBpediaAllMappingsExtractor extends AbstractWikipediaExtractor implements WikipediaExtractor {
    /** Destination of the extracted mappings; {@code null} until the output file has been opened. */
    private PrintWriter dbpediaWriter;
    /** Maps {@code lang<TAB>infobox} to the mapping string written for it. */
    private HashMap<String, String> cache;
    /** Maps a redirecting {@code lang<TAB>infobox} key to the key it redirects to. */
    private HashMap<String, String> redirect;
    /** Maps a language code to the set of infobox names to skip for that language. */
    private HashMap<String, HashSet<String>> ignore;
    /** Lines from the manual mappings file that are appended verbatim to the output. */
    private HashSet<String> added;
    static Logger logger = Logger.getLogger(DBpediaAllMappingsExtractor.class.getName());
    /** Splits a page title into language code (group 1) and infobox name (group 2). */
    private static final Pattern pt = Pattern.compile("^Mapping.([a-zA-Z0-9]+):(.*)");
    /** Captures the target of a redirect to another mapping page (group 1). */
    private static final Pattern redirectPattern = Pattern.compile("#REDIRECT\\s+\\[\\[Mapping.{0,3}:(.*)\\]\\]");
    /** Matches an HTML comment that starts and ends on the same line. */
    private static final Pattern COMMENT_PATTERN = Pattern.compile("<!--.*?-->");

    /** Creates an extractor with empty mapping, redirect, ignore and manual-addition tables. */
    public DBpediaAllMappingsExtractor() {
        super(1, 10000, new Locale("en"));
        this.cache = new HashMap<>();
        this.redirect = new HashMap<>();
        this.ignore = new HashMap<>();
        this.added = new HashSet<>();
    }

    /**
     * Loads the optional manual mappings, opens the output file and starts processing the dump.
     *
     * @param str  path of the input dump, handed to {@code startProcess}
     * @param str2 path of the output file, written as UTF-8
     * @param str3 path of the manual mappings file, or {@code null} when there is none
     */
    public void start(String str, String str2, String str3) {
        this.cache = new HashMap<>();
        this.redirect = new HashMap<>();
        if (str3 != null) {
            loadManualMappings(str3);
        }
        logger.info("To ignore: " + this.ignore.size());
        logger.debug(this.ignore);
        logger.info("To add: " + this.added.size());
        logger.debug(this.added);
        try {
            this.dbpediaWriter = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(str2), "UTF-8")));
        } catch (IOException e) {
            // Without an output file there is nothing useful to do, so do not start parsing.
            logger.error("Unable to open output file: " + str2, e);
            return;
        }
        startProcess(str);
    }

    /**
     * Reads the manual mappings file into the {@code ignore} and {@code added} tables.
     * Malformed ignore lines are skipped with a warning instead of aborting the whole load.
     */
    private void loadManualMappings(String path) {
        // try-with-resources guarantees the reader is closed even when reading fails half-way.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), Connector.UTF8_CHARSET))) {
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                String line = rawLine.trim();
                if (line.length() == 0 || line.startsWith("#")) {
                    continue;
                }
                if (!line.startsWith(Character.toString('-'))) {
                    this.added.add(line);
                    continue;
                }
                String[] fields = line.split(StringTable.HORIZONTAL_TABULATION);
                if (fields.length < 2) {
                    logger.warn("Skipping malformed ignore line (expected -lang<TAB>infobox): " + line);
                    continue;
                }
                String lang = fields[0].substring(1).trim();
                String infobox = fields[1].trim();
                HashSet<String> infoboxes = this.ignore.get(lang);
                if (infoboxes == null) {
                    infoboxes = new HashSet<>();
                    this.ignore.put(lang, infoboxes);
                }
                infoboxes.add(infobox);
            }
        } catch (IOException e) {
            logger.error("Unable to read manual mappings file: " + path, e);
        }
    }

    /**
     * Processes one content page of the dump.
     *
     * <p>Pages whose title does not match the mapping pattern, or whose language/infobox pair is in
     * the ignore table, are skipped. Redirect pages are only recorded; they are resolved in
     * {@link #endProcess()}. For every other page each root {@code TemplateMapping} or
     * {@code ConditionalMapping} template produces one output line.
     *
     * @param str  page text
     * @param str2 URL-encoded page title
     * @param i    not used by this extractor
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void contentPage(String str, String str2, int i) {
        try {
            String title = URLDecoder.decode(str2, "UTF-8");
            Matcher titleMatcher = pt.matcher(title);
            if (!titleMatcher.find()) {
                logger.debug("Pattern not found: " + title);
                return;
            }
            String infobox = titleMatcher.group(2);
            String lang = titleMatcher.group(1);
            HashSet<String> ignoredInfoboxes = this.ignore.get(lang);
            if (ignoredInfoboxes != null && ignoredInfoboxes.contains(infobox)) {
                logger.debug("Infobox ignored: " + infobox);
                return;
            }
            logger.debug("Title: " + title + " | Lang: " + lang + " | Infobox: " + infobox);
            String text = COMMENT_PATTERN.matcher(str).replaceAll("").trim();
            String key = lang + StringTable.HORIZONTAL_TABULATION + infobox;
            Matcher redirectMatcher = redirectPattern.matcher(text);
            if (redirectMatcher.find()) {
                String target = redirectMatcher.group(1).trim().replace(' ', '_');
                this.redirect.put(key, lang + StringTable.HORIZONTAL_TABULATION + target);
                return;
            }
            for (WikiTemplate template : WikiTemplateParser.parse(text, false)) {
                if (!template.isRoot) {
                    continue;
                }
                String mapping = extractMapping(template);
                if (mapping == null) {
                    // Neither a TemplateMapping nor a ConditionalMapping: nothing to record.
                    continue;
                }
                this.cache.put(key, mapping);
                String record = key + StringTable.HORIZONTAL_TABULATION + mapping;
                // Trimming drops the trailing tab left by an empty or conditional mapping.
                String line = record.trim();
                if (line.length() > 0) {
                    logger.debug(record);
                    writeLine(line);
                }
            }
        } catch (Exception e) {
            // Best effort: a broken page must not stop the whole extraction, but it must be visible.
            logger.error("Unable to process mapping page: " + str2, e);
        }
    }

    /**
     * Returns the mapping string for a root template: the {@code mapToClass} value (or an empty
     * string when absent) for a {@code TemplateMapping}, the tab-separated list of conditions for a
     * {@code ConditionalMapping}, or {@code null} for any other template.
     */
    private static String extractMapping(WikiTemplate template) {
        String templateName = template.getFirstPart();
        if (templateName.equals("ConditionalMapping")) {
            return extractConditionalMapping(template);
        }
        if (templateName.equals("TemplateMapping")) {
            String mapToClass = template.getHashMapOfParts().get("mapToClass");
            return mapToClass != null ? mapToClass : "";
        }
        return null;
    }

    /**
     * Serializes the conditions of a {@code ConditionalMapping} template as a sequence of
     * {@code operator|templateProperty|value|class} entries, each followed by a tab.
     */
    private static String extractConditionalMapping(WikiTemplate conditional) {
        logger.trace("Conditional");
        StringBuilder conditions = new StringBuilder();
        for (WikiTemplate condition : WikiTemplateParser.parse(conditional.getContent(), false)) {
            if (!condition.getFirstPart().equals("Condition")) {
                continue;
            }
            HashMap<String, String> parts = condition.getHashMapOfParts();
            String property = parts.get("templateProperty");
            String operator = parts.get("operator");
            String value = parts.get("value");
            logger.trace("Property: " + property);
            logger.trace("Operator: " + operator);
            logger.trace("Value: " + value);
            logger.trace("Mapping: " + parts.get("mapping"));
            for (WikiTemplate mapping : WikiTemplateParser.parse(parts.get("mapping"), false)) {
                if (!mapping.isRoot || !mapping.getFirstPart().equals("TemplateMapping")) {
                    continue;
                }
                String mapToClass = mapping.getHashMapOfParts().get("mapToClass");
                if (mapToClass == null) {
                    continue;
                }
                // A missing operator is written as "null"; property and value are written as empty.
                conditions.append(operator);
                conditions.append(StringTable.VERTICAL_LINE);
                if (property != null) {
                    conditions.append(property);
                }
                conditions.append(StringTable.VERTICAL_LINE);
                if (value != null) {
                    conditions.append(value);
                }
                conditions.append(StringTable.VERTICAL_LINE);
                conditions.append(mapToClass);
                conditions.append(StringTable.HORIZONTAL_TABULATION);
            }
        }
        return conditions.toString();
    }

    /** Appends {@code line} followed by a newline to the output while holding this object's lock. */
    private void writeLine(String line) {
        synchronized (this) {
            this.dbpediaWriter.append(line).append("\n");
        }
    }

    /** Not used by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void filePage(String str, String str2, int i) {
    }

    /** Not used by this extractor; use {@link #start(String, String, String)} instead. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void start(ExtractorParameters extractorParameters) {
    }

    /** Not used by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void disambiguationPage(String str, String str2, int i) {
    }

    /** Not used by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void categoryPage(String str, String str2, int i) {
    }

    /** Not used by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void templatePage(String str, String str2, int i) {
    }

    /** Not used by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void redirectPage(String str, String str2, int i) {
    }

    /** Not used by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void portalPage(String str, String str2, int i) {
    }

    /** Not used by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void projectPage(String str, String str2, int i) {
    }

    /**
     * Finishes the output: writes one line per redirect whose target has a known mapping, then the
     * manually added lines, and finally flushes and closes the writer. Each of the two sections is
     * preceded by an empty line. Does nothing when the output file was never opened.
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.AbstractWikipediaExtractor, org.fbk.cit.hlt.thewikimachine.xmldump.AbstractWikipediaXmlDumpParser
    public void endProcess() {
        if (this.dbpediaWriter == null) {
            logger.warn("Output writer was never opened; nothing to finalize");
            return;
        }
        if (!this.redirect.isEmpty()) {
            this.dbpediaWriter.append("\n");
            for (java.util.Map.Entry<String, String> entry : this.redirect.entrySet()) {
                String mapping = this.cache.get(entry.getValue());
                if (mapping == null) {
                    // The redirect target was never seen (or was ignored): nothing to write.
                    continue;
                }
                String record = entry.getKey() + StringTable.HORIZONTAL_TABULATION + mapping;
                String line = record.trim();
                if (line.length() > 0) {
                    logger.debug(record);
                    writeLine(line);
                }
            }
        }
        if (!this.added.isEmpty()) {
            this.dbpediaWriter.append("\n");
            for (String manualLine : this.added) {
                this.dbpediaWriter.append(manualLine).append("\n");
            }
        }
        this.dbpediaWriter.flush();
        this.dbpediaWriter.close();
    }

    /**
     * Command-line entry point. Requires {@code --input} and {@code --output}; {@code --manual} is
     * optional. Exits with status 1 when the command line cannot be parsed.
     *
     * @param strArr command-line arguments
     * @throws IOException declared for compatibility with existing callers
     */
    public static void main(String[] strArr) throws IOException {
        CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger();
        OptionBuilder.withDescription("Input file");
        OptionBuilder.hasArg();
        OptionBuilder.withArgName("file");
        OptionBuilder.withLongOpt("input");
        OptionBuilder.isRequired();
        // NOTE(review): this constant looks like a decompiler substitution for a short option
        // literal (presumably "i") - confirm against the original source before changing it.
        commandLineWithLogger.addOption(OptionBuilder.create(AbstractBottomUpParser.INCOMPLETE));
        OptionBuilder.withDescription("Output file");
        OptionBuilder.hasArg();
        OptionBuilder.withArgName("file");
        OptionBuilder.withLongOpt("output");
        OptionBuilder.isRequired();
        commandLineWithLogger.addOption(OptionBuilder.create("o"));
        OptionBuilder.withDescription("Manual mappings file");
        OptionBuilder.hasArg();
        OptionBuilder.withArgName("file");
        OptionBuilder.withLongOpt("manual");
        commandLineWithLogger.addOption(OptionBuilder.create("m"));
        CommandLine commandLine;
        try {
            commandLine = commandLineWithLogger.getCommandLine(strArr);
            System.out.println(commandLineWithLogger.getLoggerProps());
            PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps());
        } catch (Exception e) {
            // The logger may not be configured yet, so report on stderr before exiting.
            System.err.println("Unable to parse the command line: " + e.getMessage());
            System.exit(1);
            return;
        }
        String inputPath = commandLine.getOptionValue("input");
        String outputPath = commandLine.getOptionValue("output");
        String manualPath = commandLine.getOptionValue("manual");
        logger.debug("Debug message");
        new DBpediaAllMappingsExtractor().start(inputPath, outputPath, manualPath);
    }
}
