package nlp4j.wiki;

import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.List;
import nlp4j.Document;
import nlp4j.crawler.Crawler;
import nlp4j.webcrawler.AbstractWebCrawler;
import nlp4j.webcrawler.FileDownloader;
import nlp4j.wiki.util.MediaWikiMD5Util;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
 * Crawler that downloads the files of one Wikimedia dump into a local directory.
 *
 * <p>Three files are fetched, in this order: the MD5 sums file, the multistream index file
 * and the multistream articles dump. The index and dump files are checked against the MD5
 * sums file via {@link MediaWikiMD5Util#checkMD5}.
 *
 * <p>Configuration is done through {@link #setProperty(String, String)} or the
 * {@link Builder}. Recognised keys are {@code version}, {@code outdir}, {@code language}
 * (default {@code "en"}) and {@code media} (default {@code "wiktionary"}).
 */
public class MediaWikiDownloader extends AbstractWebCrawler implements Crawler {
    private static final Logger logger = LogManager.getLogger(MethodHandles.lookup().lookupClass());

    /** File name pattern of the MD5 sums file: language, media, version. */
    public static final String MD5_FILENAME = "%s%s-%s-md5sums.txt";
    /** URL pattern of a dump file: language, media, version, file name. */
    public static final String WIKIMEDIA_URL_BASE = "https://dumps.wikimedia.org/%s%s/%s/%s";
    /** File name pattern of the articles dump: language, media, version. */
    public static final String WIKTIONARY_DUMP_FILENAME_BASE = "%s%s-%s-pages-articles-multistream.xml.bz2";
    /** File name pattern of the dump index: language, media, version. */
    public static final String WIKTIONARY_INDEX_FILENAME_BASE = "%s%s-%s-pages-articles-multistream-index.txt.bz2";

    private String language = "en";
    private String media = "wiktionary";
    private String outdir = null;
    private String version = null;

    /**
     * Fluent builder for {@link MediaWikiDownloader}.
     *
     * <p>Values that were never set on the builder are not forwarded, so the downloader
     * keeps its own defaults for them.
     */
    public static class Builder {
        private String version;
        private String outdir;
        private String language;
        private String media;

        /**
         * @param version value for the {@code version} property
         * @return this builder
         */
        public Builder version(String version) {
            this.version = version;
            return this;
        }

        /**
         * @param outdir value for the {@code outdir} property (local target directory)
         * @return this builder
         */
        public Builder outdir(String outdir) {
            this.outdir = outdir;
            return this;
        }

        /**
         * @param language value for the {@code language} property
         * @return this builder
         */
        public Builder language(String language) {
            this.language = language;
            return this;
        }

        /**
         * @param mediaVar media kind; its enum constant name is used as the {@code media} property
         * @return this builder
         */
        public Builder media(media mediaVar) {
            this.media = mediaVar.name();
            return this;
        }

        /**
         * Creates a downloader configured with the values set on this builder.
         *
         * <p>Only non-null values are applied. Forwarding an unset (null) language would
         * overwrite the downloader's default, and an unset media would be rejected by
         * {@link MediaWikiDownloader#setProperty(String, String)}.
         *
         * @return a new, configured downloader
         */
        public MediaWikiDownloader build() {
            MediaWikiDownloader downloader = new MediaWikiDownloader();
            if (this.version != null) {
                downloader.setProperty("version", this.version);
            }
            if (this.outdir != null) {
                downloader.setProperty("outdir", this.outdir);
            }
            if (this.language != null) {
                downloader.setProperty("language", this.language);
            }
            if (this.media != null) {
                downloader.setProperty("media", this.media);
            }
            return downloader;
        }
    }

    /** Media kinds accepted by {@link Builder#media(media)}; the constant name is used in URLs and file names. */
    public enum media {
        wiki,
        wiktionary
    }

    /**
     * Downloads the MD5 sums file, the dump index and the dump data into {@code outdir},
     * verifying the index and the data against the MD5 sums file.
     *
     * <p>If {@code version} or {@code outdir} is not set, nothing is downloaded. An
     * {@link IOException} during download or a failed MD5 check stops the remaining steps
     * and is logged; it is not propagated to the caller.
     *
     * @return always {@code null}; this crawler produces files, not documents
     */
    public List<Document> crawlDocuments() {
        if (this.version == null) {
            logger.info("property is not set: version");
            return null;
        }
        if (this.outdir == null) {
            logger.info("property is not set: outdir");
            return null;
        }
        FileDownloader fileDownloader = new FileDownloader();
        try {
            File md5File = fetch(fileDownloader, getURL_md5(), MD5_FILENAME);
            fetchAndVerify(fileDownloader, getURL_dumpIndex(), WIKTIONARY_INDEX_FILENAME_BASE, md5File);
            fetchAndVerify(fileDownloader, getURL_dumpData(), WIKTIONARY_DUMP_FILENAME_BASE, md5File);
        } catch (IOException e) {
            // Pass the exception itself so the stack trace ends up in the log.
            logger.error(e.getMessage(), e);
        }
        return null;
    }

    /** Downloads {@code url} to the file in {@code outdir} named by {@code fileNameFormat} and returns that file. */
    private File fetch(FileDownloader fileDownloader, String url, String fileNameFormat) throws IOException {
        String path = this.outdir + "/" + String.format(fileNameFormat, this.language, this.media, this.version);
        System.out.println("Download: " + url);
        System.out.println("File: " + path);
        fileDownloader.download(url, path);
        return new File(path);
    }

    /** Downloads a file like {@link #fetch} and throws {@link IOException} if its MD5 check against {@code md5File} fails. */
    private void fetchAndVerify(FileDownloader fileDownloader, String url, String fileNameFormat, File md5File)
            throws IOException {
        File downloaded = fetch(fileDownloader, url, fileNameFormat);
        logger.info("Checking MD5: " + downloaded.getAbsolutePath());
        if (!MediaWikiMD5Util.checkMD5(md5File, downloaded)) {
            throw new IOException("Invalid MD5: " + downloaded.getAbsolutePath());
        }
        logger.info("Checking MD5: done");
    }

    /**
     * @return URL of the articles dump for the configured language, media and version
     */
    public String getURL_dumpData() {
        return buildUrl(WIKTIONARY_DUMP_FILENAME_BASE);
    }

    /**
     * @return URL of the dump index for the configured language, media and version
     */
    public String getURL_dumpIndex() {
        return buildUrl(WIKTIONARY_INDEX_FILENAME_BASE);
    }

    /**
     * @return URL of the MD5 sums file for the configured language, media and version
     */
    public String getURL_md5() {
        return buildUrl(MD5_FILENAME);
    }

    /** Formats {@link #WIKIMEDIA_URL_BASE} with the file name produced from {@code fileNameFormat}. */
    private String buildUrl(String fileNameFormat) {
        String fileName = String.format(fileNameFormat, this.language, this.media, this.version);
        return String.format(WIKIMEDIA_URL_BASE, this.language, this.media, this.version, fileName);
    }

    /**
     * Stores the property in the superclass and mirrors the keys this class uses.
     *
     * <p>For {@code media} the accepted values are {@code "wiktionary"}, {@code "wiki"} and
     * {@code "wikipedia"}; the latter is stored as {@code "wiki"}. Keys other than
     * {@code version}, {@code outdir}, {@code language} and {@code media} are only passed
     * to the superclass.
     *
     * @param key property name
     * @param value property value
     * @throws IllegalArgumentException if {@code key} is {@code media} and {@code value} is
     *         null or not one of the accepted values
     */
    public void setProperty(String key, String value) {
        // The superclass is updated before validation, as in the original statement order.
        super.setProperty(key, value);
        if ("version".equals(key)) {
            this.version = value;
        } else if ("outdir".equals(key)) {
            this.outdir = value;
        } else if ("language".equals(key)) {
            this.language = value;
        } else if ("media".equals(key)) {
            if ("wikipedia".equals(value)) {
                // Alias: "wikipedia" is written as "wiki" in URLs and file names.
                this.media = "wiki";
            } else if ("wiktionary".equals(value) || "wiki".equals(value)) {
                this.media = value;
            } else {
                throw new IllegalArgumentException("Invalid value is set: key=" + key + ",value=" + value);
            }
        }
    }

    /** Prints the MD5, index and data URLs for the current configuration to standard output. */
    public void printURL() {
        System.out.println("MD5  : " + getURL_md5());
        System.out.println("INDEX: " + getURL_dumpIndex());
        System.out.println("DATA : " + getURL_dumpData());
    }
}
