package org.apache.lucene.benchmark.utils;

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.FileAttribute;
import java.util.Properties;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
import org.apache.lucene.util.IOUtils;

/* loaded from: input_file:org/apache/lucene/benchmark/utils/ExtractWikipedia.class */
/**
 * Extracts the documents produced by a {@link DocMaker} into individual text files laid out in a
 * nested directory tree below an output directory.
 *
 * <p>Each document is written to {@code <id>.txt}; the directory it lands in is derived from a
 * running document counter (see {@link #directory(int, Path)}).
 */
public class ExtractWikipedia {
    /** Root directory below which all extracted files are written. */
    private final Path outputDir;

    /**
     * Running index of the next document to be written by {@link #create}. This is a plain static
     * counter with no synchronization and is shared by every instance of this class.
     */
    public static int count = 0;

    /** Radix used to split the document index into nested directory levels. */
    static final int BASE = 10;

    /** Source of the documents to extract. */
    protected DocMaker docMaker;

    /**
     * Creates an extractor and removes {@code path} via {@code IOUtils.rm} before returning.
     *
     * @param docMaker supplier of the documents to extract
     * @param path root output directory
     * @throws IOException if removing {@code path} fails
     */
    public ExtractWikipedia(DocMaker docMaker, Path path) throws IOException {
        this.outputDir = path;
        this.docMaker = docMaker;
        System.out.println("Deleting all files in " + path);
        IOUtils.rm(path);
    }

    /**
     * Computes the directory in which the document with index {@code i} is stored.
     *
     * <p>Indices below {@link #BASE} map to {@code path} itself. Otherwise the largest power of
     * {@code BASE} not exceeding {@code i} and the leading digit of {@code i} become two nested
     * directory names, and the remainder is resolved recursively below them. For example index
     * {@code 123} maps to {@code path/100/1/10/2}.
     *
     * @param i document index
     * @param path directory to resolve against, or {@code null} to start at the output directory
     * @return the directory for document {@code i}
     */
    public Path directory(int i, Path path) {
        if (path == null) {
            path = this.outputDir;
        }
        if (i < BASE) {
            return path;
        }
        // Accumulate in a long: the first power of BASE strictly greater than i does not fit in an
        // int once i reaches 1_000_000_000, and an int accumulator would wrap around.
        long power = BASE;
        while (power <= i) {
            power *= BASE;
        }
        // Largest power of BASE that is <= i; at most 10^9, so the narrowing cast is safe.
        int bucket = (int) (power / BASE);
        Path nested = path.resolve(Integer.toString(bucket)).resolve(Integer.toString(i / bucket));
        return directory(i % bucket, nested);
    }

    /**
     * Writes one document to {@code <str>.txt} inside the directory selected by the current value
     * of {@link #count}, then advances the counter. The file is UTF-8 encoded and contains
     * {@code str3}, {@code str2} and {@code str4}, separated by blank lines.
     *
     * @param str file base name ({@link #extract()} passes the document id)
     * @param str2 second section of the file ({@link #extract()} passes the title)
     * @param str3 first section of the file ({@link #extract()} passes the date)
     * @param str4 last section of the file ({@link #extract()} passes the body)
     * @throws IOException if the directory or the file cannot be created or written
     */
    public void create(String str, String str2, String str3, String str4) throws IOException {
        Path dir = directory(count++, null);
        Files.createDirectories(dir);
        Path file = dir.resolve(str + ".txt");
        try (BufferedWriter writer = Files.newBufferedWriter(file, StandardCharsets.UTF_8)) {
            writer.write(str3 + "\n\n" + str2 + "\n\n" + str4 + "\n");
        }
    }

    /**
     * Pulls documents from the {@link DocMaker} and writes each one with {@link #create} until the
     * maker returns {@code null} or throws {@link NoMoreDataException}, then prints the elapsed
     * wall-clock time.
     *
     * @throws Exception if producing or writing a document fails
     */
    public void extract() throws Exception {
        System.out.println("Starting Extraction");
        long start = System.currentTimeMillis();
        try {
            Document doc;
            while ((doc = this.docMaker.makeDocument()) != null) {
                create(
                    doc.get(DocMaker.ID_FIELD),
                    doc.get(DocMaker.TITLE_FIELD),
                    doc.get(DocMaker.DATE_FIELD),
                    doc.get(DocMaker.BODY_FIELD));
            }
        } catch (NoMoreDataException ignored) {
            // Treated as the normal end of input. The try block must enclose the whole loop:
            // catching inside the loop would call makeDocument() again and never terminate.
        }
        System.out.println("Extraction took " + (System.currentTimeMillis() - start) + " ms");
    }

    /**
     * Command-line entry point. Recognized options: {@code --input|-i <file>} (required),
     * {@code --output|-o <dir>} (defaults to {@code enwiki}) and {@code --discardImageOnlyDocs|-d}.
     * Unrecognized arguments are ignored. Prints the usage message and returns when the input
     * option is absent, an option is missing its value, or the input file does not exist.
     *
     * @param strArr command-line arguments
     * @throws Exception if configuring the source or extracting fails
     */
    public static void main(String[] strArr) throws Exception {
        Path input = null;
        Path output = Paths.get("enwiki");
        boolean keepImageOnlyDocs = true;
        for (int i = 0; i < strArr.length; i++) {
            String arg = strArr[i];
            if (arg.equals("--input") || arg.equals("-i")) {
                if (i + 1 >= strArr.length) {
                    // Option given without its value.
                    printUsage();
                    return;
                }
                input = Paths.get(strArr[++i]);
            } else if (arg.equals("--output") || arg.equals("-o")) {
                if (i + 1 >= strArr.length) {
                    // Option given without its value.
                    printUsage();
                    return;
                }
                output = Paths.get(strArr[++i]);
            } else if (arg.equals("--discardImageOnlyDocs") || arg.equals("-d")) {
                keepImageOnlyDocs = false;
            }
        }

        // Validate the input before it is dereferenced or handed to the content source, so that a
        // missing or nonexistent file produces the usage message instead of a failure further down.
        if (input == null || !Files.exists(input)) {
            printUsage();
            return;
        }

        Properties properties = new Properties();
        properties.setProperty("docs.file", input.toAbsolutePath().toString());
        properties.setProperty("content.source.forever", "false");
        properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
        Config config = new Config(properties);

        EnwikiContentSource source = new EnwikiContentSource();
        source.setConfig(config);

        DocMaker docMaker = new DocMaker();
        docMaker.setConfig(config, source);
        docMaker.resetInputs();

        System.out.println("Extracting Wikipedia to: " + output + " using EnwikiContentSource");
        Files.createDirectories(output);
        new ExtractWikipedia(docMaker, output).extract();
    }

    /** Prints the command-line usage message to standard error. */
    private static void printUsage() {
        System.err.println("Usage: java -cp <...> org.apache.lucene.benchmark.utils.ExtractWikipedia --input|-i <Path to Wikipedia XML file> [--output|-o <Output Path>] [--discardImageOnlyDocs|-d]");
        System.err.println("--discardImageOnlyDocs tells the extractor to skip Wiki docs that contain only images");
    }
}
