package it.unimi.dsi.mg4j.tool;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import com.martiansoftware.jsap.stringparsers.ClassStringParser;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.mg4j.index.DowncaseTermProcessor;
import it.unimi.dsi.mg4j.index.TermProcessor;
import it.unimi.dsi.mg4j.io.WordReader;
import it.unimi.dsi.mg4j.util.Fast;
import it.unimi.dsi.mg4j.util.MutableString;
import it.unimi.dsi.mg4j.util.ProgressLogger;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.lang.reflect.InvocationTargetException;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;
import org.archive.crawler.writer.TrecWebWriterProcessor;

/* loaded from: input_file:site-search/heritrix/lib/mg4j-1.0.1.jar:it/unimi/dsi/mg4j/tool/ScanVirtual.class */
public final class ScanVirtual extends Scan {
    private static final Logger LOGGER;
    private static final boolean ASSERTS = true;
    static Class class$it$unimi$dsi$mg4j$tool$ScanVirtual;
    static Class class$it$unimi$dsi$mg4j$index$NullTermProcessor;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:site-search/heritrix/lib/mg4j-1.0.1.jar:it/unimi/dsi/mg4j/tool/ScanVirtual$SelfDelimUTF8WordReader.class */
    /**
     * A {@link WordReader} whose words are the self-delimited UTF-8 strings found on a
     * {@link DataInput}. It never produces a non-word part, and it keeps a public counter
     * of how many strings have been read so the caller can track positions.
     */
    public static final class SelfDelimUTF8WordReader implements WordReader {
        /** The input from which self-delimited UTF-8 strings are read. */
        private final DataInput dataInput;

        /**
         * Number of strings read by {@link #next(MutableString, MutableString)} since the
         * caller last reset this field. The read that yields the empty string is counted too.
         */
        public int count;

        /**
         * Creates a word reader on top of the given input.
         *
         * @param dataInput the input that will supply the self-delimited UTF-8 strings.
         */
        public SelfDelimUTF8WordReader(DataInput dataInput) {
            this.dataInput = dataInput;
        }

        /**
         * Reads the next self-delimited UTF-8 string into the first argument and empties
         * the second one.
         *
         * @param mutableString receives the string just read.
         * @param mutableString2 is truncated to length zero on every call.
         * @return {@code false} if the string just read is empty, {@code true} otherwise.
         * @throws IOException if reading from the underlying input fails; in that case
         *         {@link #count} is not incremented.
         */
        @Override // it.unimi.dsi.mg4j.io.WordReader
        public final boolean next(MutableString mutableString, MutableString mutableString2) throws IOException {
            // This reader never has a non-word part to report.
            mutableString2.length(0);
            mutableString.readSelfDelimUTF8(dataInput);
            count += 1;
            boolean readEmptyString = mutableString.length() == 0;
            return !readEmptyString;
        }

        /**
         * Not supported by this reader: the argument is ignored.
         *
         * @param reader ignored.
         * @return always {@code null}.
         */
        @Override // it.unimi.dsi.mg4j.io.WordReader
        public final WordReader setReader(Reader reader) {
            return null;
        }

        /**
         * Not supported by this reader.
         *
         * @return always {@code null}.
         */
        @Override // it.unimi.dsi.mg4j.io.WordReader
        public final WordReader copy() {
            return null;
        }
    }

    /**
     * Scans a stream of virtual-document fragments and feeds each fragment to a
     * {@link ScanVirtual} instance until the input is exhausted.
     *
     * <p>Each record on the input starts with an {@code int} document index (optionally
     * remapped through a renumbering array) followed by the fragment's words, which are read
     * through a {@link SelfDelimUTF8WordReader}. For every document a running position is
     * kept: it is advanced by the gap before a fragment is processed and by the number of
     * strings the word reader consumed afterwards.
     *
     * <p>An {@link EOFException} raised anywhere while reading a record is taken as the end
     * of input: the scanner is closed, the progress logger is marked done and the method
     * returns normally.
     *
     * @param str the index basename, forwarded to the {@link ScanVirtual} constructor; its
     *        parent directory is used for temporary files when {@code str3} is {@code null}.
     * @param dataInput the input containing the fragments.
     * @param i the gap added to a document's current position before each of its fragments.
     * @param termProcessor the term processor, forwarded to the {@link ScanVirtual} constructor.
     * @param i2 forwarded to the {@link ScanVirtual} constructor ({@code main} passes the
     *        {@code bufferSize} option here).
     * @param i3 forwarded to the {@link ScanVirtual} constructor ({@code main} passes the
     *        {@code batchSize} option here).
     * @param str2 name of a file of ints loaded with {@link BinIO#loadInts(String)} and used
     *        to remap document indices, or {@code null} for no remapping.
     * @param z forwarded to the {@link ScanVirtual} constructor ({@code main} passes the
     *        {@code keepUnsorted} switch here).
     * @param j the log interval handed to the {@link ProgressLogger}.
     * @param str3 the directory for temporary files, or {@code null} to use the parent of
     *        {@code str}.
     * @throws ConfigurationException declared by the original signature; presumably raised
     *         by the scanner (not visible in this class).
     * @throws IOException if loading the renumbering or reading the input fails for a reason
     *         other than end of file.
     */
    public static final void run(String str, DataInput dataInput, int i, TermProcessor termProcessor, int i2, int i3, String str2, boolean z, long j, String str3) throws ConfigurationException, IOException {
        final File tempDir;
        if (str3 != null) {
            tempDir = new File(str3);
        } else {
            tempDir = new File(str).getParentFile();
        }

        int[] renumbering = null;
        if (str2 != null) {
            renumbering = BinIO.loadInts(str2);
        }

        final ProgressLogger fragmentLogger = new ProgressLogger(LOGGER, j, "fragments");
        final ScanVirtual scanner = new ScanVirtual(str, termProcessor, i2, i3, z, tempDir);
        fragmentLogger.start("Indexing fragments...");
        final SelfDelimUTF8WordReader wordReader = new SelfDelimUTF8WordReader(dataInput);

        // currentPosition[d] is the position reached so far in document d.
        int[] currentPosition = IntArrays.EMPTY_ARRAY;
        try {
            for (;;) {
                int document = dataInput.readInt();
                if (renumbering != null) {
                    document = renumbering[document];
                }
                currentPosition = IntArrays.grow(currentPosition, document + 1);
                // Separate this fragment from whatever was already seen for the document.
                currentPosition[document] += i;
                wordReader.count = 0;
                scanner.processDocument(document, currentPosition[document], wordReader);
                currentPosition[document] += wordReader.count;
                fragmentLogger.update();
            }
        } catch (EOFException endOfInput) {
            // End of file is the only way out of the loop; finish up below.
        }
        scanner.close(null);
        fragmentLogger.done();
    }

    /**
     * Command-line entry point: parses the options with JSAP and invokes
     * {@link #run(String, DataInput, int, TermProcessor, int, int, String, boolean, long, String)}.
     *
     * <p>Fragments are read from the file given with {@code --virtual-documents} if present,
     * otherwise from standard input. A file opened here is closed before returning, whether
     * {@code run} completes or throws; standard input is never closed.
     *
     * <p>The term processor is the downcasing one when {@code --downcase} is given; otherwise
     * it is obtained by reflectively calling the no-argument {@code getInstance} method of
     * the class named by {@code --term-processor}.
     *
     * @param strArr the command-line arguments.
     * @throws JSAPException if the option definitions are rejected by JSAP.
     * @throws ConfigurationException propagated from {@code run}.
     * @throws IOException if the input file cannot be opened or an I/O error occurs while scanning.
     * @throws IllegalAccessException if the term processor's {@code getInstance} is not accessible.
     * @throws InvocationTargetException if the term processor's {@code getInstance} throws.
     * @throws NoSuchMethodException if the term processor class has no {@code getInstance} method.
     */
    public static final void main(String[] strArr) throws JSAPException, ConfigurationException, IllegalArgumentException, SecurityException, IOException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
        // Compiler-generated class-literal idiom: look the class up once and cache it.
        // NOTE(review): equivalent to a plain class literal; kept to avoid touching the cache fields.
        Class cls = class$it$unimi$dsi$mg4j$tool$ScanVirtual;
        if (cls == null) {
            cls = m461class("[Lit.unimi.dsi.mg4j.tool.ScanVirtual;", false);
            class$it$unimi$dsi$mg4j$tool$ScanVirtual = cls;
        }
        String name = cls.getName();
        Parameter[] parameterArr = new Parameter[11];
        parameterArr[0] = new FlaggedOption("virtualDocs", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'v', "virtual-documents", "A serialised sequence of virtual documents; it will be used instead of stdin.");
        parameterArr[1] = new FlaggedOption("gap", JSAP.INTEGER_PARSER, "32", false, 'g', "gap", "A gap that will be introducted between different fragments of the same virtual document.");
        ClassStringParser classStringParser = JSAP.CLASS_PARSER;
        Class cls2 = class$it$unimi$dsi$mg4j$index$NullTermProcessor;
        if (cls2 == null) {
            cls2 = m461class("[Lit.unimi.dsi.mg4j.index.NullTermProcessor;", false);
            class$it$unimi$dsi$mg4j$index$NullTermProcessor = cls2;
        }
        parameterArr[2] = new FlaggedOption("termProcessor", classStringParser, cls2.getName(), false, 't', "term-processor", "Sets the term processor to the given class.");
        parameterArr[3] = new Switch("downcase", (char) 0, "downcase", "A shortcut for setting the term processor to the downcasing processor.");
        parameterArr[4] = new FlaggedOption("batchSize", JSAP.INTSIZE_PARSER, Index.DEFAULT_BATCH_SIZE, false, 's', "batch-size", "The size of a batch.");
        parameterArr[5] = new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, Index.DEFAULT_BUFFER_SIZE, false, 'b', "buffer-size", "The size of an I/O buffer.");
        parameterArr[6] = new FlaggedOption("renumber", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'r', "renumber", "The filename of a document renumbering.");
        parameterArr[7] = new Switch("keepUnsorted", 'u', "keep-unsorted", "Keep the unsorted term file.");
        parameterArr[8] = new FlaggedOption("logInterval", JSAP.LONG_PARSER, Long.toString(10000L), false, 'l', "log-interval", "The minimum time interval between activity logs in milliseconds.");
        parameterArr[9] = new FlaggedOption("tempDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'T', "temp-dir", "A directory for all temporary files (e.g., batches).");
        parameterArr[10] = new UnflaggedOption(TrecWebWriterProcessor.ATTR_BASENAME, JSAP.STRING_PARSER, true, "The basename of the resulting index.");
        SimpleJSAP simpleJSAP = new SimpleJSAP(name, "Builds a set of batches from a file of virtual documents.", parameterArr);
        JSAPResult parse = simpleJSAP.parse(strArr);
        if (simpleJSAP.messagePrinted()) {
            // JSAP already printed help or an error message; nothing to do.
            return;
        }
        Class termProcessorClass = parse.getClass("termProcessor");
        Fast.ensureLog4JIsConfigured();

        String basename = parse.getString(TrecWebWriterProcessor.ATTR_BASENAME);
        // Only a stream opened here is owned (and later closed) by this method.
        FileInputStream virtualDocsFile = parse.contains("virtualDocs") ? new FileInputStream(parse.getString("virtualDocs")) : null;
        try {
            DataInputStream virtualDocs = new DataInputStream(new FastBufferedInputStream(virtualDocsFile != null ? virtualDocsFile : System.in));
            int gap = parse.getInt("gap");
            // Explicit array casts: a bare null for a varargs parameter is ambiguous to the compiler.
            TermProcessor termProcessor = parse.getBoolean("downcase")
                    ? DowncaseTermProcessor.getInstance()
                    : (TermProcessor) termProcessorClass.getMethod("getInstance", (Class[]) null).invoke(termProcessorClass, (Object[]) null);
            run(basename, virtualDocs, gap, termProcessor, parse.getInt("bufferSize"), parse.getInt("batchSize"), parse.getString("renumber"), parse.getBoolean("keepUnsorted"), parse.getLong("logInterval"), parse.getString("tempDir"));
        } finally {
            if (virtualDocsFile != null) {
                virtualDocsFile.close();
            }
        }
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v5, types: [java.lang.Throwable, java.lang.Class] */
    /* renamed from: class, reason: not valid java name */
    static Class m461class(String str, boolean z) {
        ?? componentType;
        try {
            Class<?> cls = Class.forName(str);
            if (z) {
                return cls;
            }
            componentType = cls.getComponentType();
            return componentType;
        } catch (ClassNotFoundException unused) {
            throw new NoClassDefFoundError().initCause(componentType);
        }
    }

    /**
     * Creates a scanner for virtual documents by delegating to the {@link Scan} constructor.
     *
     * <p>The third superclass argument is fixed to {@code false} and the seventh to
     * {@code null}. NOTE(review): their meaning is defined by {@code Scan}, which is not
     * visible here; confirm against that class.
     *
     * @param str forwarded unchanged as the first superclass argument ({@code run} passes the
     *        index basename).
     * @param termProcessor forwarded unchanged to the superclass.
     * @param i forwarded as the fourth superclass argument ({@code run} passes its {@code i2},
     *        which {@code main} fills with the {@code bufferSize} option).
     * @param i2 forwarded as the fifth superclass argument ({@code run} passes its {@code i3},
     *        which {@code main} fills with the {@code batchSize} option).
     * @param z forwarded as the sixth superclass argument ({@code main} fills it with the
     *        {@code keepUnsorted} switch).
     * @param file forwarded as the last superclass argument ({@code run} passes the directory
     *        chosen for temporary files).
     */
    public ScanVirtual(String str, TermProcessor termProcessor, int i, int i2, boolean z, File file) {
        super(str, termProcessor, false, i, i2, z, null, file);
    }

    // Class initialisation: resolve (and cache) this class through the generated
    // class-literal helper, then create the logger for it.
    // NOTE(review): the lookup is what old compilers emitted for a class literal of this class.
    static {
        if (class$it$unimi$dsi$mg4j$tool$ScanVirtual == null) {
            class$it$unimi$dsi$mg4j$tool$ScanVirtual = m461class("[Lit.unimi.dsi.mg4j.tool.ScanVirtual;", false);
        }
        LOGGER = Fast.getLogger(class$it$unimi$dsi$mg4j$tool$ScanVirtual);
    }
}
