package it.unimi.dsi.mg4j.tool;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.ParseException;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import com.martiansoftware.jsap.stringparsers.ClassStringParser;
import com.martiansoftware.jsap.stringparsers.LongSizeStringParser;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.mg4j.document.DocumentFactory;
import it.unimi.dsi.mg4j.document.DocumentSequence;
import it.unimi.dsi.mg4j.document.InputStreamDocumentSequence;
import it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory;
import it.unimi.dsi.mg4j.index.DowncaseTermProcessor;
import it.unimi.dsi.mg4j.index.TermProcessor;
import it.unimi.dsi.mg4j.io.FileLinesCollection;
import it.unimi.dsi.mg4j.util.Fast;
import it.unimi.dsi.mg4j.util.ImmutableExternalTriePrefixDictionary;
import it.unimi.dsi.mg4j.util.MutableString;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;
import org.archive.crawler.writer.TrecWebWriterProcessor;

/* loaded from: input_file:site-search/heritrix/lib/mg4j-1.0.1.jar:it/unimi/dsi/mg4j/tool/Index.class */
public class Index {
    /** Logger for this tool; assigned once in the static initializer via {@code Fast.getLogger}. */
    private static final Logger LOGGER;
    /** Default value of the {@code --buffer-size} option declared in {@link #main(String[])}. */
    public static final String DEFAULT_BUFFER_SIZE = "1Mi";
    /** Global batch size that {@link #main(String[])} hands to {@code parseQualifiedSizes} as the fallback specification. */
    public static final String DEFAULT_BATCH_SIZE = "2Mi";
    /** Default document delimiter as a character code: 10 is the line feed (U+000A). */
    public static final int DEFAULT_DELIMITER = 10;
    // The four fields below are lazily filled caches: each is checked for null and, if unset,
    // populated through m453class(...) the first time the corresponding Class object is needed.
    // NOTE(review): the class$... naming looks like compiler-generated support for class
    // literals (pre-Java 5 bytecode) — confirm before treating these as hand-written state.
    static Class class$it$unimi$dsi$mg4j$tool$Index;
    static Class class$it$unimi$dsi$mg4j$index$IndexWriter;
    static Class class$it$unimi$dsi$mg4j$document$IdentityDocumentFactory;
    static Class class$it$unimi$dsi$mg4j$index$NullTermProcessor;
    /**
     * Returns the document sequence that will be indexed.
     *
     * <p>If {@code str} is non-null, the sequence is deserialised from it with
     * {@code BinIO.loadObject} and the remaining arguments are ignored. Otherwise a sequence
     * reading from standard input is created, using {@code i} as the document delimiter and a
     * factory obtained from {@code PropertyBasedDocumentFactory.getInstance(cls, strArr)}.
     *
     * @param str name of a serialised document sequence, or {@code null} to read standard input.
     * @param cls the document factory class (only used when {@code str} is {@code null}).
     * @param strArr property specifications passed to the factory (only used when {@code str} is {@code null}).
     * @param i the document delimiter, logged as a number at debug level.
     * @return the document sequence to index.
     */
    public static DocumentSequence getSequence(String str, Class cls, String[] strArr, int i) throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, IOException, ClassNotFoundException, InstantiationException {
        if (str == null) {
            LOGGER.debug("Documents will be separated by the Unicode character " + i);
            return new InputStreamDocumentSequence(System.in, i, PropertyBasedDocumentFactory.getInstance(cls, strArr));
        }
        // A serialised sequence was supplied: it takes precedence over standard input.
        return (DocumentSequence) BinIO.loadObject(str);
    }

    /**
     * Translates a list of field names into a sorted array of field indices of the given factory.
     *
     * <p>With an empty name list, every field whose {@code fieldType} is 0 is selected
     * (presumably the text field type, judging by the error message used below — confirm
     * against {@code DocumentFactory}). With explicit names, each name must be known to the
     * factory and must have field type 0.
     *
     * @param strArr the field names requested by the user, possibly empty.
     * @param documentFactory the factory whose fields are being selected.
     * @return the selected field indices, in increasing order.
     * @throws IllegalArgumentException if a name is unknown to the factory or has a type other than 0.
     */
    public static int[] parseFieldNames(String[] strArr, DocumentFactory documentFactory) {
        final IntArrayList selected = new IntArrayList();
        if (strArr.length != 0) {
            for (final String fieldName : strArr) {
                final int index = documentFactory.fieldIndex(fieldName);
                if (index < 0) {
                    throw new IllegalArgumentException("Field " + fieldName + " is not part of factory " + documentFactory.getClass().getName());
                }
                if (documentFactory.fieldType(index) != 0) {
                    throw new IllegalArgumentException("You cannot index field " + fieldName + " as it is not a text field");
                }
                selected.add(index);
            }
        } else {
            // No explicit selection: take every field of type 0.
            int candidate = 0;
            while (candidate < documentFactory.numberOfFields()) {
                if (documentFactory.fieldType(candidate) == 0) {
                    selected.add(candidate);
                }
                candidate++;
            }
        }
        final int[] result = selected.toIntArray();
        Arrays.sort(result);
        return result;
    }

    /**
     * Parses a list of size specifications of the form {@code [<field>:]<size>} into one size
     * per indexed field.
     *
     * <p>A specification without a colon sets the size for all fields; if several are given the
     * last one wins, and if none is given {@code str} is used. A specification with a colon
     * overrides the size of the named field only.
     *
     * @param strArr the size specifications, possibly empty.
     * @param str the global size specification used when {@code strArr} contains no unqualified entry.
     * @param iArr the indices of the fields being indexed; the result is parallel to this array.
     * @param documentFactory the factory used to resolve field names.
     * @return an array parallel to {@code iArr} containing the size chosen for each field.
     * @throws ParseException if a size cannot be parsed by {@code LongSizeStringParser}.
     * @throws IllegalArgumentException if a qualified field is unknown to the factory, is not
     * among the indexed fields, or if a size does not fit in an {@code int}.
     */
    public static int[] parseQualifiedSizes(String[] strArr, String str, int[] iArr, DocumentFactory documentFactory) throws ParseException {
        final int[] sizes = new int[iArr.length];
        final IntArrayList indexedFields = IntArrayList.wrap(iArr);

        // The last unqualified specification, if any, replaces the supplied default.
        String globalSpec = str;
        for (final String spec : strArr) {
            if (spec.indexOf(':') == -1) {
                globalSpec = spec;
            }
        }
        // Parse the global size once; it is only needed when there is at least one field.
        if (sizes.length > 0) {
            Arrays.fill(sizes, toIntSize(globalSpec));
        }

        // Apply the per-field overrides.
        for (final String spec : strArr) {
            final int colon = spec.indexOf(':');
            if (colon < 0) {
                continue;
            }
            final String fieldName = spec.substring(0, colon);
            final int fieldIndex = documentFactory.fieldIndex(fieldName);
            if (fieldIndex < 0) {
                throw new IllegalArgumentException("Field " + fieldName + " is not part of factory " + documentFactory.getClass().getName());
            }
            final int position = indexedFields.indexOf(fieldIndex);
            if (position < 0) {
                throw new IllegalArgumentException("Field " + documentFactory.fieldName(fieldIndex) + " is not being indexed");
            }
            sizes[position] = toIntSize(spec.substring(colon + 1));
        }
        return sizes;
    }

    /** Parses a size specification and rejects values that would be silently truncated by a cast to {@code int}. */
    private static int toIntSize(String spec) throws ParseException {
        final long size = LongSizeStringParser.parseSize(spec);
        if (size != (int) size) {
            throw new IllegalArgumentException("Size " + spec + " does not fit in an int");
        }
        return (int) size;
    }

    /**
     * Combines a list of compression flag names into a single bit mask.
     *
     * <p>Each name is looked up reflectively as a public field of the {@code IndexWriter} class
     * and its {@code long} value is OR-ed into the result. As a side effect,
     * {@code mutableString} is cleared and then filled with the accepted names separated by
     * {@code " | "}.
     *
     * @param strArr the flag names, possibly empty.
     * @param mutableString receives the textual form of the flags; its previous content is discarded.
     * @return the bitwise OR of the values of the named fields (0 if {@code strArr} is empty).
     * @throws IllegalArgumentException if a name cannot be resolved to a readable {@code long}
     * field; the underlying exception is attached as the cause.
     */
    public static long parseCompressionFlags(String[] strArr, MutableString mutableString) {
        long flags = 0;
        mutableString.length(0);
        for (int i = 0; i < strArr.length; i++) {
            final String flagName = strArr[i];
            try {
                // Lazily resolve (and cache) the class that declares the flag constants.
                Class writerClass = class$it$unimi$dsi$mg4j$index$IndexWriter;
                if (writerClass == null) {
                    writerClass = m453class("[Lit.unimi.dsi.mg4j.index.IndexWriter;", false);
                    class$it$unimi$dsi$mg4j$index$IndexWriter = writerClass;
                }
                flags |= writerClass.getField(flagName).getLong(writerClass);
                if (mutableString.length() != 0) {
                    mutableString.append(" | ");
                }
                mutableString.append(flagName);
            } catch (Exception e) {
                // Keep the original failure as the cause so the real reason is not lost.
                throw new IllegalArgumentException("Compression method " + flagName + " unknown.", e);
            }
        }
        return flags;
    }

    /**
     * Builds one index per indexed field: scans the document sequence into batches, combines
     * the batches of each field, and finally stores a prefix map for each field's term list.
     *
     * <p>The index for a field is named {@code str + '-' + fieldName}. Batches are combined with
     * {@code Merge} when {@code str3} is non-null and with {@code Concatenate} otherwise.
     *
     * @param str the basename of the resulting indices.
     * @param documentSequence the documents to index.
     * @param termProcessor the term processor handed to {@code Scan.run}.
     * @param str2 handed to {@code Scan.run}; {@code main} passes the {@code zipCollection} option here.
     * @param i the buffer size, handed to the scan and combine steps.
     * @param z handed to {@code Scan.run}; {@code main} passes the {@code keepUnsorted} switch here.
     * @param iArr the indices of the fields to index.
     * @param iArr2 the batch sizes, handed to {@code Scan.run}.
     * @param str3 the renumbering filename, or {@code null}; also selects merge versus concatenate.
     * @param z2 handed to the combine step; {@code main} passes the {@code skips} switch here.
     * @param j the compression flags, handed to the combine step.
     * @param charSequence the textual form of the compression flags, handed to the combine step.
     * @param i2 handed to the combine step; {@code main} passes the {@code quantum} option here.
     * @param i3 handed to the combine step; {@code main} passes the {@code height} option here.
     * @param j2 the log interval, handed to the scan and combine steps.
     * @param str4 handed to {@code Scan.run}; {@code main} passes the {@code tempDir} option here.
     */
    public static void run(String str, DocumentSequence documentSequence, TermProcessor termProcessor, String str2, int i, boolean z, int[] iArr, int[] iArr2, String str3, boolean z2, long j, CharSequence charSequence, int i2, int i3, long j2, String str4) throws ConfigurationException, IOException {
        final DocumentFactory factory = documentSequence.factory();
        final int fieldCount = iArr.length;
        final String[] basenames = new String[fieldCount];
        for (int f = 0; f < fieldCount; f++) {
            basenames[f] = str + '-' + factory.fieldName(iArr[f]);
        }
        LOGGER.info("Creating indices " + Arrays.asList(basenames) + "...");

        // Phase 1: scan the whole sequence into per-field batches.
        Scan.run(str, documentSequence, termProcessor, str2, i, iArr, iArr2, str3, z, j2, str4);

        // Phase 2: combine the batches of each field into its final index.
        for (final String basename : basenames) {
            if (str3 == null) {
                new Concatenate(basename, Combine.batches(basename), i, j, charSequence, z2, i2, i3, j2).run();
            } else {
                new Merge(basename, Combine.batches(basename), i, j, charSequence, z2, i2, i3, j2).run();
            }
        }

        // Phase 3: build and store a prefix map from each field's term file.
        LOGGER.info("Creating prefix maps...");
        for (final String basename : basenames) {
            final FileLinesCollection terms = new FileLinesCollection(basename + ".terms", "UTF-8");
            BinIO.storeObject(new ImmutableExternalTriePrefixDictionary(terms), basename + ".termmap");
        }
        LOGGER.info("Indexing completed.");
    }

    /**
     * Command-line entry point: declares the options, parses {@code strArr}, and invokes
     * {@code run} with the resulting settings.
     *
     * <p>Nothing is indexed if the argument parser reports that it printed a message, or if a
     * quantum or height was given without the {@code skips} switch (in which case a complaint
     * is written to standard error).
     *
     * @param strArr the command-line arguments.
     */
    public static void main(String[] strArr) throws JSAPException, InvocationTargetException, NoSuchMethodException, IllegalArgumentException, SecurityException, IllegalAccessException, ConfigurationException, ClassNotFoundException, IOException, InstantiationException {
        // Resolve (and cache) the classes whose names are used as program name and option defaults.
        Class indexClass = class$it$unimi$dsi$mg4j$tool$Index;
        if (indexClass == null) {
            indexClass = m453class("[Lit.unimi.dsi.mg4j.tool.Index;", false);
            class$it$unimi$dsi$mg4j$tool$Index = indexClass;
        }
        Class defaultFactoryClass = class$it$unimi$dsi$mg4j$document$IdentityDocumentFactory;
        if (defaultFactoryClass == null) {
            defaultFactoryClass = m453class("[Lit.unimi.dsi.mg4j.document.IdentityDocumentFactory;", false);
            class$it$unimi$dsi$mg4j$document$IdentityDocumentFactory = defaultFactoryClass;
        }
        Class defaultProcessorClass = class$it$unimi$dsi$mg4j$index$NullTermProcessor;
        if (defaultProcessorClass == null) {
            defaultProcessorClass = m453class("[Lit.unimi.dsi.mg4j.index.NullTermProcessor;", false);
            class$it$unimi$dsi$mg4j$index$NullTermProcessor = defaultProcessorClass;
        }

        final Parameter[] options = {
            new FlaggedOption("sequence", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'S', "sequence", "A serialised document sequence that will be used instead of stdin."),
            new FlaggedOption("factory", JSAP.CLASS_PARSER, defaultFactoryClass.getName(), false, 'f', "factory", "A document factory with a standard constructor."),
            new FlaggedOption("property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'p', "property", "A 'key=value' specification, or the name of a property file").setAllowMultipleDeclarations(true),
            new FlaggedOption("termProcessor", JSAP.CLASS_PARSER, defaultProcessorClass.getName(), false, 't', "term-processor", "Sets the term processor to the given class."),
            new Switch("downcase", '\0', "downcase", "A shortcut for setting the term processor to the downcasing processor."),
            new FlaggedOption("indexedField", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'I', "indexed-field", "The field(s) of the document factory that will be indexed. (default: all fields)").setAllowMultipleDeclarations(true),
            new FlaggedOption("batchSize", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 's', "batch-size", "The size of a batch. It can be specified several times in the form [<field>:]<size>. If the field is omitted, it sets the batch size for all fields. (default: 2Mi)").setAllowMultipleDeclarations(true),
            new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, DEFAULT_BUFFER_SIZE, false, 'b', "buffer-size", "The size of an I/O buffer."),
            new FlaggedOption("delimiter", JSAP.INTEGER_PARSER, Integer.toString(10), false, 'd', "delimiter", "The document delimiter."),
            new FlaggedOption("renumber", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'r', "renumber", "The filename of a document renumbering."),
            new FlaggedOption("zipCollection", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'z', "zip", "Creates a support ZipDocumentCollection with given basename."),
            new FlaggedOption("comp", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'c', "comp", "A compression flag for the index (may be specified several times).").setAllowMultipleDeclarations(true),
            new Switch("skips", '\0', "skips", "Whether the resulting index should contain skips."),
            new FlaggedOption("quantum", JSAP.INTSIZE_PARSER, "64", false, 'Q', "quantum", "Enable skips with given quantum."),
            new FlaggedOption("height", JSAP.INTSIZE_PARSER, "8", false, 'H', "height", "Enable skips with given height."),
            new Switch("keepUnsorted", 'u', "keep-unsorted", "Keep the unsorted term file."),
            new FlaggedOption("logInterval", JSAP.LONG_PARSER, Long.toString(10000L), false, 'l', "log-interval", "The minimum time interval between activity logs in milliseconds."),
            new FlaggedOption("tempDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'T', "temp-dir", "A directory for all temporary files (e.g., batches)."),
            new UnflaggedOption(TrecWebWriterProcessor.ATTR_BASENAME, JSAP.STRING_PARSER, true, "The basename of the resulting index."),
        };

        final SimpleJSAP jsap = new SimpleJSAP(indexClass.getName(), "Builds a set of batches from a sequence of documents.", options);
        final JSAPResult parsed = jsap.parse(strArr);
        if (jsap.messagePrinted()) {
            return;
        }

        final DocumentSequence sequence = getSequence(parsed.getString("sequence"), parsed.getClass("factory"), parsed.getStringArray("property"), parsed.getInt("delimiter"));
        final DocumentFactory factory = sequence.factory();
        final Class processorClass = parsed.getClass("termProcessor");
        final int[] indexedFields = parseFieldNames(parsed.getStringArray("indexedField"), factory);
        final int[] batchSizes = parseQualifiedSizes(parsed.getStringArray("batchSize"), DEFAULT_BATCH_SIZE, indexedFields, factory);

        // The downcase switch takes precedence over an explicitly chosen term processor class.
        final TermProcessor termProcessor;
        if (parsed.getBoolean("downcase")) {
            termProcessor = DowncaseTermProcessor.getInstance();
        } else {
            termProcessor = (TermProcessor) processorClass.getMethod("getInstance", (Class[]) null).invoke(processorClass, (Object[]) null);
        }

        final String basename = parsed.getString(TrecWebWriterProcessor.ATTR_BASENAME);
        final int bufferSize = parsed.getInt("bufferSize");
        final MutableString flagsText = new MutableString();
        final long flags = parseCompressionFlags(parsed.getStringArray("comp"), flagsText);
        final boolean skips = parsed.getBoolean("skips");

        // Quantum and height only make sense when skips are enabled.
        if (!skips && (parsed.userSpecified("quantum") || parsed.userSpecified("height"))) {
            System.err.println("You specified quantum or height, but did not turn on skips.");
            return;
        }
        run(basename, sequence, termProcessor, parsed.getString("zipCollection"), bufferSize, parsed.getBoolean("keepUnsorted"), indexedFields, batchSizes, parsed.getString("renumber"), skips, flags, flagsText, parsed.getInt("quantum"), parsed.getInt("height"), parsed.getLong("logInterval"), parsed.getString("tempDir"));
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v5, types: [java.lang.Throwable, java.lang.Class] */
    /* renamed from: class, reason: not valid java name */
    static Class m453class(String str, boolean z) {
        ?? componentType;
        try {
            Class<?> cls = Class.forName(str);
            if (z) {
                return cls;
            }
            componentType = cls.getComponentType();
            return componentType;
        } catch (ClassNotFoundException unused) {
            throw new NoClassDefFoundError().initCause(componentType);
        }
    }

    // Class initialisation: make sure the cached Class object for this tool is available,
    // then obtain the logger for it from Fast.getLogger.
    static {
        if (class$it$unimi$dsi$mg4j$tool$Index == null) {
            class$it$unimi$dsi$mg4j$tool$Index = m453class("[Lit.unimi.dsi.mg4j.tool.Index;", false);
        }
        LOGGER = Fast.getLogger(class$it$unimi$dsi$mg4j$tool$Index);
    }
}
