package edu.umd.cloud9.collection.aquaint2;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import edu.umd.cloud9.collection.DocnoMapping;
import edu.umd.cloud9.webgraph.data.AnchorTextConstants;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/umd/cloud9/collection/aquaint2/Aquaint2DocnoMapping.class */
/**
 * {@link DocnoMapping} implementation backed by an in-memory array of docids.
 *
 * <p>Docnos are 1-based: the docno of a docid is its index in the array returned by
 * {@link #readDocnoData(Path, FileSystem)}. Index 0 is a placeholder entry and never
 * corresponds to a real document.
 *
 * <p>The on-disk mapping format, as produced by
 * {@link #writeDocnoData(Path, Path, FileSystem)}, is an {@code int} entry count followed by
 * that many strings written with {@code DataOutput.writeUTF}.
 *
 * <p>Lookups by docid use {@link Arrays#binarySearch(Object[], Object)}, so the docids must be
 * stored in ascending natural {@link String} order. This class does not sort them; the order
 * in the mapping file is the order of the lines in the input given to {@code writeDocnoData}.
 */
public class Aquaint2DocnoMapping implements DocnoMapping {
    private static final Logger LOG = Logger.getLogger(Aquaint2DocnoMapping.class);

    /** Command-line usage message printed by {@link #main(String[])}. */
    private static final String USAGE = "usage: (list|getDocno|getDocid) [mapping-file] [docid/docno]";

    /** Number of processed entries between two progress log messages. */
    private static final int PROGRESS_INTERVAL = 100000;

    /** Docids indexed by docno; index 0 is a placeholder. Null until a mapping is loaded. */
    private String[] docids;

    /**
     * Looks up the docno of a docid with a binary search over the loaded docids.
     *
     * @param str the docid to look up; must not be null
     * @return the docno (a positive index) if the docid is present; otherwise the negative
     *     value returned by {@link Arrays#binarySearch(Object[], Object)}
     */
    @Override // edu.umd.cloud9.collection.DocnoMapping
    public int getDocno(String str) {
        Preconditions.checkNotNull(str);
        return Arrays.binarySearch(this.docids, str);
    }

    /**
     * Returns the docid stored at the given docno.
     *
     * @param i the docno; must be greater than 0
     * @return the docid at index {@code i}
     * @throws IllegalArgumentException if {@code i} is not positive
     * @throws ArrayIndexOutOfBoundsException if {@code i} is beyond the loaded mapping
     */
    @Override // edu.umd.cloud9.collection.DocnoMapping
    public String getDocid(int i) {
        Preconditions.checkArgument(i > 0);
        return this.docids[i];
    }

    /**
     * Loads the docid array from a mapping file, replacing any previously loaded mapping.
     *
     * @param path location of the mapping file
     * @param fileSystem file system used to open {@code path}
     * @throws IOException if the file cannot be read
     */
    @Override // edu.umd.cloud9.collection.DocnoMapping
    public void loadMapping(Path path, FileSystem fileSystem) throws IOException {
        this.docids = readDocnoData(path, fileSystem);
    }

    /**
     * Converts a tab-separated text file into a binary mapping file.
     *
     * <p>The first tab-separated field of every input line is taken as a docid. The output is
     * the number of docids as an {@code int}, followed by each docid written with
     * {@code writeUTF}, in input order. An existing output file is overwritten.
     *
     * @param path the text input file
     * @param path2 the mapping file to create
     * @param fileSystem file system used for both paths
     * @throws IOException if reading or writing fails
     */
    public static void writeDocnoData(Path path, Path path2, FileSystem fileSystem) throws IOException {
        LOG.info("Writing docno data to " + path2);
        ArrayList<String> ids = Lists.newArrayList();

        LineReader reader = new LineReader(fileSystem.open(path));
        try {
            LOG.info("Reading " + path);
            int read = 0;
            Text line = new Text();
            while (reader.readLine(line) > 0) {
                ids.add(line.toString().split("\\t")[0]);
                read++;
                if (read % PROGRESS_INTERVAL == 0) {
                    LOG.info(String.valueOf(read) + " docs");
                }
            }
            LOG.info(String.valueOf(read) + " docs total. Done!");
        } finally {
            // Release the input stream even when reading fails part-way through.
            reader.close();
        }

        LOG.info("Writing " + path2);
        FSDataOutputStream out = fileSystem.create(path2, true);
        try {
            int written = 0;
            out.writeInt(ids.size());
            for (String id : ids) {
                out.writeUTF(id);
                written++;
                if (written % PROGRESS_INTERVAL == 0) {
                    LOG.info(String.valueOf(written) + " docs");
                }
            }
            LOG.info(String.valueOf(written) + " docs total. Done!");
        } finally {
            // Release the output stream even when writing fails part-way through.
            out.close();
        }
    }

    /**
     * Reads a mapping file written by {@link #writeDocnoData(Path, Path, FileSystem)}.
     *
     * @param path location of the mapping file
     * @param fileSystem file system used to open {@code path}
     * @return an array one element longer than the stored entry count; index 0 holds
     *     {@code AnchorTextConstants.EMPTY_STRING} and indices 1..count hold the docids in
     *     file order
     * @throws IOException if the file cannot be read or its entry count is not usable
     */
    public static String[] readDocnoData(Path path, FileSystem fileSystem) throws IOException {
        LOG.warn("p: " + path);
        FSDataInputStream in = fileSystem.open(path);
        try {
            int count = in.readInt();
            // A negative count (or one whose +1 overflows) means the header is corrupt; report
            // it as an I/O problem instead of failing with NegativeArraySizeException.
            if (count < 0 || count == Integer.MAX_VALUE) {
                throw new IOException("Invalid docno count " + count + " in " + path);
            }
            // One extra slot so that docnos can start at 1.
            int length = count + 1;
            LOG.warn("creating array of length: " + length);
            String[] ids = new String[length];
            for (int docno = 1; docno < length; docno++) {
                ids[docno] = in.readUTF();
            }
            ids[0] = AnchorTextConstants.EMPTY_STRING;
            return ids;
        } finally {
            // Release the input stream even when the file is truncated or unreadable.
            in.close();
        }
    }

    /**
     * Command-line tool for inspecting a mapping file.
     *
     * <p>Arguments: {@code (list|getDocno|getDocid) mapping-file [docid/docno]}. The third
     * argument is required for {@code getDocno} and {@code getDocid}.
     *
     * @param strArr command-line arguments
     * @throws IOException if the mapping file cannot be read
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length < 2) {
            System.out.println(USAGE);
            System.exit(-1);
        }
        String command = strArr[0];
        boolean isGetDocno = command.equals("getDocno");
        boolean isGetDocid = command.equals("getDocid");
        // Both lookup commands read strArr[2]; reject a missing argument up front instead of
        // failing with ArrayIndexOutOfBoundsException after the mapping has been loaded.
        if ((isGetDocno || isGetDocid) && strArr.length < 3) {
            System.out.println(USAGE);
            System.exit(-1);
        }

        FileSystem fileSystem = FileSystem.get(new Configuration());
        System.out.println("loading mapping file " + strArr[1]);
        Aquaint2DocnoMapping mapping = new Aquaint2DocnoMapping();
        mapping.loadMapping(new Path(strArr[1]), fileSystem);

        if (command.equals("list")) {
            // Index 0 is the placeholder entry, so listing starts at docno 1.
            for (int docno = 1; docno < mapping.docids.length; docno++) {
                System.out.println(String.valueOf(docno) + "\t" + mapping.docids[docno]);
            }
            return;
        }
        if (isGetDocno) {
            System.out.println("looking up docno for \"" + strArr[2] + "\"");
            int docno = mapping.getDocno(strArr[2]);
            if (docno > 0) {
                System.out.println(docno);
            } else {
                System.err.print("Invalid docid!");
            }
            return;
        }
        if (!isGetDocid) {
            System.out.println("Invalid command!");
            System.out.println(USAGE);
            return;
        }
        try {
            System.out.println("looking up docid for " + strArr[2]);
            System.out.println(mapping.getDocid(Integer.parseInt(strArr[2])));
        } catch (RuntimeException e) {
            // Covers a non-numeric docno, a non-positive docno and an out-of-range docno.
            System.err.print("Invalid docno!");
        }
    }
}
