package eu.monnetproject.bliss.experiments;

import eu.monnetproject.bliss.CLIOpts;
import it.unimi.dsi.fastutil.ints.Int2IntMap;
import it.unimi.dsi.fastutil.ints.Int2IntRBTreeMap;
import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator;
import it.unimi.dsi.fastutil.ints.IntRBTreeSet;
import it.unimi.dsi.fastutil.ints.IntSortedSet;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;

/* loaded from: input_file:eu/monnetproject/bliss/experiments/ResampleInts.class */
/**
 * Command-line tool that renumbers the word ids used by an integer-encoded
 * corpus pair so that the ids actually occurring become consecutive.
 *
 * <p>The tool collects every distinct int found in the train corpus (and in
 * the test corpus, when it is a different file), assigns each one a new id
 * {@code 0, 1, 2, ...} in ascending order of the old id, and then rewrites
 * the word map and the corpora using the new ids.
 *
 * <p>Corpus files are read as a plain sequence of ints in
 * {@link DataInputStream#readInt()} format. Word map files are read as a
 * sequence of records, each a {@link DataInputStream#readUTF()} string
 * followed by an int id.
 */
public class ResampleInts {
    /**
     * Parses the command line, builds the renumbering index and writes the
     * renumbered word map and corpora. Progress messages and the final count
     * of distinct ids ({@code W=...}) are printed to standard error.
     *
     * @param strArr command-line arguments, interpreted by {@link CLIOpts}
     * @throws Exception if argument handling or any file operation fails
     */
    public static void main(String[] strArr) throws Exception {
        CLIOpts opts = new CLIOpts(strArr);
        File trainIn = opts.roFile("corpus1[.gz|.bz2]", "The (train) corpus");
        File testIn = opts.roFile("corpus2[.gz|.bz2]", "The (test) corpus or train corpus repeated to ignore");
        File wordMapIn = opts.roFile("wordMap", "The previous word map");
        File wordMapOut = opts.woFile("newWordMap", "The new word map");
        File trainOut = opts.woFile("corpus1out[.gz|.bz2]", "The (train) corpus out file");
        // The original named this option "corpus1out" as well, duplicating the
        // train output's name; it describes the test corpus output.
        File testOut = opts.woFile("corpus2out[.gz|.bz2]", "The (test) corpus out file");
        if (!opts.verify(ResampleInts.class)) {
            return;
        }

        // Passing the train corpus twice means "there is no separate test corpus".
        boolean hasSeparateTestCorpus = !trainIn.equals(testIn);

        IntRBTreeSet seenIds = new IntRBTreeSet();
        System.err.println("Reading train corpus");
        readCorpus(trainIn, seenIds);
        if (hasSeparateTestCorpus) {
            System.err.println("Reading test corpus");
            readCorpus(testIn, seenIds);
        }

        System.err.println("Building downsampling index");
        Int2IntRBTreeMap oldToNew = buildIndex(seenIds);

        System.err.println("Writing new word map");
        rewriteWordMap(wordMapIn, wordMapOut, seenIds, oldToNew);

        System.err.println("Writing new train corpus");
        rewriteCorpus(trainIn, trainOut, oldToNew);
        if (hasSeparateTestCorpus) {
            System.err.println("Writing new test corpus");
            rewriteCorpus(testIn, testOut, oldToNew);
        }
        System.err.println("W=" + seenIds.size());
    }

    /**
     * Adds every int stored in a corpus file to the given set.
     *
     * @param file         corpus file, opened through
     *                     {@code CLIOpts.openInputAsMaybeZipped}
     * @param intSortedSet set that receives each id read from the file
     * @throws IOException if the file cannot be opened or read
     */
    private static void readCorpus(File file, IntSortedSet intSortedSet) throws IOException {
        try (DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(file))) {
            try {
                // Read until the stream is exhausted. InputStream.available() is
                // not a reliable end-of-input test, so EOFException is the signal.
                while (true) {
                    intSortedSet.add(in.readInt());
                }
            } catch (EOFException endOfInput) {
                // Expected: the whole corpus has been consumed.
            }
        }
    }

    /**
     * Copies a corpus file, replacing each int by the value the map holds for it.
     *
     * @param file       corpus file to read
     * @param file2      corpus file to write
     * @param int2IntMap mapping from old id to new id
     * @throws IOException if either file cannot be opened, read or written
     */
    private static void rewriteCorpus(File file, File file2, Int2IntMap int2IntMap) throws IOException {
        try (DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(file));
                DataOutputStream out = new DataOutputStream(CLIOpts.openOutputAsMaybeZipped(file2))) {
            try {
                while (true) {
                    out.writeInt(int2IntMap.get(in.readInt()));
                }
            } catch (EOFException endOfInput) {
                // Expected: the whole corpus has been consumed.
            }
            out.flush();
        }
    }

    /**
     * Writes a new word map restricted to the ids in {@code intRBTreeSet} and
     * expressed in the new numbering.
     *
     * <p>Each record of the old word map whose id is in the set is written
     * with its id translated through {@code int2IntRBTreeMap}; only the first
     * record for a given id is kept. Ids in the set that never appear in the
     * old word map are then written under the word {@code <UNK>}, except for
     * id 0, which is skipped. The caller's set is not modified.
     *
     * @param file             old word map to read
     * @param file2            new word map to write
     * @param intRBTreeSet     ids that must appear in the new word map
     * @param int2IntRBTreeMap mapping from old id to new id
     * @throws IOException if either file cannot be opened, read or written
     */
    private static void rewriteWordMap(File file, File file2, IntRBTreeSet intRBTreeSet, Int2IntRBTreeMap int2IntRBTreeMap) throws IOException {
        // Work on a copy: ids are removed as they are matched, so what is left
        // afterwards is exactly the set of ids that have no word.
        IntRBTreeSet unmatched = new IntRBTreeSet(intRBTreeSet);
        try (DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(file));
                DataOutputStream out = new DataOutputStream(CLIOpts.openOutputAsMaybeZipped(file2))) {
            try {
                while (true) {
                    String word = in.readUTF();
                    int oldId = in.readInt();
                    if (unmatched.contains(oldId)) {
                        out.writeUTF(word);
                        out.writeInt(int2IntRBTreeMap.get(oldId));
                        unmatched.remove(oldId);
                    }
                }
            } catch (EOFException endOfInput) {
                // Expected: the whole word map has been consumed.
            }

            IntBidirectionalIterator it = unmatched.iterator();
            while (it.hasNext()) {
                int oldId = it.nextInt();
                if (oldId != 0) {
                    int newId = int2IntRBTreeMap.get(oldId);
                    System.err.println(newId + " as <UNK>");
                    out.writeUTF("<UNK>");
                    out.writeInt(newId);
                }
            }
            out.flush();
        }
    }

    /**
     * Builds the old-id to new-id mapping: the set's elements, taken in
     * iteration order, are numbered {@code 0, 1, 2, ...}.
     *
     * @param intRBTreeSet ids to renumber
     * @return map from each element of the set to its position in the iteration
     */
    private static Int2IntRBTreeMap buildIndex(IntRBTreeSet intRBTreeSet) {
        Int2IntRBTreeMap oldToNew = new Int2IntRBTreeMap();
        int nextId = 0;
        IntBidirectionalIterator it = intRBTreeSet.iterator();
        while (it.hasNext()) {
            oldToNew.put(it.nextInt(), nextId);
            nextId++;
        }
        return oldToNew;
    }
}
