/*
 * Decompiled with CFR 0.152.
 */
package org.clulab.embeddings.word2vec;

import java.io.File;
import java.io.InputStream;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.clulab.embeddings.word2vec.Word2Vec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.SetLike;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.StringBuilder;
import scala.io.BufferedSource;
import scala.io.Source;
import scala.io.Source$;
import scala.math.package$;
import scala.reflect.ClassTag$;
import scala.runtime.BooleanRef;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;

public final class Word2Vec$ {
    // Singleton instance; the private constructor assigns it when the static initializer below runs.
    public static Word2Vec$ MODULE$;
    // SLF4J logger; the private constructor obtains it for Word2Vec.class.
    private final Logger logger;

    // Creates the single instance at class-initialization time; the constructor itself stores it in MODULE$.
    static {
        new Word2Vec$();
    }

    /**
     * Synthetic accessor whose mangled name decodes to {@code <init>$default$2}.
     * NOTE(review): presumably the default for the second Word2Vec constructor
     * argument (the optional word filter) — confirm against Word2Vec.
     *
     * @return {@code None$.MODULE$}, i.e. an empty option
     */
    public Option<Set<String>> $lessinit$greater$default$2() {
        return None$.MODULE$;
    }

    /**
     * Exposes the logger held by this singleton.
     *
     * @return the value of the {@code logger} field
     */
    public Logger logger() {
        return logger;
    }

    /**
     * Reduces a raw token to the form used for embedding lookup.
     *
     * <p>The token is lower-cased, then:
     * <ul>
     *   <li>the bracket tokens {@code -lrb-}, {@code -rrb-}, {@code -lsb-}, {@code -rsb-} map to the empty string;</li>
     *   <li>tokens starting with {@code http} or containing {@code .com} / {@code .org} map to the empty string;</li>
     *   <li>tokens accepted by {@link #isNumber(String)} map to {@code "xnumx"} when {@code keepNumbers}
     *       is set, otherwise to the empty string;</li>
     *   <li>anything else keeps only its letters and underscores.</li>
     * </ul>
     *
     * @param uw          the unsanitized word; must not be {@code null}
     * @param keepNumbers whether numeric tokens are replaced by the {@code "xnumx"} placeholder
     * @return the sanitized word, possibly empty
     * @throws NullPointerException if {@code uw} is {@code null}
     */
    public String sanitizeWord(String uw, boolean keepNumbers) {
        // Locale.ROOT keeps the case mapping independent of the JVM default locale
        // (e.g. under a Turkish default locale "I" would lower-case to a dotless i).
        String w = uw.toLowerCase(java.util.Locale.ROOT);

        if (w.equals("-lrb-") || w.equals("-rrb-") || w.equals("-lsb-") || w.equals("-rsb-")) {
            return "";
        }
        if (w.startsWith("http") || w.contains(".com") || w.contains(".org")) {
            return "";
        }
        if (this.isNumber(w)) {
            return keepNumbers ? "xnumx" : "";
        }

        // Fully qualified: the simple name StringBuilder is bound to the Scala class by this file's imports.
        java.lang.StringBuilder letters = new java.lang.StringBuilder(w.length());
        for (int i = 0; i < w.length(); ++i) {
            char c = w.charAt(i);
            if (Character.isLetter(c) || c == '_') {
                letters.append(c);
            }
        }
        return letters.toString();
    }

    /**
     * Synthetic accessor for a default argument value.
     * NOTE(review): the mangled name suggests this is the default for the second
     * parameter of {@code sanitizeWord} ({@code keepNumbers}) — confirm at call sites.
     *
     * @return always {@code true}
     */
    public boolean sanitizeWord$default$2() {
        final boolean defaultValue = true;
        return defaultValue;
    }

    /*
     * WARNING - void declaration
     */
    public boolean isNumber(String w) {
        void var3_3;
        boolean foundDigit = false;
        for (int i = 0; i < w.length(); ++i) {
            char c = w.charAt(i);
            if (!Character.isDigit(c) && c != '-' && c != '+' && c != ',' && c != '.' && c != '/' && c != '\\') {
                return false;
            }
            if (!Character.isDigit(c)) continue;
            foundDigit = true;
        }
        return (boolean)var3_3;
    }

    /**
     * Scales a vector in place to unit Euclidean length.
     *
     * <p>The length is the square root of the sum of squared components. A vector whose
     * length is exactly zero is left untouched, which avoids dividing by zero.
     *
     * @param weights the vector to normalize; modified in place
     */
    public void norm(double[] weights) {
        double sumOfSquares = 0.0;
        for (double weight : weights) {
            sumOfSquares += weight * weight;
        }
        double len = Math.sqrt(sumOfSquares);
        if (len == 0.0) {
            return;
        }
        for (int i = 0; i < weights.length; ++i) {
            weights[i] = weights[i] / len;
        }
    }

    /*
     * WARNING - void declaration
     */
    public double dotProduct(double[] v1, double[] v2) {
        void var3_3;
        Predef$.MODULE$.assert(v1.length == v2.length);
        double sum = 0.0;
        for (int i = 0; i < v1.length; ++i) {
            sum += v1[i] * v2[i];
        }
        return (double)var3_3;
    }

    /**
     * Loads a text-format embedding matrix from a file decoded as ISO-8859-1.
     *
     * <p>The file is opened here and always closed before returning, including when
     * parsing fails part-way through.
     *
     * @param mf         path of the matrix file
     * @param wordsToUse optional filter passed through to {@code buildMatrix}
     * @return the pair produced by {@code buildMatrix}: the word-to-vector map and the boxed dimension count
     */
    public Tuple2<Map<String, double[]>, Object> org$clulab$embeddings$word2vec$Word2Vec$$loadMatrix(String mf, Option<Set<String>> wordsToUse) {
        this.logger().debug("Started to load word2vec matrix from file " + mf + "...");
        BufferedSource src = Source$.MODULE$.fromFile(mf, "iso-8859-1");
        Tuple2<Map<String, double[]>, Object> matrix;
        try {
            Iterator lines = src.getLines();
            matrix = this.buildMatrix((Iterator<String>)lines, wordsToUse);
        } finally {
            // Release the file handle even if a malformed line aborts parsing.
            src.close();
        }
        this.logger().debug("Completed matrix loading.");
        return matrix;
    }

    /**
     * Loads a text-format embedding matrix from a stream decoded as ISO-8859-1.
     *
     * <p>The source wrapped around the stream is always closed before returning,
     * including when parsing fails part-way through.
     *
     * @param is         stream holding the matrix text
     * @param wordsToUse optional filter passed through to {@code buildMatrix}
     * @return the pair produced by {@code buildMatrix}: the word-to-vector map and the boxed dimension count
     */
    public Tuple2<Map<String, double[]>, Object> org$clulab$embeddings$word2vec$Word2Vec$$loadMatrixFromStream(InputStream is, Option<Set<String>> wordsToUse) {
        this.logger().debug("Started to load word2vec matrix from stream ...");
        BufferedSource src = Source$.MODULE$.fromInputStream(is, "iso-8859-1");
        Tuple2<Map<String, double[]>, Object> matrix;
        try {
            Iterator lines = src.getLines();
            matrix = this.buildMatrix((Iterator<String>)lines, wordsToUse);
        } finally {
            // Close the source even if a malformed line aborts parsing.
            src.close();
        }
        this.logger().debug("Completed matrix loading.");
        return matrix;
    }

    /**
     * Loads a text-format embedding matrix from a source supplied by the caller.
     * This method does not close {@code src}.
     *
     * @param src        source whose lines hold the matrix text
     * @param wordsToUse optional filter passed through to {@code buildMatrix}
     * @return the pair produced by {@code buildMatrix}
     */
    public Tuple2<Map<String, double[]>, Object> org$clulab$embeddings$word2vec$Word2Vec$$loadMatrixFromSource(Source src, Option<Set<String>> wordsToUse) {
        this.logger().debug("Started to load word2vec matrix from source ...");
        Tuple2<Map<String, double[]>, Object> result = this.buildMatrix((Iterator<String>)src.getLines(), wordsToUse);
        this.logger().debug("Completed matrix loading.");
        return result;
    }

    /**
     * Parses the lines of a text-format embedding file into a word-to-vector map.
     *
     * <p>The first line is a header; its second whitespace-separated field is read as the
     * number of dimensions. Every later line must contain a word followed by exactly that
     * many numbers. Each accepted vector is normalized with {@code norm} before being stored.
     *
     * @param lines      the lines of the matrix, header first
     * @param wordsToUse when non-empty, only words contained in the set are kept
     * @return a pair of the immutable word-to-vector map and the boxed dimension count
     *         (0 when {@code lines} is empty)
     * @throws NumberFormatException if the dimension count or a vector component is not numeric
     */
    private Tuple2<Map<String, double[]>, Object> buildMatrix(Iterator<String> lines, Option<Set<String>> wordsToUse) {
        HashMap m = new HashMap();
        boolean first = true;
        int dims = 0;
        int index = 0;
        while (lines.hasNext()) {
            String line = lines.next();
            int lineNumber = ++index; // 1-based, used only for the diagnostic below
            String[] bits = line.split("\\s+");
            if (first) {
                dims = Integer.parseInt(bits[1]);
                first = false;
                continue;
            }
            if (bits.length != dims + 1) {
                // Report the offending line before the assertion below fails.
                Predef$.MODULE$.println((Object)(bits.length + " != " + (dims + 1) + " found on line " + lineNumber));
            }
            Predef$.MODULE$.assert(bits.length == dims + 1);
            String w = bits[0];
            if (wordsToUse.isEmpty() || ((SetLike)wordsToUse.get()).contains((Object)w)) {
                double[] weights = new double[dims];
                for (int i = 0; i < dims; ++i) {
                    weights[i] = Double.parseDouble(bits[i + 1]);
                }
                this.norm(weights);
                m.put((Object)w, (Object)weights);
            }
        }
        this.logger().debug("Completed matrix loading.");
        return new Tuple2((Object)m.toMap(Predef$.MODULE$.$conforms()), (Object)BoxesRunTime.boxToInteger((int)dims));
    }

    /**
     * Convenience overload that wraps the path in a {@link File} and delegates to
     * {@code fromBinary(File)}.
     *
     * @param filename path of the binary embedding file
     * @return the Word2Vec returned by the {@code File} overload
     */
    public Word2Vec fromBinary(String filename) {
        File file = new File(filename);
        return this.fromBinary(file);
    }

    /**
     * Builds a Word2Vec backed by a binary embedding file.
     *
     * <p>The file read and parse are wrapped in a {@code Function0}, so this method itself
     * does not touch the file; when the function is invoked is decided by Word2Vec.
     *
     * @param file the binary embedding file
     * @return a Word2Vec constructed from the matrix-producing function
     */
    public Word2Vec fromBinary(File file) {
        Function0<Map<String, double[]>> matrixThunk = (Function0 & Serializable & scala.Serializable)() -> MODULE$.readBinaryMatrix(FileUtils.readFileToByteArray((File)file));
        return new Word2Vec(matrixThunk);
    }

    /**
     * Builds a Word2Vec backed by binary embedding data read from a stream.
     *
     * <p>Draining the stream and parsing are wrapped in a {@code Function0}, so this method
     * itself does not read from the stream; when the function is invoked is decided by
     * Word2Vec. Nothing in this method closes the stream.
     *
     * @param inputStream stream holding the binary embedding data
     * @return a Word2Vec constructed from the matrix-producing function
     */
    public Word2Vec fromBinary(InputStream inputStream) {
        Function0<Map<String, double[]>> matrixThunk = (Function0 & Serializable & scala.Serializable)() -> MODULE$.readBinaryMatrix(IOUtils.toByteArray((InputStream)inputStream));
        return new Word2Vec(matrixThunk);
    }

    /**
     * Builds a Word2Vec backed by binary embedding data already held in memory.
     *
     * <p>Parsing is wrapped in a {@code Function0}; when it is invoked is decided by Word2Vec.
     * The array is captured by reference, not copied.
     *
     * @param bytes the binary embedding data
     * @return a Word2Vec constructed from the matrix-producing function
     */
    public Word2Vec fromBinary(byte[] bytes) {
        Function0<Map<String, double[]>> matrixThunk = (Function0 & Serializable & scala.Serializable)() -> MODULE$.readBinaryMatrix(bytes);
        return new Word2Vec(matrixThunk);
    }

    /**
     * Reads bytes from the buffer's current position up to the next space (32) or
     * newline (10) and returns them as a string. The terminating byte is consumed
     * but not included in the result.
     *
     * @param bb the buffer to read from; its position is advanced past the terminator
     * @return the bytes read before the terminator, decoded as text (possibly empty)
     * @throws java.nio.BufferUnderflowException if the buffer ends before a terminator is found
     */
    private String readNonSpace(ByteBuffer bb) {
        // A primitive byte sink avoids boxing every byte into a generic collection.
        java.io.ByteArrayOutputStream token = new java.io.ByteArrayOutputStream();
        for (byte by = bb.get(); by != (byte)32 && by != (byte)10; by = bb.get()) {
            token.write(by);
        }
        // NOTE(review): decoding uses the platform default charset, so the result may differ
        // between machines for non-ASCII words — confirm the intended encoding of binary files.
        return new String(token.toByteArray());
    }

    /**
     * Parses a binary embedding matrix held in memory.
     *
     * <p>Layout as read by this method: a textual word count and a textual vector size, each
     * terminated by a space or newline; then, per word, the word text terminated by a space or
     * newline followed by {@code size} 4-byte floats. Spaces and newlines between records are
     * skipped, and trailing ones after the last record are tolerated. Every vector is
     * normalized with {@code norm} before being stored.
     *
     * @param bytes the complete binary matrix
     * @return an immutable map from word to its normalized vector
     * @throws NumberFormatException             if the word count or vector size is not numeric
     * @throws java.nio.BufferUnderflowException if the data ends before all declared words are read
     */
    private Map<String, double[]> readBinaryMatrix(byte[] bytes) {
        HashMap m = new HashMap();
        ByteBuffer bb = ByteBuffer.wrap(bytes);
        // NOTE(review): floats are decoded in the byte order of the machine running this code;
        // a file written on a machine with the other endianness would be misread — confirm.
        bb.order(ByteOrder.nativeOrder());
        long words = Long.parseLong(this.readNonSpace(bb));
        long size = Long.parseLong(this.readNonSpace(bb));
        this.skipSeparators(bb);
        for (long w = 0L; w < words; ++w) {
            String word = this.readNonSpace(bb);
            double[] embedding = new double[(int)size];
            for (int s = 0; (long)s < size; ++s) {
                embedding[s] = bb.getFloat();
            }
            this.norm(embedding);
            m.put((Object)word, (Object)embedding);
            this.skipSeparators(bb);
        }
        return m.toMap(Predef$.MODULE$.$conforms());
    }

    /** Advances the buffer past any run of spaces (32) and newlines (10), stopping at the end of the buffer. */
    private void skipSeparators(ByteBuffer bb) {
        while (bb.hasRemaining()) {
            // Absolute get: peek at the next byte without consuming it.
            byte next = bb.get(bb.position());
            if (next != (byte)32 && next != (byte)10) {
                return;
            }
            bb.position(bb.position() + 1);
        }
    }

    /**
     * Small demo: loads embeddings from the path in {@code args[0]}, prints the 40 words
     * most similar to "house", then prints text/max/avg similarity scores for a few
     * hard-coded token lists.
     *
     * @param args command-line arguments; {@code args[0]} is passed to the Word2Vec constructor
     */
    public void main(String[] args) {
        Word2Vec w2v = new Word2Vec(args[0], (Option<Set<String>>)None$.MODULE$);

        Predef$.MODULE$.println((Object)"Words most similar to \"house\":");
        Set<String> query = (Set<String>)((Set)Predef$.MODULE$.Set().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"house"})));
        w2v.mostSimilarWords(query, 40).foreach((Function1 & Serializable & scala.Serializable)t -> {
            Predef$.MODULE$.println((Object)((String)t._1() + " " + t._2$mcD$sp()));
            return BoxedUnit.UNIT;
        });

        // Sample token lists used for the similarity printouts below.
        List applePhrase = List$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"a", "delicious", "apple"}));
        List pearPhrase = List$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"the", "tasty", "pear"}));
        List unrelated = List$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"computer", "oxygen"}));

        Predef$.MODULE$.println((Object)("Text similarity: " + w2v.sanitizedTextSimilarity((Iterable<String>)applePhrase, (Iterable<String>)pearPhrase)));
        Predef$.MODULE$.println((Object)("Text similarity: " + w2v.sanitizedTextSimilarity((Iterable<String>)applePhrase, (Iterable<String>)unrelated)));
        Predef$.MODULE$.println((Object)("Max similarity: " + w2v.sanitizedMaxSimilarity((Iterable<String>)applePhrase, (Iterable<String>)pearPhrase)));
        Predef$.MODULE$.println((Object)("Avg similarity: " + w2v.sanitizedAvgSimilarity((Iterable<String>)applePhrase, (Iterable<String>)pearPhrase)));
    }

    /**
     * Synthetic filter predicate used by {@code buildMatrix}: accepts any non-null pair.
     *
     * @param check$ifrefutable$3 the (line, index) pair under test
     * @return {@code true} unless the argument is {@code null}
     */
    public static final /* synthetic */ boolean $anonfun$buildMatrix$1(Tuple2 check$ifrefutable$3) {
        return check$ifrefutable$3 != null;
    }

    /**
     * Private constructor; the static initializer is the only caller visible in this class.
     * Publishes the new instance through {@code MODULE$} and then obtains the logger.
     */
    private Word2Vec$() {
        // Publish the singleton before initializing the remaining state.
        MODULE$ = this;
        this.logger = LoggerFactory.getLogger(Word2Vec.class);
    }
}

