package org.clulab.embeddings;

import java.io.File;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.$less$colon$less$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.Iterator;
import scala.collection.SetOps;
import scala.collection.StringOps$;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.HashMap;
import scala.io.BufferedSource;
import scala.io.Source;
import scala.io.Source$;
import scala.math.package$;
import scala.reflect.ClassTag$;
import scala.runtime.BooleanRef;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.ScalaRunTime$;

/* compiled from: SanitizedWordEmbeddingMap.scala */
/* loaded from: input_file:org/clulab/embeddings/SanitizedWordEmbeddingMap$.class */
public final class SanitizedWordEmbeddingMap$ {
    /** The single instance of this class; the constructor is private, so no other instance is created here. */
    public static final SanitizedWordEmbeddingMap$ MODULE$ = new SanitizedWordEmbeddingMap$();
    /** SLF4J logger registered under the {@code SanitizedWordEmbeddingMap} class. */
    private static final Logger logger = LoggerFactory.getLogger(SanitizedWordEmbeddingMap.class);
    /** The literal token {@code "*UNK*"}; presumably the placeholder for unknown words (confirm against callers). */
    private static final String UNK = "*UNK*";

    /**
     * Compiler-generated accessor. Judging by its mangled name ({@code <init>$default$2}) it
     * presumably supplies the default for the second constructor parameter of
     * {@code SanitizedWordEmbeddingMap}; confirm against the Scala source.
     *
     * @return the Scala {@code None} singleton
     */
    public Option<Set<String>> $lessinit$greater$default$2() {
        return None$.MODULE$;
    }

    /**
     * Compiler-generated accessor for a constructor default; {@code main} in this class passes
     * its result as the third argument of the {@code SanitizedWordEmbeddingMap} constructor.
     *
     * @return always {@code false}
     */
    public boolean $lessinit$greater$default$3() {
        return false;
    }

    /**
     * Returns the shared static logger of this class.
     *
     * @return the SLF4J logger held in the {@code logger} field
     */
    public Logger logger() {
        return logger;
    }

    /**
     * Returns the value of the {@code UNK} constant.
     *
     * @return the string {@code "*UNK*"}
     */
    public String UNK() {
        return UNK;
    }

    /**
     * Delegates to {@code EmbeddingUtils.sanitizeWord}, forwarding both arguments unchanged.
     *
     * @param str the word to sanitize
     * @param z   flag forwarded to the helper; its meaning is defined there
     *            (NOTE(review): not visible in this file, confirm in EmbeddingUtils)
     * @return whatever the helper returns for the given arguments
     */
    public String sanitizeWord(String str, boolean z) {
        return EmbeddingUtils$.MODULE$.sanitizeWord(str, z);
    }

    /**
     * Compiler-generated accessor; by its mangled name it presumably supplies the default for
     * the second parameter of {@code sanitizeWord} (confirm against the Scala source).
     *
     * @return always {@code true}
     */
    public boolean sanitizeWord$default$2() {
        return true;
    }

    /**
     * Delegates to {@code EmbeddingUtils.isNumber}.
     *
     * @param str the string to test
     * @return the helper's verdict for {@code str}
     */
    public boolean isNumber(String str) {
        return EmbeddingUtils$.MODULE$.isNumber(str);
    }

    /**
     * Scales a vector in place so that its Euclidean length becomes 1.
     *
     * <p>The array is left untouched when the square root of its sum of squares is exactly zero,
     * which avoids a division by zero.
     *
     * @param dArr the vector to normalize; it is modified in place
     */
    public void norm(double[] dArr) {
        double sumOfSquares = 0.0d;
        for (double component : dArr) {
            sumOfSquares += component * component;
        }
        final double length = package$.MODULE$.sqrt(sumOfSquares);
        if (length == 0) {
            // Nothing to scale: dividing would only produce NaN entries.
            return;
        }
        for (int idx = 0; idx < dArr.length; idx++) {
            dArr[idx] /= length;
        }
    }

    /**
     * Computes the dot product of two vectors of equal length.
     *
     * <p>The equal-length precondition is checked with Scala's {@code Predef.assert}.
     *
     * @param dArr  the first vector
     * @param dArr2 the second vector; must have the same length as {@code dArr}
     * @return the sum over all indices of {@code dArr[i] * dArr2[i]}
     */
    public double dotProduct(double[] dArr, double[] dArr2) {
        final int length = dArr.length;
        Predef$.MODULE$.assert(length == dArr2.length);
        double total = 0.0d;
        int index = 0;
        while (index < length) {
            total += dArr[index] * dArr2[index];
            index++;
        }
        return total;
    }

    /**
     * Loads a textual embedding matrix from a file, reading it as ISO-8859-1.
     *
     * <p>The file is opened as a Scala {@code BufferedSource} and its lines are handed to
     * {@code buildMatrix}. The source is closed in a {@code finally} block so that the file
     * handle is released even when parsing fails (for example on a malformed number or a
     * failed line-length assertion).
     *
     * @param str    path of the file to read
     * @param option optional set of words to keep; when empty every word is kept
     * @param z      when {@code true}, words are lower-cased before being looked up in {@code option}
     * @return a pair of (word to vector map, vector dimension boxed as an {@code Integer})
     */
    public Tuple2<Map<String, double[]>, Object> org$clulab$embeddings$SanitizedWordEmbeddingMap$$loadMatrix(String str, Option<Set<String>> option, boolean z) {
        logger().debug(new StringBuilder(46).append("Started to load embedding matrix from file ").append(str).append("...").toString());
        BufferedSource source = Source$.MODULE$.fromFile(str, "iso-8859-1");
        Tuple2<Map<String, double[]>, Object> matrix;
        try {
            matrix = buildMatrix(source.getLines(), option, z);
        } finally {
            // Release the file handle whether or not parsing succeeded.
            source.close();
        }
        logger().debug("Completed matrix loading.");
        return matrix;
    }

    /**
     * Loads a textual embedding matrix from an input stream, decoding it as ISO-8859-1.
     *
     * <p>The stream is wrapped in a Scala {@code BufferedSource} whose lines are handed to
     * {@code buildMatrix}. The source is closed in a {@code finally} block so that it is
     * released even when parsing fails.
     * NOTE(review): closing the source presumably also closes {@code inputStream}; confirm
     * against the Scala {@code Source.fromInputStream} documentation.
     *
     * @param inputStream the stream to read the matrix from
     * @param option      optional set of words to keep; when empty every word is kept
     * @param z           when {@code true}, words are lower-cased before being looked up in {@code option}
     * @return a pair of (word to vector map, vector dimension boxed as an {@code Integer})
     */
    public Tuple2<Map<String, double[]>, Object> org$clulab$embeddings$SanitizedWordEmbeddingMap$$loadMatrixFromStream(InputStream inputStream, Option<Set<String>> option, boolean z) {
        logger().debug("Started to load embedding matrix from stream ...");
        BufferedSource source = Source$.MODULE$.fromInputStream(inputStream, "iso-8859-1");
        Tuple2<Map<String, double[]>, Object> matrix;
        try {
            matrix = buildMatrix(source.getLines(), option, z);
        } finally {
            // Close the source whether or not parsing succeeded.
            source.close();
        }
        logger().debug("Completed matrix loading.");
        return matrix;
    }

    /**
     * Loads a textual embedding matrix from an already opened Scala {@code Source}.
     *
     * <p>This method does not close {@code source}; that is left to the caller.
     *
     * @param source the source whose lines are parsed
     * @param option optional set of words to keep; when empty every word is kept
     * @param z      when {@code true}, words are lower-cased before being looked up in {@code option}
     * @return a pair of (word to vector map, vector dimension boxed as an {@code Integer})
     */
    public Tuple2<Map<String, double[]>, Object> org$clulab$embeddings$SanitizedWordEmbeddingMap$$loadMatrixFromSource(Source source, Option<Set<String>> option, boolean z) {
        logger().debug("Started to load embedding matrix from source ...");
        final Tuple2<Map<String, double[]>, Object> matrixAndDimensions = buildMatrix(source.getLines(), option, z);
        logger().debug("Completed matrix loading.");
        return matrixAndDimensions;
    }

    /**
     * Parses the lines of a textual embedding file into a word to vector map.
     *
     * <p>The first line is a header whose second whitespace-separated token is the vector
     * dimension. Every following line must consist of a word followed by exactly that many
     * numbers; a mismatch is printed to standard output and then fails a Scala
     * {@code Predef.assert}. Each kept vector is normalized with {@code norm} before it is stored.
     *
     * @param iterator the lines of the file, header first
     * @param option   optional set of words to keep; when empty every word is kept
     * @param z        when {@code true}, the word is lower-cased for the lookup in {@code option}
     *                 (the map key is always the word as it appears in the file)
     * @return a pair of (immutable word to vector map, vector dimension boxed as an {@code Integer})
     */
    private Tuple2<Map<String, double[]>, Object> buildMatrix(Iterator<String> iterator, Option<Set<String>> option, boolean z) {
        HashMap hashMap = new HashMap();
        boolean expectingHeader = true;
        int dimensions = 0;
        int linesSeen = 0;
        int wordsKept = 0;
        while (iterator.hasNext()) {
            String line = iterator.next();
            // After the increment this is also the 1-based number of the current line.
            linesSeen++;
            String[] tokens = line.split("\\s+");
            if (expectingHeader) {
                // Header line: only the second token (the dimension) is used.
                dimensions = StringOps$.MODULE$.toInt$extension(Predef$.MODULE$.augmentString(tokens[1]));
                expectingHeader = false;
                continue;
            }
            if (tokens.length != dimensions + 1) {
                Predef$.MODULE$.println(new StringBuilder(19).append(tokens.length).append(" != ").append(dimensions + 1).append(" found on line ").append(linesSeen).toString());
            }
            Predef$.MODULE$.assert(tokens.length == dimensions + 1);
            String word = tokens[0];
            if (option.isDefined()) {
                String lookupKey = z ? word.toLowerCase() : word;
                if (!((SetOps) option.get()).contains(lookupKey)) {
                    // Word is not in the requested vocabulary: skip it.
                    continue;
                }
            }
            wordsKept++;
            double[] vector = new double[dimensions];
            for (int col = 0; col < dimensions; col++) {
                vector[col] = StringOps$.MODULE$.toDouble$extension(Predef$.MODULE$.augmentString(tokens[col + 1]));
            }
            MODULE$.norm(vector);
            hashMap.put(word, vector);
        }
        logger().debug(new StringBuilder(63).append("Completed matrix loading. Kept ").append(wordsKept).append(" words out of a total of ").append(linesSeen).append(" words.").toString());
        return new Tuple2<>(hashMap.toMap($less$colon$less$.MODULE$.refl()), BoxesRunTime.boxToInteger(dimensions));
    }

    /**
     * Creates an embedding map backed by the binary file at the given path.
     *
     * @param str path of the binary embedding file
     * @return the result of the {@code File} overload of {@code fromBinary} for that path
     */
    public SanitizedWordEmbeddingMap fromBinary(String str) {
        final File binaryFile = new File(str);
        return fromBinary(binaryFile);
    }

    /**
     * Creates an embedding map whose matrix is read from a binary file.
     *
     * <p>The file is read and parsed inside the function object handed to the constructor, so
     * the read happens whenever that function is invoked, not in this method.
     *
     * @param file the binary embedding file
     * @return a new {@code SanitizedWordEmbeddingMap} built from the deferred matrix loader
     */
    public SanitizedWordEmbeddingMap fromBinary(File file) {
        return new SanitizedWordEmbeddingMap(() -> MODULE$.readBinaryMatrix(FileUtils.readFileToByteArray(file)));
    }

    /**
     * Creates an embedding map whose matrix is read from a binary stream.
     *
     * <p>The stream is drained and parsed inside the function object handed to the
     * constructor, so the read happens whenever that function is invoked, not in this method.
     * This method does not close {@code inputStream}.
     *
     * @param inputStream the stream holding the binary embedding data
     * @return a new {@code SanitizedWordEmbeddingMap} built from the deferred matrix loader
     */
    public SanitizedWordEmbeddingMap fromBinary(InputStream inputStream) {
        return new SanitizedWordEmbeddingMap(() -> MODULE$.readBinaryMatrix(IOUtils.toByteArray(inputStream)));
    }

    /**
     * Creates an embedding map whose matrix is parsed from in-memory binary data.
     *
     * <p>Parsing is performed inside the function object handed to the constructor, so it
     * happens whenever that function is invoked, not in this method.
     *
     * @param bArr the binary embedding data
     * @return a new {@code SanitizedWordEmbeddingMap} built from the deferred matrix loader
     */
    public SanitizedWordEmbeddingMap fromBinary(byte[] bArr) {
        return new SanitizedWordEmbeddingMap(() -> MODULE$.readBinaryMatrix(bArr));
    }

    /**
     * Reads one token from the buffer: all bytes from the current position up to, but not
     * including, the next space (32) or newline (10) byte.
     *
     * <p>The delimiter is consumed, so on return the buffer is positioned just past it. The
     * token bytes are copied straight out of the buffer instead of being boxed one by one
     * into an intermediate collection.
     *
     * <p>NOTE(review): the bytes are decoded with the platform default charset, so the result
     * can differ between machines for non-ASCII words; confirm which encoding the binary
     * files actually use before pinning one.
     *
     * @param byteBuffer the buffer to read from; its position is advanced
     * @return the token decoded with the platform default charset (possibly empty)
     * @throws java.nio.BufferUnderflowException if the buffer ends before a delimiter is found
     */
    private String readNonSpace(ByteBuffer byteBuffer) {
        final int start = byteBuffer.position();
        byte current = byteBuffer.get();
        while (current != (byte) 32 && current != (byte) 10) {
            current = byteBuffer.get();
        }
        // The position is now one past the delimiter, so the token spans [start, position - 1).
        final int length = byteBuffer.position() - 1 - start;
        final byte[] token = new byte[length];
        for (int i = 0; i < length; i++) {
            // Absolute get: copies the byte without moving the buffer position.
            token[i] = byteBuffer.get(start + i);
        }
        return new String(token);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Parses a binary embedding matrix held in memory.
     *
     * <p>Layout as read by this method: two decimal text tokens (number of words, then vector
     * dimension), each terminated by a space or newline; then, for every word, a text token
     * terminated by a space or newline followed by {@code dimension} 4-byte floats. Spaces and
     * newlines between entries are skipped. Floats are read in the platform's native byte
     * order, so the data must have been written with the same endianness.
     *
     * <p>Each vector is normalized with {@code norm} before it is stored. Whitespace skipping
     * is bounds-checked, so data that ends in a space or newline (or that ends right after
     * the header when the word count is zero) is accepted rather than failing with a
     * {@code BufferUnderflowException}.
     *
     * @param bArr the raw bytes of the binary embedding file
     * @return an immutable map from each word to its normalized vector
     * @throws java.nio.BufferUnderflowException if the data ends before all announced words
     *         and vector components have been read
     */
    public Map<String, double[]> readBinaryMatrix(byte[] bArr) {
        HashMap hashMap = new HashMap();
        ByteBuffer buffer = ByteBuffer.wrap(bArr);
        buffer.order(ByteOrder.nativeOrder());
        long wordCount = StringOps$.MODULE$.toLong$extension(Predef$.MODULE$.augmentString(readNonSpace(buffer)));
        long dimensions = StringOps$.MODULE$.toLong$extension(Predef$.MODULE$.augmentString(readNonSpace(buffer)));
        skipSpacesAndNewlines(buffer);
        for (long wordIndex = 0; wordIndex < wordCount; wordIndex++) {
            String word = readNonSpace(buffer);
            double[] vector = new double[(int) dimensions];
            for (int i = 0; i < dimensions; i++) {
                vector[i] = buffer.getFloat();
            }
            norm(vector);
            hashMap.put(word, vector);
            // Consume any separator before the next word (or trailing whitespace at the end).
            skipSpacesAndNewlines(buffer);
        }
        return hashMap.toMap($less$colon$less$.MODULE$.refl());
    }

    /** Advances the buffer past any run of space (32) or newline (10) bytes, stopping at the end of the data. */
    private static void skipSpacesAndNewlines(ByteBuffer buffer) {
        while (buffer.hasRemaining()) {
            // Peek with an absolute get so a non-whitespace byte is left unread.
            byte next = buffer.get(buffer.position());
            if (next != (byte) 32 && next != (byte) 10) {
                return;
            }
            buffer.position(buffer.position() + 1);
        }
    }

    /**
     * Small command-line demo: loads embeddings from the file named by the first argument and
     * prints a few similarity queries to standard output.
     *
     * <p>It prints the 40 words most similar to "house", then text, max and average similarity
     * scores between short example word lists.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is the embedding file path
     */
    public void main(String[] strArr) {
        SanitizedWordEmbeddingMap embeddings = new SanitizedWordEmbeddingMap(strArr[0], (Option<Set<String>>) None$.MODULE$, $lessinit$greater$default$3());
        Predef$.MODULE$.println("Words most similar to \"house\":");
        embeddings.mostSimilarWords((Set) Predef$.MODULE$.Set().apply(ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"house"})), 40).foreach(entry -> {
            $anonfun$main$1(entry);
            return BoxedUnit.UNIT;
        });
        // Example word lists used for the similarity queries below.
        $colon.colon appleText = new $colon.colon("a", new $colon.colon("delicious", new $colon.colon("apple", Nil$.MODULE$)));
        $colon.colon pearText = new $colon.colon("the", new $colon.colon("tasty", new $colon.colon("pear", Nil$.MODULE$)));
        $colon.colon unrelatedText = new $colon.colon("computer", new $colon.colon("oxygen", Nil$.MODULE$));
        Predef$.MODULE$.println(new StringBuilder(17).append("Text similarity: ").append(embeddings.sanitizedTextSimilarity(appleText, pearText)).toString());
        Predef$.MODULE$.println(new StringBuilder(17).append("Text similarity: ").append(embeddings.sanitizedTextSimilarity(appleText, unrelatedText)).toString());
        Predef$.MODULE$.println(new StringBuilder(16).append("Max similarity: ").append(embeddings.sanitizedMaxSimilarity(appleText, pearText)).toString());
        Predef$.MODULE$.println(new StringBuilder(16).append("Avg similarity: ").append(embeddings.sanitizedAvgSimilarity(appleText, pearText)).toString());
    }

    /**
     * Compiler-generated filter used by {@code buildMatrix}: accepts every non-null
     * (line, index) pair.
     *
     * @param tuple2 the pair to test
     * @return {@code true} unless {@code tuple2} is {@code null}
     */
    public static final /* synthetic */ boolean $anonfun$buildMatrix$1(Tuple2 tuple2) {
        return tuple2 != null;
    }

    /**
     * Compiler-generated body of the printing callback used by {@code main}: writes the pair's
     * first element (a string), a single space and its second element (a double) as one line
     * on standard output.
     *
     * @param tuple2 the (string, double) pair to print
     */
    public static final /* synthetic */ void $anonfun$main$1(Tuple2 tuple2) {
        String word = (String) tuple2._1();
        double value = tuple2._2$mcD$sp();
        String line = new StringBuilder(1).append(word).append(" ").append(value).toString();
        Predef$.MODULE$.println(line);
    }

    /** Private constructor: the only instance is the one assigned to {@code MODULE$}. */
    private SanitizedWordEmbeddingMap$() {
    }
}
