package com.github.chen0040.embeddings;

import com.github.chen0040.embeddings.utils.HttpClient;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import net.lingala.zip4j.core.ZipFile;
import net.lingala.zip4j.exception.ZipException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/github/chen0040/embeddings/GloVeModel.class */
/**
 * In-memory lookup table of GloVe word vectors read from a {@code glove.6B.<n>d.txt} file.
 *
 * <p>If the text file is missing from the data directory, {@code glove.6B.zip} is looked up
 * in the same directory (and downloaded when absent) and extracted there before loading.
 *
 * <p>Instances hold mutable state in a plain {@link HashMap} and perform no synchronization.
 */
public class GloVeModel {
    /** Remote location of the archive that is fetched when no local copy exists. */
    private static final String GLOVE_ZIP_URL = "http://nlp.stanford.edu/data/glove.6B.zip";
    /** File name under which the archive is stored inside the data directory. */
    private static final String GLOVE_ZIP_FILE_NAME = "glove.6B.zip";
    /** Data directory used by the single-argument {@link #load(int)} overloads. */
    private static final String DEFAULT_DATA_DIR = "/tmp";
    /** Value of {@link #dimension} while no vectors are loaded. */
    private static final int NOT_LOADED = -1;
    /** Punctuation marks that {@link #filter(String)} detaches from the preceding word. */
    private static final String[] PUNCTUATION = {",", ".", ";", ":", "?", "!", "\"", "'"};

    private static final Logger logger = LoggerFactory.getLogger(GloVeModel.class);

    /** Maps each word, exactly as it appears in the text file, to its vector. */
    private final Map<String, float[]> word2em = new HashMap<>();
    /** Length of every loaded vector, or {@link #NOT_LOADED}. */
    private int dimension = NOT_LOADED;

    /**
     * Lists the vector dimensions offered through the {@code loadNNN()} convenience methods.
     *
     * @return a fixed-size list containing 50, 100, 200 and 300
     */
    public static List<Integer> getAvailableDimensionList() {
        return Arrays.asList(50, 100, 200, 300);
    }

    /** Builds the name of the text file that holds vectors of the given dimension. */
    private static String getGloVeTextFileName(int vectorDimension) {
        return "glove.6B." + vectorDimension + "d.txt";
    }

    /** Loads the 100-dimensional vectors from the default data directory. */
    public Map<String, float[]> load100() {
        return load(100);
    }

    /** Loads the 50-dimensional vectors from the default data directory. */
    public Map<String, float[]> load50() {
        return load(50);
    }

    /** Loads the 200-dimensional vectors from the default data directory. */
    public Map<String, float[]> load200() {
        return load(200);
    }

    /** Loads the 300-dimensional vectors from the default data directory. */
    public Map<String, float[]> load300() {
        return load(300);
    }

    /**
     * Loads vectors of the given dimension using {@code /tmp} as the data directory.
     *
     * @param i vector dimension, used to pick the text file
     * @return see {@link #load(String, int)}
     */
    public Map<String, float[]> load(int i) {
        return load(DEFAULT_DATA_DIR, i);
    }

    /**
     * Looks up the vector of a single word. The word is lower-cased before the lookup.
     *
     * @param str the word to look up; must not be {@code null}
     * @return the stored vector (the model's own array, not a copy), or {@code null} when
     *     the word is not in the loaded vocabulary
     */
    public float[] encodeWord(String str) {
        // Locale.ROOT keeps the mapping stable regardless of the JVM default locale
        // (e.g. under a Turkish locale "I" would otherwise not become "i").
        return this.word2em.get(str.toLowerCase(Locale.ROOT));
    }

    /**
     * Encodes a text as the element-wise sum of the vectors of its known words.
     * Words are obtained by lower-casing the text, detaching punctuation and splitting on
     * single spaces; words that are not in the vocabulary contribute nothing.
     *
     * @param str the text to encode; must not be {@code null}
     * @return a new array of length {@link #getWordVecDimension()}
     * @throws IllegalStateException if no vectors have been loaded successfully
     */
    public float[] encodeDocument(String str) {
        if (this.dimension < 0) {
            // Without this guard the array allocation below fails with an opaque
            // NegativeArraySizeException.
            throw new IllegalStateException(
                    "No GloVe vectors are loaded; call one of the load methods first");
        }
        float[] sum = new float[this.dimension];
        for (String token : filter(str).split(" ")) {
            String word = token.trim();
            if (word.isEmpty()) {
                continue;
            }
            float[] vector = encodeWord(word);
            if (vector == null) {
                continue;
            }
            for (int k = 0; k < this.dimension; k++) {
                sum[k] += vector[k];
            }
        }
        return sum;
    }

    /**
     * Lower-cases the text and inserts a space in front of every punctuation mark so that
     * splitting on spaces yields the punctuation as separate tokens.
     */
    private String filter(String text) {
        String result = text.toLowerCase(Locale.ROOT);
        for (String mark : PUNCTUATION) {
            result = result.replace(mark, " " + mark);
        }
        return result;
    }

    /**
     * @return the number of words currently loaded
     */
    public int size() {
        return this.word2em.size();
    }

    /**
     * @return the dimension of the loaded vectors, or {@code -1} when nothing is loaded
     */
    public int getWordVecDimension() {
        return this.dimension;
    }

    /**
     * Replaces the current contents with the vectors stored in
     * {@code <str>/glove.6B.<i>d.txt}, obtaining that file from the GloVe archive first
     * when it does not exist yet.
     *
     * <p>Lines that are blank, have fewer than {@code i} values after the word, or contain
     * a value that is not a number are skipped; the number of skipped lines is logged.
     *
     * @param str directory that holds (or will receive) the archive and the text files
     * @param i vector dimension; selects the text file and the number of values read per line
     * @return on success the model's own backing map (later loads modify it); an empty map
     *     when the file could not be obtained or read
     */
    public Map<String, float[]> load(String str, int i) {
        this.dimension = NOT_LOADED;
        this.word2em.clear();

        String textFilePath = str + "/" + getGloVeTextFileName(i);
        if (!ensureTextFileAvailable(str, textFilePath)) {
            return this.word2em;
        }

        logger.info("loading {} into word2em", textFilePath);
        try {
            readVectors(textFilePath, i);
        } catch (IOException e) {
            logger.error("Failed to read file " + textFilePath, e);
            this.word2em.clear();
            return new HashMap<>();
        }
        this.dimension = i;
        return this.word2em;
    }

    /**
     * Makes sure the text file exists, downloading and/or extracting the archive if needed.
     *
     * @return {@code false} when the download or the extraction reported a failure
     */
    private boolean ensureTextFileAvailable(String dataDirPath, String textFilePath) {
        if (new File(textFilePath).exists()) {
            return true;
        }
        String zipFilePath = dataDirPath + "/" + GLOVE_ZIP_FILE_NAME;
        if (!new File(zipFilePath).exists()) {
            logger.info("{} not found on local machine, downloading it from {}", zipFilePath, GLOVE_ZIP_URL);
            // A false return value is treated as a failed download.
            if (!HttpClient.downloadFile(GLOVE_ZIP_URL, zipFilePath)) {
                return false;
            }
            logger.info("{} is downloaded", zipFilePath);
        }
        return unZip(zipFilePath, dataDirPath);
    }

    /**
     * Reads every line of the text file into {@link #word2em}. The reader is always closed,
     * including when reading fails part-way through.
     */
    private void readVectors(String textFilePath, int vectorDimension) throws IOException {
        int skippedLines = 0;
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(textFilePath), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] tokens = line.split(" ");
                float[] vector = parseVector(tokens, vectorDimension);
                if (vector == null) {
                    skippedLines++;
                    continue;
                }
                this.word2em.put(tokens[0], vector);
            }
        }
        if (skippedLines > 0) {
            logger.warn("skipped {} malformed line(s) in {}", skippedLines, textFilePath);
        }
    }

    /**
     * Parses the values that follow the word on one line of the text file.
     *
     * @param tokens the line split on spaces; element 0 is the word
     * @return the parsed vector, or {@code null} when the line has too few values or a
     *     value that is not a valid float
     */
    private static float[] parseVector(String[] tokens, int vectorDimension) {
        if (tokens.length <= vectorDimension) {
            return null;
        }
        float[] vector = new float[vectorDimension];
        try {
            for (int k = 0; k < vectorDimension; k++) {
                vector[k] = Float.parseFloat(tokens[k + 1]);
            }
        } catch (NumberFormatException e) {
            return null;
        }
        return vector;
    }

    /**
     * Extracts the whole archive into the target directory.
     *
     * @return {@code true} on success, {@code false} when extraction threw a {@link ZipException}
     */
    private boolean unZip(String zipFilePath, String targetDirPath) {
        logger.info("unzipping {} to {}", zipFilePath, targetDirPath);
        try {
            new ZipFile(zipFilePath).extractAll(targetDirPath);
            return true;
        } catch (ZipException e) {
            logger.error("Failed to unzip " + zipFilePath, e);
            return false;
        }
    }

    /**
     * Computes the Euclidean distance between the vectors of two words.
     *
     * @param str first word; must not be {@code null}
     * @param str2 second word; must not be {@code null}
     * @return the distance, or {@code -1.0} when either word is not in the vocabulary
     */
    public double distance(String str, String str2) {
        float[] first = encodeWord(str);
        float[] second = encodeWord(str2);
        if (first == null || second == null) {
            return -1.0d;
        }
        // Accumulate in double so that precision is not lost while summing the squares.
        double sumOfSquares = 0.0d;
        for (int k = 0; k < first.length; k++) {
            double diff = first[k] - second[k];
            sumOfSquares += diff * diff;
        }
        return Math.sqrt(sumOfSquares);
    }
}
