package de.datexis.encoder.impl;

import de.datexis.encoder.StaticEncoder;
import de.datexis.model.Span;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.shade.jackson.annotation.JsonIgnore;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/encoder/impl/SurfaceEncoder.class */
/**
 * Encodes surface-form properties of a piece of text (casing, digits, punctuation) as a
 * column vector of 0/1 values.
 *
 * <p>The vector produced by {@link #encode(String)} currently has 11 rows, in this order:
 * startsWithUppercase, startsWithLowercase, isAllUppercase, isAllLowercase, isMixedCase,
 * isAllNumeric, includesNumeric, startsWithNumeric, endsWithNumeric, startsWithPunctuation,
 * endsWithPunctuation. The remaining public predicates (length checks and token-class lookups)
 * are not part of the encoded vector.
 */
public class SurfaceEncoder extends StaticEncoder {
    static Collection<String> symbols = Arrays.asList("#", "$", "%", "@", "^", "_", "~", "¢", "£", "¥", "§", "€");
    static Collection<String> operators = Arrays.asList("&", "*", "+", "=");
    static Collection<String> oquotes = Arrays.asList("\"", "`", "``");
    static Collection<String> cquotes = Arrays.asList("'", "''");
    static Collection<String> oparanthesis = Arrays.asList("(", "<", "[", "{", "-LRB-");
    static Collection<String> cparanthesis = Arrays.asList(")", ">", "]", "}", "-RRB-");
    static Collection<String> slashes = Arrays.asList("/", "\\", "|");
    static Collection<String> commas = Arrays.asList(",");
    static Collection<String> dashes = Arrays.asList("-", "–", "--", "---");
    static Collection<String> sterminator = Arrays.asList(".", "!", "?");
    static Collection<String> colons = Arrays.asList(":", ";", "...");

    /** Matches every character that is not a Unicode letter. */
    private static final Pattern NON_LETTER = Pattern.compile("[^\\p{L}]");

    /** Matches every character that is neither a Unicode number nor Unicode punctuation. */
    private static final Pattern NON_NUMBER_OR_PUNCTUATION = Pattern.compile("[^\\p{N}\\p{P}]");

    /** Matches every character that is not Unicode punctuation. */
    private static final Pattern NON_PUNCTUATION = Pattern.compile("[^\\p{P}]");

    /** Creates an encoder with the default id {@code "SUR"}. */
    public SurfaceEncoder() {
        super("SUR");
        this.log = LoggerFactory.getLogger(SurfaceEncoder.class);
    }

    /**
     * Creates an encoder with a caller-supplied id.
     *
     * @param str the id passed to the superclass constructor
     */
    public SurfaceEncoder(String str) {
        super(str);
        this.log = LoggerFactory.getLogger(SurfaceEncoder.class);
    }

    /** @return the fixed display name {@code "Surface Form Encoder"} */
    @Override
    public String getName() {
        return "Surface Form Encoder";
    }

    /**
     * Determines the vector size by encoding a sample token, so it always matches what
     * {@link #encode(String)} actually emits.
     *
     * @return the number of elements in an encoded vector
     */
    @Override
    @JsonIgnore
    public long getEmbeddingVectorSize() {
        return encode("Test").length();
    }

    /**
     * Validates a stored vector size against this implementation; it does not set anything.
     *
     * @param i the expected vector size
     * @throws IllegalArgumentException if {@code i} differs from {@link #getEmbeddingVectorSize()}
     */
    public void setVectorSize(int i) {
        long actualSize = getEmbeddingVectorSize();
        if (i != actualSize) {
            throw new IllegalArgumentException("Vector size of saved Encoder (" + actualSize + ") differs from implementation (" + i + ")");
        }
    }

    /**
     * Encodes the text of the given span.
     *
     * @param span the span whose {@code getText()} value is encoded
     * @return the feature vector, see {@link #encode(String)}
     */
    @Override
    public INDArray encode(Span span) {
        return encode(span.getText());
    }

    /**
     * Encodes the trimmed input as a column vector with one row per surface feature,
     * holding 1.0 where the feature applies and 0.0 otherwise.
     *
     * @param str the text to encode; leading and trailing whitespace is ignored
     * @return an N x 1 array of 0/1 values in the feature order listed on the class
     */
    @Override
    public INDArray encode(String str) {
        String token = str.trim();
        // The order of these entries defines the layout of the encoded vector.
        boolean[] features = {
            startsWithUppercase(token),
            startsWithLowercase(token),
            isAllUppercase(token),
            isAllLowercase(token),
            isMixedCase(token),
            isAllNumeric(token),
            includesNumeric(token),
            startsWithNumeric(token),
            endsWithNumeric(token),
            startsWithPunctuation(token),
            endsWithPunctuation(token)
        };
        INDArray vector = Nd4j.zeros(features.length, 1L);
        for (int row = 0; row < features.length; row++) {
            vector.put(row, 0, Double.valueOf(features[row] ? 1.0d : 0.0d));
        }
        return vector;
    }

    /** @return true if {@code str} has length 0 */
    public boolean isEmpty(String str) {
        return str.isEmpty();
    }

    /** @return true if {@code str} has exactly 1 char */
    public boolean is1Char(String str) {
        return str.length() == 1;
    }

    /** @return true if {@code str} has exactly 2 chars */
    public boolean is2Chars(String str) {
        return str.length() == 2;
    }

    /** @return true if {@code str} has exactly 3 chars */
    public boolean is3Chars(String str) {
        return str.length() == 3;
    }

    /** @return true if {@code str} has exactly 4 chars */
    public boolean is4Chars(String str) {
        return str.length() == 4;
    }

    /** @return true if {@code str} has more than 4 chars */
    public boolean isOver4Chars(String str) {
        return str.length() > 4;
    }

    /** @return true if {@code str} has more than 12 chars */
    public boolean isOver12Chars(String str) {
        return str.length() > 12;
    }

    /**
     * Checks the first letter of the input, skipping any leading non-letters.
     *
     * @return true if the first letter is unchanged by upper-casing; false if there is no letter
     */
    public boolean startsWithUppercase(String str) {
        String letters = lettersOnly(str);
        if (letters.isEmpty()) {
            return false;
        }
        String first = letters.substring(0, 1);
        // Locale.ROOT keeps the feature independent of the JVM's default locale.
        return first.toUpperCase(Locale.ROOT).equals(first);
    }

    /**
     * Checks the first letter of the input, skipping any leading non-letters.
     *
     * @return true if the first letter is unchanged by lower-casing; false if there is no letter
     */
    public boolean startsWithLowercase(String str) {
        String letters = lettersOnly(str);
        if (letters.isEmpty()) {
            return false;
        }
        String first = letters.substring(0, 1);
        return first.toLowerCase(Locale.ROOT).equals(first);
    }

    /**
     * @return true if the input contains at least one letter and all of its letters are
     *     unchanged by upper-casing
     */
    public boolean isAllUppercase(String str) {
        String letters = lettersOnly(str);
        if (letters.isEmpty()) {
            return false;
        }
        return letters.toUpperCase(Locale.ROOT).equals(letters);
    }

    /**
     * @return true if all letters of the input are unchanged by lower-casing. Unlike
     *     {@link #isAllUppercase(String)}, input without any letter also yields true; this
     *     is kept as-is because {@link #isMixedCase(String)} and the encoded vector rely on it.
     */
    public boolean isAllLowercase(String str) {
        String letters = lettersOnly(str);
        return letters.toLowerCase(Locale.ROOT).equals(letters);
    }

    /**
     * @return true if the input neither starts with an uppercase letter, nor is all
     *     uppercase, nor is all lowercase (for example {@code "iPhone"})
     */
    public boolean isMixedCase(String str) {
        return !(startsWithUppercase(str) || isAllUppercase(str) || isAllLowercase(str));
    }

    /**
     * @return true if every character is a Unicode number or punctuation character; note
     *     that punctuation counts as "numeric" here and that the empty string yields true
     */
    public boolean isAllNumeric(String str) {
        return str.equals(numbersAndPunctuationOnly(str));
    }

    /** @return true if the input contains at least one Unicode number or punctuation character */
    public boolean includesNumeric(String str) {
        return !numbersAndPunctuationOnly(str).isEmpty();
    }

    /**
     * @return true if the first char is a Unicode number or punctuation character; false for
     *     empty input
     */
    public boolean startsWithNumeric(String str) {
        if (str.isEmpty()) {
            return false;
        }
        String first = str.substring(0, 1);
        return first.equals(numbersAndPunctuationOnly(first));
    }

    /**
     * @return true if the last char is a Unicode number or punctuation character; false for
     *     empty input
     */
    public boolean endsWithNumeric(String str) {
        if (str.isEmpty()) {
            return false;
        }
        String last = str.substring(str.length() - 1);
        return last.equals(numbersAndPunctuationOnly(last));
    }

    /** @return true if the first char is Unicode punctuation; false for empty input */
    public boolean startsWithPunctuation(String str) {
        if (str.isEmpty()) {
            return false;
        }
        String first = str.substring(0, 1);
        return first.equals(punctuationOnly(first));
    }

    /** @return true if the last char is Unicode punctuation; false for empty input */
    public boolean endsWithPunctuation(String str) {
        if (str.isEmpty()) {
            return false;
        }
        String last = str.substring(str.length() - 1);
        return last.equals(punctuationOnly(last));
    }

    /** @return true if {@code str} is one of the known symbol tokens */
    public boolean isSymbol(String str) {
        return symbols.contains(str);
    }

    /** @return true if {@code str} is one of the known operator tokens */
    public boolean isOperator(String str) {
        return operators.contains(str);
    }

    /** @return true if {@code str} is one of the known opening-quote tokens */
    public boolean isOpeningQuote(String str) {
        // Previously looked up 'symbols', so no quote token was ever recognized.
        return oquotes.contains(str);
    }

    /** @return true if {@code str} is one of the known closing-quote tokens */
    public boolean isClosingQuote(String str) {
        // Previously looked up 'symbols', so no quote token was ever recognized.
        return cquotes.contains(str);
    }

    /** @return true if {@code str} is one of the known opening-bracket tokens */
    public boolean isOpeningParanthesis(String str) {
        return oparanthesis.contains(str);
    }

    /** @return true if {@code str} is one of the known closing-bracket tokens */
    public boolean isClosingParanthesis(String str) {
        return cparanthesis.contains(str);
    }

    /** @return true if {@code str} is one of the known slash tokens */
    public boolean isSlash(String str) {
        return slashes.contains(str);
    }

    /** @return true if {@code str} is a comma token */
    public boolean isComma(String str) {
        return commas.contains(str);
    }

    /** @return true if {@code str} is one of the known dash tokens */
    public boolean isDash(String str) {
        return dashes.contains(str);
    }

    /** @return true if {@code str} is one of the known sentence-terminator tokens */
    public boolean isSentenceTerminator(String str) {
        return sterminator.contains(str);
    }

    /** @return true if {@code str} is one of the known colon-like tokens */
    public boolean isColon(String str) {
        return colons.contains(str);
    }

    /** Removes every non-letter character from the input. */
    private static String lettersOnly(String str) {
        return NON_LETTER.matcher(str).replaceAll("");
    }

    /** Removes every character that is neither a number nor punctuation. */
    private static String numbersAndPunctuationOnly(String str) {
        return NON_NUMBER_OR_PUNCTUATION.matcher(str).replaceAll("");
    }

    /** Removes every non-punctuation character from the input. */
    private static String punctuationOnly(String str) {
        return NON_PUNCTUATION.matcher(str).replaceAll("");
    }
}
