package de.spieleck.app.cngram;

import com.fasterxml.jackson.core.util.BufferRecycler;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;

/* loaded from: input_file:WEB-INF/lib/cngram-1.0-0.060327.jar:de/spieleck/app/cngram/NGramProfileImpl.class */
/**
 * Mutable {@link NGramProfile} that counts character n-grams.
 *
 * <p>N-grams are collected either from text ({@link #analyze(CharSequence)}) or from a
 * previously saved profile ({@link #load(InputStream)}). Two array views over the collected
 * n-grams are built lazily and cached: one in the natural ordering of {@link NGram}
 * (served by {@link #getSorted()}) and one ordered by {@code CHAR_SEQ_COMPARATOR}
 * (used for binary search in {@link #get(CharSequence)}).
 *
 * <p>Mutating methods are not synchronized. The cached arrays are only published after they
 * have been fully sorted, so a reader never observes a half-sorted cache, but concurrent
 * mutation of the profile itself is not guarded.
 */
public class NGramProfileImpl implements NGramProfile {
    public static final char SEPARATOR = '_';
    public static final int DEFAULT_MIN_NGRAM_LENGTH = 1;
    public static final int DEFAULT_MAX_NGRAM_LENGTH = 5;

    /** Charset used for both reading and writing profile files. */
    private static final String PROFILE_CHARSET = "UTF-8";

    /** Initial buffer capacity for {@link #toString()}; only a sizing hint. */
    private static final int TO_STRING_CAPACITY = 2000;

    private String name;

    /** Cache of all n-grams in natural order; {@code null} when it must be rebuilt. */
    private volatile NGram[] sorted;

    /** Cache of all n-grams ordered by {@code CHAR_SEQ_COMPARATOR}; {@code null} when stale. */
    private volatile NGram[] ordered;

    private final int minNGramLength;
    private final int maxNGramLength;

    /** Running total of counted n-gram occurrences. */
    private int normalization;

    /** Collected n-grams; keys are always {@link String} instances so lookups are consistent. */
    private final HashMap<CharSequence, NGram> ngrams = new HashMap<CharSequence, NGram>();

    /** Optional whitelist; when non-null only n-grams contained in it are stored. */
    private Set restricted;

    /**
     * Creates an empty profile using the default n-gram length range.
     *
     * @param str name of the profile
     */
    public NGramProfileImpl(String str) {
        this(str, DEFAULT_MIN_NGRAM_LENGTH, DEFAULT_MAX_NGRAM_LENGTH);
    }

    /**
     * Creates an empty profile.
     *
     * @param str name of the profile
     * @param i   minimum n-gram length to collect
     * @param i2  maximum n-gram length to collect
     */
    public NGramProfileImpl(String str, int i, int i2) {
        this.minNGramLength = i;
        this.maxNGramLength = i2;
        // Assigned directly rather than through setName(): a constructor should not
        // invoke a method that subclasses may override.
        this.name = str;
    }

    /**
     * Restricts which n-grams are stored by subsequent analysis.
     *
     * @param set n-grams that may be stored, or {@code null} to store every n-gram
     */
    public void setRestricted(Set set) {
        this.restricted = set;
    }

    /**
     * Splits the text into runs of letters (lower-cased), wraps each run in
     * {@link #SEPARATOR} characters and adds the n-grams of the wrapped run to this profile.
     *
     * @param charSequence text to analyze
     */
    public void analyze(CharSequence charSequence) {
        StringBuilder word = new StringBuilder(30).append(SEPARATOR);
        for (int pos = 0; pos < charSequence.length(); pos++) {
            char c = Character.toLowerCase(charSequence.charAt(pos));
            if (Character.isLetter(c)) {
                word.append(c);
            } else {
                addAnalyze(word);
                // Keep only the leading separator for the next word.
                word.setLength(1);
            }
        }
        addAnalyze(word);
    }

    /** Terminates a non-empty word with the separator and adds its n-grams. */
    private void addAnalyze(StringBuilder word) {
        if (word.length() > 1) {
            word.append(SEPARATOR);
            addNGrams(word);
        }
    }

    /** Removes all n-grams and resets the occurrence total and the cached views. */
    public void clear() {
        resetState();
    }

    /** Drops every n-gram together with all state derived from them. */
    private void resetState() {
        this.ngrams.clear();
        this.normalization = 0;
        this.sorted = null;
        this.ordered = null;
    }

    /** @return number of distinct n-grams stored in this profile */
    @Override // de.spieleck.app.cngram.NGramProfile
    public int getCount() {
        return this.ngrams.size();
    }

    /** @return total number of n-gram occurrences counted so far */
    @Override // de.spieleck.app.cngram.NGramProfile
    public int getNormalization() {
        return this.normalization;
    }

    /**
     * Adds the n-grams of every configured length that is strictly shorter than the
     * given sequence.
     *
     * @param charSequence sequence to take the n-grams from
     */
    public void addNGrams(CharSequence charSequence) {
        for (int n = this.minNGramLength; n <= this.maxNGramLength && n < charSequence.length(); n++) {
            addNGrams(charSequence, n);
        }
    }

    /**
     * Counts every n-gram of length {@code n} in {@code word}.
     *
     * <p>An n-gram rejected by the restriction set is not stored, yet it still
     * contributes to the occurrence total.
     */
    private void addNGrams(CharSequence word, int n) {
        int lastStart = word.length() - n;
        for (int start = 0; start <= lastStart; start++) {
            // Normalise to String: equals/hashCode of other CharSequence implementations
            // are not guaranteed to agree with the String keys inserted by load().
            CharSequence gram = word.subSequence(start, start + n).toString();
            NGram entry = this.ngrams.get(gram);
            if (entry == null) {
                entry = new NGramImpl(gram);
                if (this.restricted == null || this.restricted.contains(entry)) {
                    this.ngrams.put(gram, entry);
                    this.ordered = null;
                }
            }
            entry.inc();
            this.normalization++;
            this.sorted = null;
        }
    }

    /** @return iterator over all stored n-grams in their natural ordering */
    @Override // de.spieleck.app.cngram.NGramProfile
    public Iterator getSorted() {
        NGram[] view = this.sorted;
        if (view == null) {
            view = (NGram[]) this.ngrams.values().toArray(NO_NGRAM);
            Arrays.sort(view);
            // Publish only after sorting so no reader sees a partially sorted array.
            this.sorted = view;
        }
        return Arrays.asList(view).iterator();
    }

    /**
     * Looks up a stored n-gram by its text.
     *
     * @param charSequence text of the n-gram
     * @return the matching n-gram, or {@code null} if this profile does not contain it
     */
    @Override // de.spieleck.app.cngram.NGramProfile
    public NGram get(CharSequence charSequence) {
        NGram[] view = this.ordered;
        if (view == null) {
            view = (NGram[]) this.ngrams.values().toArray(NO_NGRAM);
            Arrays.sort(view, CHAR_SEQ_COMPARATOR);
            // Publish only after sorting so no reader sees a partially sorted array.
            this.ordered = view;
        }
        int index = Arrays.binarySearch(view, charSequence, CHAR_SEQ_COMPARATOR);
        return index < 0 ? null : view[index];
    }

    /** @return the profile name followed by one {@code "<ngram> <count>"} line per n-gram */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder(TO_STRING_CAPACITY);
        Iterator it = getSorted();
        out.append("NGramProfile: ").append(this.name).append('\n');
        while (it.hasNext()) {
            NGram gram = (NGram) it.next();
            out.append((CharSequence) gram).append(' ').append(gram.getCount()).append('\n');
        }
        return out.toString();
    }

    /**
     * Replaces the content of this profile with the profile read from the stream (UTF-8).
     *
     * <p>Line handling, after trimming:
     * <ul>
     *   <li>lines shorter than two characters are ignored;</li>
     *   <li>{@code -c} registers {@code c} as an eliminator: later n-grams containing it
     *       are dropped;</li>
     *   <li>a line starting with {@code FINISHREAD_STR} ends reading;</li>
     *   <li>other lines starting with {@code #} are comments;</li>
     *   <li>a line starting with {@code NORMALIZATION_STR} declares the expected total;</li>
     *   <li>everything else is {@code "<ngram> <count>"}, with {@code '_'} in the n-gram
     *       read as a space; n-grams outside the configured length range are skipped.</li>
     * </ul>
     * If a declared total differs from the sum of the loaded counts, a warning is printed
     * to {@code System.err}.
     *
     * <p>The stream is not closed; it remains owned by the caller.
     *
     * @param inputStream stream to read the profile from
     * @throws IOException           on read failure or when a data line has no space
     *                               separating n-gram and count
     * @throws NumberFormatException when a count is not a valid integer
     */
    public void load(InputStream inputStream) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, PROFILE_CHARSET));
        // Reset everything derived from the old content, not just the map; otherwise the
        // total would accumulate across loads and the cached views would be stale.
        resetState();
        int declaredTotal = -1;
        StringBuilder eliminators = new StringBuilder();
        String rawLine;
        while ((rawLine = reader.readLine()) != null) {
            String line = rawLine.trim();
            if (line.length() < 2) {
                continue;
            }
            if (line.charAt(0) == '-') {
                eliminators.append(line.charAt(1));
                continue;
            }
            // Must be tested before the generic comment check below.
            if (line.startsWith(NGramProfile.FINISHREAD_STR)) {
                break;
            }
            if (line.charAt(0) == '#') {
                continue;
            }
            int separator = line.indexOf(' ');
            if (separator < 0) {
                throw new IOException(
                        "Malformed n-gram profile line (expected \"<ngram> <count>\"): " + line);
            }
            String gram = line.substring(0, separator).trim().replace('_', ' ');
            if (" ".equals(gram)) {
                continue;
            }
            int count = Integer.parseInt(line.substring(separator + 1).trim());
            if (line.startsWith(NGramProfile.NORMALIZATION_STR)) {
                declaredTotal = count;
            } else if (gram.length() >= this.minNGramLength
                    && gram.length() <= this.maxNGramLength
                    && !containsAny(gram, eliminators)) {
                this.ngrams.put(gram, new NGramImpl(gram, count));
                this.normalization += count;
            }
        }
        if (declaredTotal != -1 && declaredTotal != this.normalization) {
            System.err.println(" WARNING " + declaredTotal + " != " + this.normalization);
        }
    }

    /** Returns whether {@code text} contains at least one of the characters in {@code chars}. */
    private static boolean containsAny(String text, CharSequence chars) {
        for (int k = 0; k < chars.length(); k++) {
            if (text.indexOf(chars.charAt(k)) >= 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds a profile with the default n-gram length range by analyzing a text stream
     * line by line. The stream is not closed.
     *
     * @param str         name of the new profile
     * @param inputStream text to analyze
     * @param str2        name of the charset used to decode {@code inputStream}
     * @return the populated profile
     * @throws IOException on read failure or if the charset name is not supported
     */
    public static NGramProfileImpl createProfile(String str, InputStream inputStream, String str2) throws IOException {
        NGramProfileImpl profile = new NGramProfileImpl(str);
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, str2));
        String line;
        while ((line = reader.readLine()) != null) {
            profile.analyze(line);
        }
        return profile;
    }

    /**
     * Writes this profile as UTF-8 text: a comment header, the {@code ngram_count} total
     * and one {@code "<ngram> <count>"} line per n-gram in {@link #getSorted()} order.
     * The stream is flushed but not closed.
     *
     * @param outputStream stream to write to
     * @throws IOException on write failure
     */
    public void save(OutputStream outputStream) throws IOException {
        Iterator it = getSorted();
        // Every line uses the same charset that load() reads with.
        outputStream.write(("# NgramProfile generated at " + new Date()
                + " for Language Identification\n").getBytes(PROFILE_CHARSET));
        outputStream.write(("ngram_count " + this.normalization + "\n").getBytes(PROFILE_CHARSET));
        while (it.hasNext()) {
            NGram gram = (NGram) it.next();
            outputStream.write((gram + " " + gram.getCount() + "\n").getBytes(PROFILE_CHARSET));
        }
        outputStream.flush();
    }

    /** @return name of this profile */
    @Override // de.spieleck.app.cngram.NGramProfile
    public String getName() {
        return this.name;
    }

    /** @param str new name of this profile */
    public void setName(String str) {
        this.name = str;
    }
}
