/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.language;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.carrot2.language.LexicalData;
import org.carrot2.util.ResourceLookup;

/**
 * {@link LexicalData} implementation backed by an in-memory set of stopwords and a single
 * regular expression that is the alternation of all stoplabel patterns.
 *
 * <p>Word and label resources are read as UTF-8 text, one entry per line. Lines are trimmed;
 * empty lines and lines starting with {@code #} are skipped.
 */
public final class LexicalDataImpl implements LexicalData {
    /** Words for which {@link #ignoreWord(CharSequence)} returns {@code true}. */
    private final Set<String> stopwords;

    /** Alternation of all stoplabel expressions, or {@code null} when there are none. */
    private final Pattern stoplabelPattern;

    /**
     * Creates lexical data from already prepared structures.
     *
     * @param stopwords words to ignore; the set is stored by reference (not copied) and is
     *     looked up by exact string equality
     * @param stoplabelPattern pattern a label must match in full to be ignored, or
     *     {@code null} if no label should ever be ignored
     */
    public LexicalDataImpl(HashSet<String> stopwords, Pattern stoplabelPattern) {
        this.stopwords = stopwords;
        this.stoplabelPattern = stoplabelPattern;
    }

    /**
     * Creates lexical data by loading stopwords and stoplabel patterns from resources.
     *
     * @param loader resource lookup used to open both resources
     * @param stopwordsResource name of the stopword resource; entries are lower-cased using
     *     {@link Locale#ROOT}
     * @param stoplabelsResource name of the stoplabel resource; each entry is a regular
     *     expression
     * @throws IOException if either resource cannot be opened or read
     */
    public LexicalDataImpl(ResourceLookup loader, String stopwordsResource, String stoplabelsResource) throws IOException {
        this(loadStopwords(loader, stopwordsResource), loadStoplabels(loader, stoplabelsResource));
    }

    /**
     * Returns {@code true} if the word is contained in the stopword set. The lookup is an
     * exact match and performs no case normalisation of {@code word}.
     */
    @Override
    public boolean ignoreWord(CharSequence word) {
        return stopwords.contains(word.toString());
    }

    /**
     * Returns {@code true} if the entire label matches any stoplabel expression. Always
     * returns {@code false} when no stoplabel pattern is configured.
     */
    @Override
    public boolean ignoreLabel(CharSequence label) {
        return stoplabelPattern != null && stoplabelPattern.matcher(label).matches();
    }

    /**
     * Reads stoplabel expressions from the given resource and combines them into one pattern.
     *
     * @return the combined pattern, or {@code null} if the resource holds no expressions
     * @throws IOException if the resource cannot be opened or read
     */
    private static Pattern loadStoplabels(ResourceLookup loader, String stoplabelsResource) throws IOException {
        try (InputStream is = loader.open(stoplabelsResource);
             BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            return union(compile(readLines(reader)));
        }
    }

    /**
     * Reads stopwords from the given resource, lower-casing each one with {@link Locale#ROOT}.
     *
     * @throws IOException if the resource cannot be opened or read
     */
    private static HashSet<String> loadStopwords(ResourceLookup loader, String stopwordsResource) throws IOException {
        HashSet<String> stopwords = new HashSet<>();
        try (InputStream is = loader.open(stopwordsResource);
             BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            for (String word : readLines(reader)) {
                stopwords.add(word.toLowerCase(Locale.ROOT));
            }
        }
        return stopwords;
    }

    /**
     * Collects the distinct, trimmed, non-empty lines of the reader, skipping lines whose
     * trimmed form starts with {@code #}.
     */
    private static HashSet<String> readLines(BufferedReader reader) throws IOException {
        HashSet<String> lines = new HashSet<>();
        String line;
        while ((line = reader.readLine()) != null) {
            line = line.trim();
            if (line.isEmpty() || line.startsWith("#")) {
                continue;
            }
            lines.add(line);
        }
        return lines;
    }

    /**
     * Compiles every expression on its own, so that a malformed expression is rejected
     * individually before the expressions are concatenated into one pattern.
     */
    private static List<Pattern> compile(Set<String> patterns) {
        List<Pattern> compiled = new ArrayList<>(patterns.size());
        for (String expression : patterns) {
            compiled.add(Pattern.compile(expression));
        }
        return compiled;
    }

    /**
     * Builds a single pattern matching any of the given patterns.
     *
     * <p>Each alternative is wrapped in a non-capturing group: the groups are never read, and
     * a capturing wrapper would occupy a group number ahead of the alternative's own groups.
     * Group numbers inside later alternatives are still offset by the groups of the
     * alternatives preceding them.
     *
     * @return the combined pattern, or {@code null} if {@code patterns} is empty
     */
    private static Pattern union(List<Pattern> patterns) {
        if (patterns.isEmpty()) {
            return null;
        }
        StringBuilder union = new StringBuilder();
        for (Pattern pattern : patterns) {
            if (union.length() > 0) {
                union.append('|');
            }
            union.append("(?:").append(pattern.pattern()).append(')');
        }
        return Pattern.compile(union.toString());
    }
}

