package lv.semti.morphology.corpus;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:lv/semti/morphology/corpus/Document.class */
/**
 * One corpus document: header metadata, the sentences built from its token lines,
 * and an index from lemma to every token carrying that lemma.
 *
 * <p>Token lines are expected to hold exactly three tab-separated columns
 * (surface form, tag, lemma). Lines starting with {@code <} are treated as markup.
 */
public class Document {
    /** Target token count; examples carry their absolute distance from this value. */
    private static final int OPTIMAL_SENTENCE_LENGTH = 10;

    /** Header attributes that are copied into {@link #metadata} when present. */
    private static final String[] FIELDS = {"id", "reference", "section", "title", "source", "author", "authorgender", "published", "genre", "keywords", "fileref"};

    /** Matches a space followed by one punctuation mark, so that the space can be dropped. */
    private static final Pattern SPACE_BEFORE_PUNCTUATION = Pattern.compile(" ([.,?!\"])");

    /**
     * Precompiled {@code name='value'} patterns, one per entry of {@link #FIELDS}.
     * Filled once during class initialisation and never modified afterwards.
     */
    private static final Map<String, Pattern> PATTERNS = new HashMap<String, Pattern>();

    static {
        for (String field : FIELDS) {
            PATTERNS.put(field, compileFieldPattern(field));
        }
    }

    private List<Sentence> sentences = new LinkedList<Sentence>();
    public Map<String, String> metadata = new HashMap<String, String>();
    public Multimap<String, Token> index;

    /** A sentence: its tokens in order plus the text assembled from their surface forms. */
    public class Sentence {
        String sentence = "";
        List<Token> tokens = new LinkedList<Token>();

        Sentence() {
        }

        boolean isEmpty() {
            return this.tokens.isEmpty();
        }

        /**
         * Appends a token to this sentence and links the token back to it.
         *
         * @param token the token to append
         * @param z     whether a space should separate this token from the preceding one
         */
        public void add(Token token, boolean z) {
            // The caller's flag is carried over from the previous sentence, so it may be
            // true for the very first token; a separator only makes sense after a token.
            boolean hasPrecedingToken = !this.tokens.isEmpty();
            this.tokens.add(token);
            token.sentence = this;
            if (z && hasPrecedingToken) {
                this.sentence += ' ';
            }
            this.sentence += token.token;
        }
    }

    /** A single corpus token parsed from one tab-separated line. */
    public class Token {
        String token;
        String tag;
        String lemma;
        Sentence sentence;

        /**
         * Parses a token line.
         *
         * @param str a line of the form {@code token<TAB>tag<TAB>lemma}
         * @throws IOException if the line does not split into exactly three columns
         */
        Token(String str) throws IOException {
            String[] columns = str.split("\t");
            if (columns.length != 3) {
                throw new IOException(String.format("Bad corpus file format - line '%s'", str));
            }
            this.token = columns[0];
            this.tag = columns[1];
            this.lemma = columns[2];
        }
    }

    /**
     * Collects example sentences for a lemma.
     *
     * <p>Each distinct sentence text is returned at most once, and sentences rejected by
     * {@code Blacklist.is_blacklisted} are skipped. The space before {@code . , ? ! "} is
     * removed from the text.
     *
     * @param str the lemma to look up in {@link #index}
     * @return the examples found, in index order; empty if there are none
     */
    public List<Example> findExamples(String str) {
        List<Example> examples = new LinkedList<Example>();
        HashSet<String> seen = new HashSet<String>();
        for (Token token : this.index.get(str)) {
            String text = SPACE_BEFORE_PUNCTUATION.matcher(token.sentence.sentence).replaceAll("$1");
            if (!seen.contains(text) && !Blacklist.is_blacklisted(text, str)) {
                int distance = Math.abs(token.sentence.tokens.size() - OPTIMAL_SENTENCE_LENGTH);
                examples.add(new Example(text, this, distance));
                seen.add(text);
            }
        }
        return examples;
    }

    /**
     * Builds a document from its header and body lines.
     *
     * @param str  the header text from which the metadata attributes are extracted
     * @param list the body lines: markup lines starting with {@code <}, and token lines
     * @throws IOException if a token line does not have exactly three tab-separated columns
     */
    public Document(String str, List<String> list) throws IOException {
        parseHeader(str);
        Sentence current = new Sentence();
        this.index = ArrayListMultimap.create();
        boolean spaceBefore = false;
        for (String line : list) {
            // A closing sentence tag ends the sentence collected so far.
            if (line.startsWith("</s") && !current.isEmpty()) {
                this.sentences.add(current);
                current = new Sentence();
            }
            // "<g />" suppresses the space in front of the next token.
            if (line.startsWith("<g />")) {
                spaceBefore = false;
            }
            if (!line.startsWith("<")) {
                Token token = new Token(line);
                current.add(token, spaceBefore);
                this.index.put(token.lemma, token);
                spaceBefore = true;
            }
        }
        // Keep a trailing sentence that was not closed by a "</s" line.
        if (!current.isEmpty()) {
            this.sentences.add(current);
        }
    }

    /** Compiles the pattern that captures the single-quoted value of the given attribute. */
    private static Pattern compileFieldPattern(String field) {
        return Pattern.compile(field + "='([^']*)'");
    }

    /** Returns the precompiled pattern for an attribute, compiling one for unlisted names. */
    private static Pattern getPattern(String str) {
        Pattern pattern = PATTERNS.get(str);
        return pattern != null ? pattern : compileFieldPattern(str);
    }

    /**
     * Extracts the value of one attribute from the header.
     *
     * @param str  the header text
     * @param str2 the attribute name
     * @return the text between the single quotes of the first match, or {@code null} if absent
     */
    private static String getValue(String str, String str2) {
        Matcher matcher = getPattern(str2).matcher(str);
        return matcher.find() ? matcher.group(1) : null;
    }

    /** Stores every attribute of {@link #FIELDS} found in the header into {@link #metadata}. */
    private void parseHeader(String str) {
        for (String field : FIELDS) {
            String value = getValue(str, field);
            if (value != null) {
                this.metadata.put(field, value);
            }
        }
    }
}
