package lv.semti.morphology.corpus;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/* loaded from: input_file:lv/semti/morphology/corpus/TaggedCorpus.class */
/**
 * An in-memory collection of {@link Document}s read from a tagged corpus file.
 *
 * <p>The expected input is a UTF-8 text file made of consecutive documents. Each
 * document starts with a header line beginning with {@code <doc} and ends with a
 * line beginning with {@code </doc>}; every line in between belongs to the document.
 */
public class TaggedCorpus {
    /** Documents of the corpus, in the order they appear in the input. */
    private final List<Document> documents;

    /**
     * Loads a corpus.
     *
     * <p>The name is first looked up as a classpath resource through this class's
     * class loader; if no such resource exists it is opened as a file system path.
     *
     * @param str resource name or file path of the corpus
     * @throws IOException if the input cannot be opened or read, or if a document
     *                     does not start with a {@code <doc} header line
     */
    public TaggedCorpus(String str) throws IOException {
        InputStream input = getClass().getClassLoader().getResourceAsStream(str);
        if (input == null) {
            input = new FileInputStream(str);
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(input, "UTF-8"));
        try {
            this.documents = loadDocuments(reader);
        } finally {
            // Close on every path: a malformed corpus makes loadDocuments throw,
            // and the underlying stream must not stay open in that case.
            reader.close();
        }
    }

    /**
     * Collects the examples reported by every document for the given query.
     *
     * @param str query forwarded unchanged to {@code Document.findExamples}
     * @return a new list with the examples of all documents, sorted by the natural
     *         ordering of {@code Example}; empty if no document reports any
     */
    public List<Example> findExamples(String str) {
        LinkedList<Example> examples = new LinkedList<Example>();
        for (Document document : this.documents) {
            examples.addAll(document.findExamples(str));
        }
        Collections.sort(examples);
        return examples;
    }

    /**
     * Reads documents from the reader until the end of input.
     *
     * @param bufferedReader reader positioned at the start of a document or at end of input
     * @return the documents in input order; empty if the input has no lines
     * @throws IOException if reading fails or a document header is malformed
     */
    private static List<Document> loadDocuments(BufferedReader bufferedReader) throws IOException {
        List<Document> loaded = new LinkedList<Document>();
        Document document = loadDocument(bufferedReader);
        while (document != null) {
            loaded.add(document);
            document = loadDocument(bufferedReader);
        }
        return loaded;
    }

    /**
     * Reads a single document from the reader.
     *
     * <p>The header line and the body lines (excluding the closing {@code </doc>}
     * line) are handed to the {@code Document} constructor. A document cut short by
     * the end of input, i.e. without a closing line, is accepted with the lines
     * read so far.
     *
     * @param bufferedReader reader positioned at a document header or at end of input
     * @return the next document, or {@code null} if the input is exhausted
     * @throws IOException if reading fails or the next line does not start with {@code <doc}
     */
    private static Document loadDocument(BufferedReader bufferedReader) throws IOException {
        String header = bufferedReader.readLine();
        if (header == null) {
            return null;
        }
        if (!header.startsWith("<doc")) {
            throw new IOException(String.format("Bad corpus file format, expected <doc found %s", header));
        }
        LinkedList<String> body = new LinkedList<String>();
        String line = bufferedReader.readLine();
        while (line != null && !line.startsWith("</doc>")) {
            body.add(line);
            line = bufferedReader.readLine();
        }
        return new Document(header, body);
    }
}
