package org.cleartk.token.pos.genia.util;

import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.input.SAXBuilder;
import org.jdom2.output.XMLOutputter;
import org.xml.sax.DTDHandler;

/* loaded from: input_file:org/cleartk/token/pos/genia/util/GeniaPOSParser.class */
public class GeniaPOSParser implements Iterator<GeniaParse> {
    Element root;
    Iterator<?> articles;
    Set<String> posLabels;
    XMLOutputter outputter;

    public GeniaPOSParser(File file) throws IOException, JDOMException {
        this();
        SAXBuilder sAXBuilder = new SAXBuilder();
        sAXBuilder.setDTDHandler((DTDHandler) null);
        this.root = sAXBuilder.build(file).getRootElement();
        this.articles = this.root.getChildren("article").iterator();
        this.outputter = new XMLOutputter();
    }

    public GeniaPOSParser() {
        this.posLabels = new HashSet();
    }

    @Override // java.util.Iterator
    public boolean hasNext() {
        return this.articles.hasNext();
    }

    /* JADX WARN: Can't rename method to resolve collision */
    @Override // java.util.Iterator
    public GeniaParse next() {
        return parse((Element) this.articles.next());
    }

    @Override // java.util.Iterator
    public void remove() {
    }

    public GeniaParse parse(Element element) {
        GeniaParse geniaParse = new GeniaParse();
        try {
            StringWriter stringWriter = new StringWriter();
            new XMLOutputter().output(element, stringWriter);
            geniaParse.setXml(stringWriter.toString());
            geniaParse.setMedline(element.getChild("articleinfo").getChild("bibliomisc").getText().split(":")[1]);
            StringBuffer stringBuffer = new StringBuffer();
            int i = 0;
            Element child = element.getChild("title");
            Element child2 = element.getChild("abstract");
            if (child != null) {
                i = parse(child, geniaParse, stringBuffer, 0);
                if (child2 != null) {
                    stringBuffer.append("\n\n");
                    i += 2;
                }
            }
            if (child2 != null) {
                parse(child2, geniaParse, stringBuffer, i);
            }
            geniaParse.setText(stringBuffer.toString());
            return geniaParse;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    private int parse(Element element, GeniaParse geniaParse, StringBuffer stringBuffer, int i) {
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        ArrayList arrayList3 = new ArrayList();
        for (Element element2 : element.getChildren("sentence")) {
            arrayList2.clear();
            arrayList3.clear();
            int i2 = i;
            for (Element element3 : element2.getContent()) {
                if (element3 instanceof Text) {
                    Text text = (Text) element3;
                    stringBuffer.append(text.getText());
                    i += text.getText().length();
                } else if (element3 instanceof Element) {
                    Element element4 = element3;
                    if (!element4.getName().equals("w")) {
                        throw new RuntimeException("non-word element in sentence: " + element4);
                    }
                    String text2 = element4.getText();
                    stringBuffer.append(text2);
                    String attributeValue = element4.getAttributeValue("c");
                    if (attributeValue.indexOf(124) != -1) {
                        attributeValue = attributeValue.substring(0, attributeValue.indexOf(124));
                    }
                    GeniaTag geniaTag = new GeniaTag(attributeValue, new Span(i, i + text2.length()));
                    if (attributeValue.equals("*")) {
                        arrayList3.add(geniaTag);
                    } else {
                        if (arrayList3.size() > 0) {
                            geniaTag = new GeniaTag(attributeValue, new Span(((GeniaTag) arrayList3.get(0)).getSpans().get(0).getBegin(), i + text2.length()));
                            arrayList3.clear();
                        }
                        arrayList.add(geniaTag);
                        arrayList2.add(geniaTag);
                    }
                    i += text2.length();
                } else {
                    continue;
                }
            }
            Span span = new Span(i2, i);
            GeniaSentence geniaSentence = new GeniaSentence();
            geniaSentence.setSpan(span);
            geniaSentence.addPosTags(arrayList2);
            geniaParse.addSentence(geniaSentence);
            stringBuffer.append("  ");
            i += 2;
        }
        geniaParse.addPosTags(arrayList);
        return i;
    }

    public static void main(String[] strArr) {
        try {
            System.out.print("loading GENIA...");
            GeniaPOSParser geniaPOSParser = new GeniaPOSParser(new File(strArr[0]));
            System.out.println("done.");
            HashSet hashSet = new HashSet();
            while (geniaPOSParser.hasNext()) {
                Iterator<GeniaTag> it = geniaPOSParser.next().getPosTags().iterator();
                while (it.hasNext()) {
                    hashSet.add(it.next().getLabel());
                }
            }
            ArrayList arrayList = new ArrayList(hashSet);
            Collections.sort(arrayList);
            System.out.println("number of tags=" + arrayList.size());
            Iterator it2 = arrayList.iterator();
            while (it2.hasNext()) {
                System.out.println((String) it2.next());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
