package com.qwazr.extractor.parser;

import com.qwazr.extractor.ParserAbstract;
import com.qwazr.extractor.ParserDocument;
import com.qwazr.extractor.ParserField;
import com.qwazr.utils.CharsetUtils;
import java.io.InputStream;
import org.apache.commons.io.IOUtils;
import org.pegdown.LinkRenderer;
import org.pegdown.PegDownProcessor;
import org.pegdown.ToHtmlSerializer;
import org.pegdown.ast.DefinitionNode;
import org.pegdown.ast.DefinitionTermNode;
import org.pegdown.ast.HeaderNode;
import org.pegdown.ast.ListItemNode;
import org.pegdown.ast.ParaNode;
import org.pegdown.ast.RootNode;
import org.pegdown.ast.SuperNode;
import org.pegdown.ast.TextNode;

/* loaded from: input_file:com/qwazr/extractor/parser/Markdown.class */
/**
 * Parser for Markdown input.
 *
 * <p>The input is parsed with pegdown into an AST, which is then walked by
 * {@link ExtractorSerializer}. During the walk, accumulated text is stored
 * under {@link #CONTENT}, link and image targets are stored under
 * {@link #URL}, and finally the value returned by {@code languageDetection}
 * is stored under {@link #LANG_DETECTION}.
 *
 * <p>The document being built is kept in an instance field, so one instance
 * holds the state of a single parse at a time.
 */
public class Markdown extends ParserAbstract {
    public static final String[] DEFAULT_MIMETYPES = {"text/x-markdown", "text/markdown"};
    public static final String[] DEFAULT_EXTENSIONS = {"md", "markdown"};
    protected static final ParserField CONTENT = ParserField.newString("content", "The content of the document");
    protected static final ParserField URL = ParserField.newString("url", "Detected URLs");
    protected static final ParserField LANG_DETECTION = ParserField.newString("lang_detection", "Detection of the language");
    protected static final ParserField[] FIELDS = {CONTENT, URL, LANG_DETECTION};

    /** Document populated by the current parse; assigned in {@code parseContent(char[])}. */
    private ParserDocument result;

    /**
     * Returns the parameters of this parser.
     *
     * <p>Decompiler note: the original declaration was {@code protected}.
     *
     * @return always {@code null}
     */
    @Override
    public ParserField[] getParameters() {
        return null;
    }

    /**
     * Returns the fields this parser can populate.
     *
     * <p>Decompiler note: the original declaration was {@code protected}.
     *
     * @return the shared {@link #FIELDS} array (not a copy)
     */
    @Override
    public ParserField[] getFields() {
        return FIELDS;
    }

    /**
     * Returns the file extensions handled by default.
     *
     * <p>Decompiler note: the original declaration was {@code protected}.
     *
     * @return the shared {@link #DEFAULT_EXTENSIONS} array (not a copy)
     */
    @Override
    public String[] getDefaultExtensions() {
        return DEFAULT_EXTENSIONS;
    }

    /**
     * Returns the MIME types handled by default.
     *
     * <p>Decompiler note: the original declaration was {@code protected}.
     *
     * @return the shared {@link #DEFAULT_MIMETYPES} array (not a copy)
     */
    @Override
    public String[] getDefaultMimeTypes() {
        return DEFAULT_MIMETYPES;
    }

    /**
     * Reads the whole stream into a character array, decoding it with
     * {@code CharsetUtils.CharsetUTF8}, and parses it as Markdown.
     *
     * @param inputStream the Markdown source; it is not closed here
     * @param str not used by this parser
     * @param str2 not used by this parser
     * @throws Exception if reading or parsing fails
     */
    @Override
    protected void parseContent(InputStream inputStream, String str, String str2) throws Exception {
        final char[] markdownChars = IOUtils.toCharArray(inputStream, CharsetUtils.CharsetUTF8);
        parseContent(markdownChars);
    }

    /**
     * Parses the given Markdown characters and fills a new parser document.
     *
     * @param markdownChars the Markdown source text
     * @throws Exception if parsing or language detection fails
     */
    private void parseContent(char[] markdownChars) throws Exception {
        // NOTE(review): 30000 is an int literal. If PegDownProcessor has both an
        // int and a long single-argument constructor, the int one is selected here.
        // If a timeout in milliseconds was intended, this needs 30000L -- confirm
        // against the pegdown API before changing, since it alters parsing behaviour.
        final PegDownProcessor processor = new PegDownProcessor(30000);
        final RootNode ast = processor.parseMarkdown(markdownChars);

        // The document is only created once the source has been parsed successfully.
        this.result = getNewParserDocument();
        ast.accept(new ExtractorSerializer());

        // NOTE(review): the meaning of 10000 is defined by languageDetection, which
        // is not visible in this file -- presumably a length limit; confirm.
        this.result.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    }

    /**
     * Serializer that walks the pegdown AST and feeds the enclosing parser's
     * document: printed text is flushed into {@link Markdown#CONTENT} entries
     * and link/image targets are added as {@link Markdown#URL} entries.
     */
    public class ExtractorSerializer extends ToHtmlSerializer {

        /** Creates the serializer with a default {@link LinkRenderer}. */
        public ExtractorSerializer() {
            super(new LinkRenderer());
        }

        /**
         * Moves the text accumulated in the printer into a new
         * {@link Markdown#CONTENT} entry and clears the printer. Does nothing
         * when the printer buffer is empty.
         */
        protected void nextContent() {
            if (this.printer.sb.length() > 0) {
                final String chunk = this.printer.sb.toString();
                Markdown.this.result.add(Markdown.CONTENT, chunk);
                this.printer.clear();
            }
        }

        /** Visits the root node, then flushes whatever text is still pending. */
        public void visit(RootNode node) {
            super.visit(node);
            nextContent();
        }

        /** Visits a definition node, then flushes its text as one content entry. */
        public void visit(DefinitionNode node) {
            super.visit(node);
            nextContent();
        }

        /** Visits a definition term node, then flushes its text as one content entry. */
        public void visit(DefinitionTermNode node) {
            super.visit(node);
            nextContent();
        }

        /** Visits a paragraph node, then flushes its text as one content entry. */
        public void visit(ParaNode node) {
            super.visit(node);
            nextContent();
        }

        /** Visits a header node, then flushes its text as one content entry. */
        public void visit(HeaderNode node) {
            super.visit(node);
            nextContent();
        }

        /** Visits a list item node, then flushes its text as one content entry. */
        public void visit(ListItemNode node) {
            super.visit(node);
            nextContent();
        }

        /**
         * Prints only the text of the node; the tag name is ignored.
         *
         * @param textNode the node whose text is printed
         * @param str the tag name, unused
         */
        protected void printTag(TextNode textNode, String str) {
            final String text = textNode.getText();
            this.printer.print(text);
        }

        /**
         * Visits the children of the node without printing anything for the
         * node itself; the tag name is ignored.
         *
         * @param superNode the node whose children are visited
         * @param str the tag name, unused
         */
        protected void printTag(SuperNode superNode, String str) {
            visitChildren(superNode);
        }

        /**
         * Flushes pending text, visits the children of the node, then flushes
         * again so the children's text is kept apart from the surrounding
         * text. The tag name is ignored.
         *
         * @param superNode the node whose children are visited
         * @param str the tag name, unused
         */
        protected void printIndentedTag(SuperNode superNode, String str) {
            nextContent();
            visitChildren(superNode);
            nextContent();
        }

        /**
         * Records the image target as a URL and prints the rendering text.
         *
         * @param rendering the rendering produced for the image
         */
        protected void printImageTag(LinkRenderer.Rendering rendering) {
            captureRendering(rendering);
        }

        /**
         * Records the link target as a URL and prints the rendering text.
         *
         * @param rendering the rendering produced for the link
         */
        protected void printLink(LinkRenderer.Rendering rendering) {
            captureRendering(rendering);
        }

        /**
         * Shared handling for links and images: adds {@code rendering.href} to
         * the {@link Markdown#URL} field, then prints {@code rendering.text}.
         */
        private void captureRendering(LinkRenderer.Rendering rendering) {
            Markdown.this.result.add(Markdown.URL, rendering.href);
            this.printer.print(rendering.text);
        }
    }
}
