package com.eshore.kg.qa.extract.html;

import com.eshore.framework.StandardComponent;
import com.eshore.framework.impl.PipelineNodeWorker;
import com.eshore.kg.qa.extract.Paragraph;
import java.util.Iterator;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;

/**
 * Pipeline node that walks the body of a jsoup document and appends its text
 * to the {@link JsoupDocument}, one trimmed chunk per block-level boundary.
 *
 * <p>The component label in the annotation reads "extract body text content".
 */
@StandardComponent("提取正文内容")
public class BodyTextExtraction extends PipelineNodeWorker<JsoupDocument, ExtractionContext> {

    /**
     * Recursively collects text below {@code node}.
     *
     * <p>Text nodes are appended to {@code sb}. Entering and leaving a
     * block-level element flushes {@code sb} into {@code paragraph}, so text of
     * inline elements keeps accumulating until the next block boundary. Nodes
     * that are neither elements nor text nodes (including {@code null})
     * contribute nothing.
     *
     * @param node      the node to visit
     * @param paragraph receiver of the flushed text chunks
     * @param sb        buffer holding the text gathered since the last flush
     */
    private void getText(Node node, Paragraph paragraph, StringBuilder sb) {
        if (node instanceof Element) {
            Element element = (Element) node;
            boolean isBlock = element.isBlock();
            // Flush whatever was gathered before this block starts ...
            finishBlock(paragraph, sb, isBlock);
            for (Node child : element.childNodes()) {
                getText(child, paragraph, sb);
            }
            // ... and again once the block's own content is complete.
            finishBlock(paragraph, sb, isBlock);
        } else if (node instanceof TextNode) {
            sb.append(((TextNode) node).text());
        }
    }

    /**
     * Flushes the buffer into {@code paragraph} when a block boundary is hit.
     *
     * <p>Does nothing unless {@code isBlock} is true and the buffer is
     * non-empty. Otherwise the trimmed buffer content is appended to
     * {@code paragraph} (skipped when it trims to the empty string) and the
     * buffer is cleared.
     *
     * @param paragraph receiver of the flushed text
     * @param sb        buffer to flush and reset
     * @param isBlock   whether the current element is block-level
     */
    private void finishBlock(Paragraph paragraph, StringBuilder sb, boolean isBlock) {
        if (!isBlock || sb.length() == 0) {
            return;
        }
        String text = sb.toString().trim();
        if (!text.isEmpty()) {
            paragraph.appendText(text);
        }
        sb.setLength(0);
    }

    /**
     * Extracts the body text of {@code jsoupDocument} into the document itself.
     *
     * <p>Extraction only runs when both {@code getChildren()} and
     * {@code getTexts()} return {@code null}; otherwise the call is a no-op.
     *
     * @param jsoupDocument     the document whose body is traversed and which
     *                          receives the extracted text
     * @param extractionContext not used by this worker
     */
    public void go(JsoupDocument jsoupDocument, ExtractionContext extractionContext) {
        if (jsoupDocument.getChildren() != null || jsoupDocument.getTexts() != null) {
            return;
        }
        getText(jsoupDocument.getDocument().body(), jsoupDocument, new StringBuilder());
    }
}
