/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.pulsar.boilerpipe.filters.heuristics;

import ai.platon.pulsar.boilerpipe.document.TextBlock;
import ai.platon.pulsar.boilerpipe.document.TextDocument;
import ai.platon.pulsar.boilerpipe.filters.TextBlockFilter;
import ai.platon.pulsar.boilerpipe.utils.ProcessingException;
import java.util.List;
import java.util.ListIterator;

/**
 * Keeps only the content block with the most words and demotes every other block.
 *
 * <p>Among the blocks currently flagged as content, the one with the strictly highest
 * {@code getNumWords()} is selected (on a tie the earliest block wins). That block is
 * flagged as content and labelled {@code pulsar.text/VERY_LIKELY_CONTENT}; every other
 * block is flagged as non-content and labelled {@code pulsar.text/MIGHT_BE_CONTENT}.
 *
 * <p>When {@code expandToSameLevelText} is enabled and a largest block was found, the
 * neighbours before and after it are re-flagged as content if they have the same tag
 * level as the largest block and at least {@code minWords} words. Each directional scan
 * stops at the first block whose tag level is lower than that of the largest block.
 */
public final class KeepLargestBlockFilter
implements TextBlockFilter {
    /** Keeps the largest block only; no expansion to neighbouring blocks. */
    public static final KeepLargestBlockFilter INSTANCE = new KeepLargestBlockFilter(false, 0);
    /** Keeps the largest block and expands to same-tag-level neighbours of any word count. */
    public static final KeepLargestBlockFilter INSTANCE_EXPAND_TO_SAME_TAGLEVEL = new KeepLargestBlockFilter(true, 0);
    /** Keeps the largest block and expands to same-tag-level neighbours with at least 150 words. */
    public static final KeepLargestBlockFilter INSTANCE_EXPAND_TO_SAME_TAGLEVEL_MIN_WORDS = new KeepLargestBlockFilter(true, 150);
    private final boolean expandToSameLevelText;
    private final int minWords;

    /**
     * Creates a filter.
     *
     * @param expandToSameLevelText whether neighbours on the largest block's tag level
     *                              are re-flagged as content
     * @param minWords              minimum word count a neighbour needs to be re-flagged
     */
    public KeepLargestBlockFilter(boolean expandToSameLevelText, int minWords) {
        this.expandToSameLevelText = expandToSameLevelText;
        this.minWords = minWords;
    }

    /**
     * Applies the filter to the text blocks of {@code doc}.
     *
     * @param doc the document whose text blocks are re-flagged in place
     * @return {@code false} if the document has fewer than two text blocks (nothing is
     *         touched), {@code true} otherwise
     */
    @Override
    public boolean process(TextDocument doc) throws ProcessingException {
        List<TextBlock> blocks = doc.getTextBlocks();
        if (blocks.size() < 2) {
            return false;
        }

        // Pass 1: locate the content block with the strictly largest word count.
        TextBlock winner = null;
        int winnerWords = -1;
        int winnerIndex = -1;
        int winnerLevel = -1;
        int position = 0;
        for (TextBlock candidate : blocks) {
            if (candidate.isContent()) {
                int words = candidate.getNumWords();
                if (words > winnerWords) {
                    winner = candidate;
                    winnerWords = words;
                    winnerIndex = position;
                    if (this.expandToSameLevelText) {
                        winnerLevel = candidate.getTagLevel();
                    }
                }
            }
            position++;
        }

        // Pass 2: only the winner stays content; everything else is demoted.
        // If no block was content, winner is null and every block is demoted.
        for (TextBlock block : blocks) {
            boolean isWinner = block == winner;
            block.setIsContent(isWinner);
            block.addLabel(isWinner ? "pulsar.text/VERY_LIKELY_CONTENT" : "pulsar.text/MIGHT_BE_CONTENT");
        }

        // Pass 3 (optional): re-promote qualifying neighbours, backwards first, then forwards.
        if (!this.expandToSameLevelText || winnerIndex == -1) {
            return true;
        }
        this.promoteNeighbours(blocks.listIterator(winnerIndex), false, winnerLevel);
        // The forward cursor starts at the winner itself, which is already content.
        this.promoteNeighbours(blocks.listIterator(winnerIndex), true, winnerLevel);
        return true;
    }

    /**
     * Walks {@code cursor} in one direction and flags as content every block that sits
     * exactly on {@code level} and has at least {@code minWords} words. The walk ends
     * when the list is exhausted or a block with a tag level below {@code level} is met;
     * blocks with a deeper tag level are skipped without ending the walk.
     *
     * @param cursor  iterator positioned at the largest block's index
     * @param forward {@code true} to walk towards the end of the list, {@code false}
     *                to walk towards the start
     * @param level   tag level of the largest block
     */
    private void promoteNeighbours(ListIterator<TextBlock> cursor, boolean forward, int level) {
        while (forward ? cursor.hasNext() : cursor.hasPrevious()) {
            TextBlock neighbour = forward ? cursor.next() : cursor.previous();
            int depth = neighbour.getTagLevel();
            if (depth < level) {
                break;
            }
            if (depth == level && neighbour.getNumWords() >= this.minWords) {
                neighbour.setIsContent(true);
            }
        }
    }
}

