/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.pulsar.boilerpipe.filters.heuristics;

import ai.platon.pulsar.boilerpipe.document.TextBlock;
import ai.platon.pulsar.boilerpipe.document.TextDocument;
import ai.platon.pulsar.boilerpipe.filters.TextBlockFilter;
import ai.platon.pulsar.boilerpipe.filters.heuristics.HeuristicFilterBase;
import ai.platon.pulsar.boilerpipe.utils.ProcessingException;

/**
 * Text block filter that switches off the content flag of every block from an
 * "end of text" marker onward.
 *
 * <p>The marker is a block carrying the label
 * {@code pulsar.text/INDICATES_END_OF_TEXT}. It only takes effect once the
 * running word total of content blocks seen so far (the marker block itself
 * included, when it is content) has reached the configured minimum.
 *
 * <p>Words are counted with {@code getNumFullTextWords}, which is not defined in
 * this file (presumably inherited from {@link HeuristicFilterBase} - confirm).
 */
public final class IgnoreBlocksAfterContentFilter extends HeuristicFilterBase implements TextBlockFilter {
    /** Shared instance using a minimum of 60 words. */
    public static final IgnoreBlocksAfterContentFilter DEFAULT_INSTANCE = new IgnoreBlocksAfterContentFilter(60);
    /** Shared instance using a minimum of 200 words. */
    public static final IgnoreBlocksAfterContentFilter INSTANCE_200 = new IgnoreBlocksAfterContentFilter(200);

    /** Label whose presence on a block marks a candidate end of the main text. */
    private static final String END_OF_TEXT_LABEL = "pulsar.text/INDICATES_END_OF_TEXT";

    /** Content words that must have been accumulated before an end marker is honoured. */
    private final int minNumWords;

    /**
     * Creates a filter with the given word threshold.
     *
     * @param minNumWords number of content words that must precede (or include)
     *                    an end-of-text block before the cut-off is applied
     */
    public IgnoreBlocksAfterContentFilter(int minNumWords) {
        this.minNumWords = minNumWords;
    }

    /**
     * Returns the shared instance configured with the 60-word threshold.
     *
     * @return {@link #DEFAULT_INSTANCE}
     */
    public static IgnoreBlocksAfterContentFilter getDefaultInstance() {
        return DEFAULT_INSTANCE;
    }

    /**
     * Walks the document's text blocks in iteration order and, once an
     * end-of-text block is reached with enough content words accumulated, marks
     * that block and every later block as non-content.
     *
     * @param doc the document whose text blocks are examined and updated in place
     * @return {@code true} if the cut-off was triggered, i.e. at least one block
     *         had {@code setIsContent(false)} applied (whether or not that block
     *         was content beforehand); {@code false} otherwise
     * @throws ProcessingException declared by the filter contract; nothing in
     *         this method throws it directly
     */
    @Override
    public boolean process(TextDocument doc) throws ProcessingException {
        int contentWordCount = 0;
        boolean cutOffReached = false;
        boolean modified = false;

        for (TextBlock current : doc.getTextBlocks()) {
            boolean marksEnd = current.hasLabel(END_OF_TEXT_LABEL);

            // Count the marker block's own words before testing the threshold.
            if (current.isContent()) {
                contentWordCount += getNumFullTextWords(current);
            }

            // The cut-off is sticky: once set it stays set for the rest of the walk.
            if (marksEnd && contentWordCount >= this.minNumWords) {
                cutOffReached = true;
            }

            if (cutOffReached) {
                modified = true;
                current.setIsContent(false);
            }
        }
        return modified;
    }
}

