/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.pulsar.boilerpipe.filters.heuristics;

import ai.platon.pulsar.boilerpipe.document.TextBlock;
import ai.platon.pulsar.boilerpipe.document.TextDocument;
import ai.platon.pulsar.boilerpipe.filters.TextBlockFilter;
import ai.platon.pulsar.boilerpipe.utils.BoiConstants;
import ai.platon.pulsar.boilerpipe.utils.ProcessingException;
import ai.platon.pulsar.common.DateTimeDetector;
import java.time.Duration;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.util.regex.Pattern;

/**
 * Text block filter that looks for article metadata: date/time strings and
 * short blocks matching {@link BoiConstants#PATTERNS_SHORT}.
 *
 * <p>While walking the document's text blocks it
 * <ul>
 *   <li>raises the document's modified time to the latest plausible timestamp found,</li>
 *   <li>sets the publish time from the first suitable block that appears before any
 *       long text block, and</li>
 *   <li>marks metadata blocks as content and labels them with
 *       {@code pulsar.text/ARTICLE_METADATA}.</li>
 * </ul>
 */
public class ArticleMetadataFilter
implements TextBlockFilter {
    /** Label attached to every block recognised as article metadata. */
    private static final String ARTICLE_METADATA_LABEL = "pulsar.text/ARTICLE_METADATA";
    /** A block longer than this many characters counts as "long text". */
    private static final int LONG_TEXT_LENGTH = 200;
    /** A block must be longer than this many characters to supply the publish time. */
    private static final int MIN_PUBLISH_TEXT_LENGTH = 15;
    /** Only blocks with fewer words than this are tested against the short patterns. */
    private static final int SHORT_BLOCK_WORD_LIMIT = 10;
    /** Timestamps this many days old (5 * 365) or older are rejected. */
    private static final long MAX_AGE_DAYS = 1825L;
    /** Tolerance for timestamps slightly ahead of "now", e.g. caused by zone skew. */
    private static final long MAX_FUTURE_SKEW_DAYS = 1L;

    /** Shared instance that uses a {@link DateTimeDetector} with its default settings. */
    public static final ArticleMetadataFilter INSTANCE = new ArticleMetadataFilter();

    private final DateTimeDetector dateTimeDetector = new DateTimeDetector();

    /** Creates a filter whose detector keeps its default settings. */
    public ArticleMetadataFilter() {
    }

    /**
     * Creates a filter whose detector is configured with the given zone.
     *
     * @param zoneId zone id handed to {@link DateTimeDetector#setZoneId}
     */
    public ArticleMetadataFilter(ZoneId zoneId) {
        this.dateTimeDetector.setZoneId(zoneId);
    }

    /**
     * Scans every text block of {@code doc} for timestamps and short metadata patterns.
     *
     * <p>Side effects on {@code doc}: the modified time and publish time may be
     * advanced, and the date-time count is set to the number of times the modified
     * time was advanced during this pass.
     *
     * @param doc the document to inspect and update
     * @return {@code true} if at least one block was marked as article metadata
     * @throws ProcessingException declared by {@link TextBlockFilter}; not thrown directly here
     */
    @Override
    public boolean process(TextDocument doc) throws ProcessingException {
        Instant now = Instant.now();
        boolean publishTimeFound = false;
        boolean foundLongText = false;
        boolean changed = false;
        int sniffedDateTimeCount = 0;

        for (TextBlock tb : doc.getTextBlocks()) {
            String text = tb.getText();
            if (text.length() > LONG_TEXT_LENGTH) {
                // Once a long block has been seen, later blocks no longer supply the publish time.
                foundLongText = true;
            }

            // Instant.EPOCH stands for "no usable timestamp", so it never advances a time below.
            Instant sniffedTime = this.sniffValidDateTime(text, now);
            if (sniffedTime.isAfter(doc.getModifiedTime())) {
                ++sniffedDateTimeCount;
                doc.setModifiedTime(sniffedTime);
            }

            // Only the first qualifying block decides the publish time.
            if (!publishTimeFound
                    && !foundLongText
                    && text.length() > MIN_PUBLISH_TEXT_LENGTH
                    && sniffedTime.isAfter(doc.getPublishTime())) {
                publishTimeFound = true;
                doc.setPublishTime(sniffedTime);
                this.markAsMetadata(tb);
                changed = true;
            }

            if (tb.getNumWords() < SHORT_BLOCK_WORD_LIMIT && matchesShortPattern(text)) {
                // Marked once per block, even when several patterns would match.
                this.markAsMetadata(tb);
                changed = true;
            }
        }

        doc.setDateTimeCount(sniffedDateTimeCount);
        return changed;
    }

    /** Flags the block as content and attaches the article-metadata label. */
    private void markAsMetadata(TextBlock tb) {
        tb.setIsContent(true);
        tb.addLabel(ARTICLE_METADATA_LABEL);
    }

    /** Returns {@code true} as soon as any of the short metadata patterns is found in {@code text}. */
    private static boolean matchesShortPattern(String text) {
        for (Pattern p : BoiConstants.PATTERNS_SHORT) {
            if (p.matcher(text).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Detects a timestamp in {@code text} and accepts it only when it is plausible
     * relative to {@code now}.
     *
     * <p>A timestamp is rejected when the detector returns {@code null}, when it lies
     * more than {@value #MAX_FUTURE_SKEW_DAYS} day(s) after {@code now}, or when it is
     * {@value #MAX_AGE_DAYS} or more whole days before {@code now}.
     *
     * @param text the text to search
     * @param now the reference instant
     * @return the detected instant, or {@link Instant#EPOCH} when nothing plausible was found
     */
    private Instant sniffValidDateTime(String text, Instant now) {
        OffsetDateTime detected = this.dateTimeDetector.detectDateTimeLeniently(text);
        if (detected == null) {
            return Instant.EPOCH;
        }

        Instant dateTime = detected.toInstant();
        // The age check alone lets every future date through, because the duration is
        // negative for them; reject anything beyond a small tolerance ahead of now.
        if (dateTime.isAfter(now.plus(Duration.ofDays(MAX_FUTURE_SKEW_DAYS)))) {
            return Instant.EPOCH;
        }

        long ageDays = Duration.between(dateTime, now).toDays();
        return ageDays < MAX_AGE_DAYS ? dateTime : Instant.EPOCH;
    }
}

