package ai.platon.pulsar.index;

import ai.platon.pulsar.common.DateTimes;
import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.config.Params;
import ai.platon.pulsar.crawl.common.URLUtil;
import ai.platon.pulsar.crawl.index.IndexDocument;
import ai.platon.pulsar.crawl.index.IndexingException;
import ai.platon.pulsar.crawl.index.IndexingFilter;
import ai.platon.pulsar.persist.WebPage;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.Instant;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Consumer;
import kotlin.Metadata;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

/* compiled from: MetadataIndexer.kt */
@Metadata(mv = {1, 4, 2}, bv = {1, 0, 3}, k = 1, d1 = {"��4\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000e\n��\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\u0003\u0018�� \u00172\u00020\u0001:\u0001\u0017B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003¢\u0006\u0002\u0010\u0004J \u0010\b\u001a\u00020\t2\u0006\u0010\n\u001a\u00020\u000b2\u0006\u0010\f\u001a\u00020\r2\u0006\u0010\u000e\u001a\u00020\u000fH\u0002J \u0010\u0010\u001a\u00020\t2\u0006\u0010\n\u001a\u00020\u000b2\u0006\u0010\f\u001a\u00020\r2\u0006\u0010\u000e\u001a\u00020\u000fH\u0002J$\u0010\u0011\u001a\u0004\u0018\u00010\u000b2\b\u0010\n\u001a\u0004\u0018\u00010\u000b2\u0006\u0010\f\u001a\u00020\r2\u0006\u0010\u000e\u001a\u00020\u000fH\u0002J \u0010\u0012\u001a\u00020\t2\u0006\u0010\n\u001a\u00020\u000b2\u0006\u0010\f\u001a\u00020\r2\u0006\u0010\u000e\u001a\u00020\u000fH\u0002J\"\u0010\u0013\u001a\u0004\u0018\u00010\u000b2\u0006\u0010\n\u001a\u00020\u000b2\u0006\u0010\f\u001a\u00020\r2\u0006\u0010\u000e\u001a\u00020\u000fH\u0016J\b\u0010\u0014\u001a\u00020\u0015H\u0016J\u0010\u0010\u0016\u001a\u00020\t2\u0006\u0010\u0002\u001a\u00020\u0003H\u0016R\u001a\u0010\u0002\u001a\u00020\u0003X\u0096\u000e¢\u0006\u000e\n��\u001a\u0004\b\u0005\u0010\u0006\"\u0004\b\u0007\u0010\u0004¨\u0006\u0018"}, d2 = {"Lai/platon/pulsar/index/MetadataIndexer;", "Lai/platon/pulsar/crawl/index/IndexingFilter;", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/pulsar/common/config/ImmutableConfig;)V", "getConf", "()Lai/platon/pulsar/common/config/ImmutableConfig;", "setConf", "addGeneralMetadata", "", "doc", "Lai/platon/pulsar/crawl/index/IndexDocument;", "url", "", "page", "Lai/platon/pulsar/persist/WebPage;", "addHost", "addPageMetadata", "addTime", "filter", "getParams", "Lai/platon/pulsar/common/config/Params;", "setup", "Companion", "pulsar-index"})
/* loaded from: input_file:ai/platon/pulsar/index/MetadataIndexer.class */
public final class MetadataIndexer implements IndexingFilter {

    /** Configuration held by this filter; assigned by the constructor and by {@code setConf}. */
    @NotNull
    private ImmutableConfig conf;

    /** Configuration key whose string collection lists the metatags to index. */
    private static final String PARSE_CONF_PROPERTY = "index.metadata";

    /** Presumably the prefix of the index field name built for each metatag. */
    private static final String INDEX_PREFIX = "meta_";

    /** Presumably the prefix of the page-metadata key looked up for each metatag. */
    private static final String PARSE_META_PREFIX = "meta_";

    /** Kotlin companion-object instance; it carries no state of its own. */
    @NotNull
    public static final Companion Companion = new Companion(null);

    /**
     * Maps a page-metadata lookup key to the index field name it is written under.
     * The map is static, so it is shared by every instance of this class; it is
     * filled by {@code setup} and read by {@code addPageMetadata}.
     */
    private static final Map<String, String> parseFieldnames = new TreeMap<>();

    /* compiled from: MetadataIndexer.kt */
    @Metadata(mv = {1, 4, 2}, bv = {1, 0, 3}, k = 1, d1 = {"��\u001a\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0010%\n��\b\u0086\u0003\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002R\u000e\u0010\u0003\u001a\u00020\u0004X\u0082T¢\u0006\u0002\n��R\u000e\u0010\u0005\u001a\u00020\u0004X\u0082T¢\u0006\u0002\n��R\u000e\u0010\u0006\u001a\u00020\u0004X\u0082T¢\u0006\u0002\n��R\u001a\u0010\u0007\u001a\u000e\u0012\u0004\u0012\u00020\u0004\u0012\u0004\u0012\u00020\u00040\bX\u0082\u0004¢\u0006\u0002\n��¨\u0006\t"}, d2 = {"Lai/platon/pulsar/index/MetadataIndexer$Companion;", "", "()V", "INDEX_PREFIX", "", "PARSE_CONF_PROPERTY", "PARSE_META_PREFIX", "parseFieldnames", "", "pulsar-index"})
    /* loaded from: input_file:ai/platon/pulsar/index/MetadataIndexer$Companion.class */
    /**
     * Kotlin companion object of {@code MetadataIndexer}. It declares no members
     * of its own here; the constants it owns in the Kotlin source appear as
     * static fields on the outer class.
     */
    public static final class Companion {
        // Private: the only instance is the one created by the outer class.
        private Companion() {
        }

        // Compiler-generated bridge that lets the outer class call the private
        // constructor; the marker argument is ignored.
        public /* synthetic */ Companion(DefaultConstructorMarker defaultConstructorMarker) {
            this();
        }
    }

    /**
     * Stores the given configuration and registers every metatag listed under
     * {@code index.metadata} in the shared field-name map.
     *
     * <p>For each metatag the map receives one entry: the key is the prefix
     * followed by the metatag lower-cased with {@link Locale#ROOT}, the value is
     * the prefix followed by the metatag as configured. Existing entries are
     * never removed, so repeated calls accumulate.
     *
     * @param immutableConfig configuration to store and read the metatag list from
     */
    public void setup(@NotNull ImmutableConfig immutableConfig) {
        Intrinsics.checkNotNullParameter(immutableConfig, "conf");
        setConf(immutableConfig);
        for (String metatag : immutableConfig.getStringCollection(PARSE_CONF_PROPERTY)) {
            Intrinsics.checkNotNullParameter(metatag, "metatag");
            String lookupKey = PARSE_META_PREFIX + metatag.toLowerCase(Locale.ROOT);
            String fieldName = INDEX_PREFIX + metatag;
            parseFieldnames.put(lookupKey, fieldName);
        }
    }

    /**
     * Returns a freshly constructed {@code Params} built with its no-argument
     * constructor; this filter contributes no values to it.
     *
     * @return a new {@code Params} instance on every call
     */
    @NotNull
    public Params getParams() {
        return new Params();
    }

    /**
     * Adds time, host, content-type and configured page-metadata fields to the
     * given document.
     *
     * <p>The four steps run in a fixed order. If one of them raises an
     * {@link IndexingException}, the remaining steps are skipped, the failure is
     * logged, and the document is returned with whatever fields were added so
     * far; the exception is not propagated.
     *
     * @param indexDocument document to enrich; returned as-is after enrichment
     * @param str           URL of the page being indexed
     * @param webPage       page whose stored data supplies the field values
     * @return the same {@code indexDocument} instance that was passed in
     * @throws IndexingException declared by the interface; the visible steps'
     *                           failures of this type are caught and logged here
     */
    @Nullable
    public IndexDocument filter(@NotNull IndexDocument indexDocument, @NotNull String str, @NotNull WebPage webPage) throws IndexingException {
        Intrinsics.checkNotNullParameter(indexDocument, "doc");
        Intrinsics.checkNotNullParameter(str, "url");
        Intrinsics.checkNotNullParameter(webPage, "page");
        try {
            addTime(indexDocument, str, webPage);
            addHost(indexDocument, str, webPage);
            addGeneralMetadata(indexDocument, str, webPage);
            addPageMetadata(indexDocument, str, webPage);
        } catch (IndexingException e) {
            // Best-effort enrichment: keep the partially filled document, but
            // log the URL and pass the exception so the stack trace and cause
            // are not lost.
            IndexingFilter.LOG.error("Failed to add metadata fields for " + str, e);
        }
        return indexDocument;
    }

    /**
     * Adds the {@code url}, {@code domain} and {@code host} fields.
     *
     * <p>The page's representative URL is used when it is non-empty, otherwise
     * the URL passed in. Nothing is added when the chosen URL is null or empty.
     *
     * @param indexDocument document receiving the fields
     * @param str           fallback URL used when the page has no representative URL
     * @param webPage       page supplying the representative URL
     * @throws IndexingException wrapping the {@link MalformedURLException} raised
     *                           when the chosen URL cannot be parsed
     */
    private final void addHost(IndexDocument indexDocument, String str, WebPage webPage) throws IndexingException {
        String reprUrl = webPage.getReprUrl();
        Intrinsics.checkNotNullExpressionValue(reprUrl, "page.reprUrl");

        String target = reprUrl.isEmpty() ? str : reprUrl;
        if (target == null || target.isEmpty()) {
            return;
        }

        try {
            URL parsed = new URL(target);
            String domain = URLUtil.INSTANCE.getDomainName(parsed);
            indexDocument.add("url", target);
            indexDocument.add("domain", domain);
            // The host is only written when the parsed URL has one.
            indexDocument.addIfNotNull("host", parsed.getHost());
        } catch (MalformedURLException cause) {
            throw new IndexingException(cause);
        }
    }

    /**
     * Adds the crawl-time and index-time fields, using the current instant as
     * both the last crawl time and the last index time.
     *
     * <p>Fields written, in order: {@code first_crawl_time},
     * {@code last_crawl_time}, {@code fetch_time_history},
     * {@code first_index_time}, {@code last_index_time},
     * {@code index_time_history}.
     *
     * @param indexDocument document receiving the fields
     * @param str           page URL; not used by this method
     * @param webPage       page supplying first-fetch/first-index times and histories
     */
    private final void addTime(IndexDocument indexDocument, String str, WebPage webPage) {
        Instant now = Instant.now();

        // Crawl times: fall back to "now" when the page has no first fetch time.
        String crawlTimeStr = DateTimes.isoInstantFormat(now);
        Instant storedFirstFetch = webPage.getFirstFetchTime();
        Instant firstCrawl = storedFirstFetch != null ? storedFirstFetch : now;
        String crawlHistory = webPage.getFetchTimeHistory(crawlTimeStr);
        Intrinsics.checkNotNullExpressionValue(crawlHistory, "page.getFetchTimeHistory(crawlTimeStr)");
        indexDocument.add("first_crawl_time", DateTimes.isoInstantFormat(firstCrawl));
        indexDocument.add("last_crawl_time", crawlTimeStr);
        indexDocument.add("fetch_time_history", crawlHistory);

        // Index times: the page is asked for its values with "now" as the argument.
        String indexTimeStr = DateTimes.isoInstantFormat(now);
        Instant firstIndex = webPage.getFirstIndexTime(now);
        String indexHistory = webPage.getIndexTimeHistory(indexTimeStr);
        Intrinsics.checkNotNullExpressionValue(firstIndex, "firstIndexTime");
        indexDocument.add("first_index_time", DateTimes.isoInstantFormat(firstIndex));
        indexDocument.add("last_index_time", indexTimeStr);
        indexDocument.add("index_time_history", indexHistory);
    }

    /**
     * Adds the {@code content_type} field from the page's content type, logging
     * a warning first when that type does not contain the substring
     * {@code html} (case-sensitive).
     *
     * @param indexDocument document receiving the field
     * @param str           page URL; not used by this method
     * @param webPage       page supplying the content type
     * @throws IndexingException declared on the signature; not raised by the visible body
     */
    private final void addGeneralMetadata(IndexDocument indexDocument, String str, WebPage webPage) throws IndexingException {
        String contentType = webPage.getContentType();
        Intrinsics.checkNotNullExpressionValue(contentType, "page.contentType");

        boolean mentionsHtml = contentType.contains("html");
        if (!mentionsHtml) {
            IndexingFilter.LOG.warn("Content type " + contentType + " is not fully supported");
        }

        // The field is written regardless of the warning.
        indexDocument.add("content_type", contentType);
    }

    /**
     * Copies the configured page-metadata entries into the document.
     *
     * <p>For every entry of the shared field-name map, the entry key is looked
     * up in the page's metadata and the entry value is used as the index field
     * name. Both strings are trimmed; the entry is skipped when either is
     * missing or empty after trimming. The metadata value is split on tab
     * characters and each piece is written as its own value. Pieces for the
     * field named {@code meta_description} (compared case-insensitively) are
     * written with {@code addIfAbsent}; all other fields use {@code add}.
     *
     * @param indexDocument document receiving the fields; returned unchanged when null
     * @param str           page URL; not used by this method
     * @param webPage       page whose metadata is read
     * @return the same {@code indexDocument} instance that was passed in
     */
    private final IndexDocument addPageMetadata(final IndexDocument indexDocument, String str, WebPage webPage) {
        if (indexDocument == null || parseFieldnames.isEmpty()) {
            return indexDocument;
        }
        for (Map.Entry<String, String> entry : parseFieldnames.entrySet()) {
            String rawFieldName = entry.getValue();
            String rawValue = webPage.getMetadata().get(entry.getKey());
            if (rawFieldName == null || rawValue == null) {
                continue;
            }

            // String.trim() strips leading/trailing chars <= ' ', the same rule
            // the previous hand-written loops applied.
            String fieldName = rawFieldName.trim();
            String fieldValue = rawValue.trim();
            if (fieldName.isEmpty() || fieldValue.isEmpty()) {
                continue;
            }

            boolean isDescription = fieldName.equalsIgnoreCase("meta_description");
            // Limit -1 keeps every piece, including empty ones between
            // consecutive tabs, so no value is silently dropped by the split.
            String[] pieces = fieldValue.split("\t", -1);
            for (String piece : pieces) {
                if (isDescription) {
                    indexDocument.addIfAbsent(fieldName, piece);
                } else {
                    indexDocument.add(fieldName, piece);
                }
            }
        }
        return indexDocument;
    }

    /**
     * Returns the configuration currently held by this filter.
     *
     * @return the value last assigned by the constructor or {@code setConf}
     */
    @NotNull
    public ImmutableConfig getConf() {
        return this.conf;
    }

    /**
     * Replaces the configuration held by this filter.
     *
     * @param immutableConfig new configuration; a null argument is rejected by
     *                        the Kotlin intrinsic check on the first line
     */
    public void setConf(@NotNull ImmutableConfig immutableConfig) {
        Intrinsics.checkNotNullParameter(immutableConfig, "<set-?>");
        this.conf = immutableConfig;
    }

    /**
     * Creates the filter with its initial configuration. The constructor only
     * stores the configuration; it does not read the metatag list from it.
     *
     * @param immutableConfig configuration to hold; a null argument is rejected
     *                        by the Kotlin intrinsic check on the first line
     */
    public MetadataIndexer(@NotNull ImmutableConfig immutableConfig) {
        Intrinsics.checkNotNullParameter(immutableConfig, "conf");
        this.conf = immutableConfig;
    }
}
