package org.jesterj.ingest.processors;

import com.copyright.easiertest.SimpleProperty;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import org.apache.commons.io.IOUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jesterj.ingest.model.Document;
import org.jesterj.ingest.model.DocumentProcessor;
import org.jesterj.ingest.model.impl.NamedBuilder;

/* loaded from: input_file:org/jesterj/ingest/processors/FetchUrl.class */
public class FetchUrl implements DocumentProcessor {
    private static final Logger log = LogManager.getLogger();
    private String linkField;
    private boolean failOnIOError;
    private String name;
    private long throttleMs;
    private String errorField;
    private String httpStatusField;
    Cache<String, Long> visitedSiteCache = CacheBuilder.newBuilder().maximumSize(50000).build();
    private int connectionTimeout = 5000;
    private int readTimeout = 5000;

    /* loaded from: input_file:org/jesterj/ingest/processors/FetchUrl$Builder.class */
    public static class Builder extends NamedBuilder<FetchUrl> {
        FetchUrl obj = new FetchUrl();

        @Override // org.jesterj.ingest.model.impl.NamedBuilder
        /* renamed from: named */
        public NamedBuilder<FetchUrl> named2(String str) {
            getObj().name = str;
            return this;
        }

        /* JADX INFO: Access modifiers changed from: protected */
        /* JADX WARN: Can't rename method to resolve collision */
        @Override // org.jesterj.ingest.model.impl.NamedBuilder
        public FetchUrl getObj() {
            return this.obj;
        }

        public Builder fromLinkIn(String str) {
            getObj().linkField = str;
            return this;
        }

        public Builder reportErrorsIn(String str) {
            getObj().errorField = str;
            return this;
        }

        public Builder reportHttpStatusIn(String str) {
            getObj().httpStatusField = str;
            return this;
        }

        public Builder sameSiteAccessOncePer(long j) {
            getObj().throttleMs = j;
            return this;
        }

        public Builder withSiteCachSize(long j) {
            getObj().visitedSiteCache = CacheBuilder.newBuilder().maximumSize(j).build();
            return this;
        }

        public Builder failDocOnError(boolean z) {
            getObj().failOnIOError = z;
            return this;
        }

        public Builder withConnectionTimeOut(int i) {
            getObj().connectionTimeout = i;
            return this;
        }

        public Builder withReadTimeOut(int i) {
            getObj().readTimeout = i;
            return this;
        }

        private void setObj(FetchUrl fetchUrl) {
            this.obj = fetchUrl;
        }

        @Override // org.jesterj.ingest.model.Buildable
        public FetchUrl build() {
            FetchUrl obj = getObj();
            setObj(new FetchUrl());
            return obj;
        }
    }

    @Override // org.jesterj.ingest.model.DocumentProcessor
    public Document[] processDocument(Document document) {
        try {
            URL url = new URL(document.getFirstValue(this.linkField));
            String protocol = url.getProtocol();
            String host = url.getHost();
            Long l = (Long) this.visitedSiteCache.getIfPresent(host);
            long currentTimeMillis = System.currentTimeMillis();
            if (l == null) {
                this.visitedSiteCache.put(host, Long.valueOf(currentTimeMillis));
            } else {
                long longValue = currentTimeMillis - l.longValue();
                if (longValue < this.throttleMs) {
                    try {
                        Thread.sleep(this.throttleMs - longValue);
                    } catch (InterruptedException e) {
                    }
                }
            }
            URLConnection openConnection = url.openConnection();
            openConnection.setConnectTimeout(this.connectionTimeout);
            openConnection.setReadTimeout(this.readTimeout);
            openConnection.connect();
            ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
            if ("http".equals(protocol) || "https".equals(protocol)) {
                HttpURLConnection httpURLConnection = (HttpURLConnection) openConnection;
                int responseCode = httpURLConnection.getResponseCode();
                if (this.httpStatusField != null) {
                    document.put(this.httpStatusField, String.valueOf(responseCode));
                }
                if (responseCode >= 400) {
                    String str = "HTTP server responded " + responseCode + " " + httpURLConnection.getResponseMessage();
                    if (this.errorField != null) {
                        document.put(this.errorField, str);
                    }
                    throw new IOException(str);
                }
            }
            IOUtils.copy(openConnection.getInputStream(), byteArrayOutputStream);
            document.setRawData(byteArrayOutputStream.toByteArray());
        } catch (IOException e2) {
            if (this.failOnIOError) {
                throw new RuntimeException(e2);
            }
            log.warn("Could not fetch " + 0 + " for " + document.getId(), e2);
            if (this.errorField != null) {
                document.put(this.errorField, e2.getMessage());
            }
        }
        return new Document[]{document};
    }

    @SimpleProperty
    public String getLinkField() {
        return this.linkField;
    }

    @Override // org.jesterj.ingest.model.Configurable
    public String getName() {
        return this.name;
    }
}
