package org.archive.crawler.util;

import it.unimi.dsi.fastutil.longs.LongIterator;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.archive.crawler.datamodel.CandidateURI;
import org.archive.crawler.datamodel.UriUniqFilter;
import org.archive.util.fingerprint.ArrayLongFPCache;
import st.ata.util.FPGenerator;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/util/FPMergeUriUniqFilter.class */
public abstract class FPMergeUriUniqFilter implements UriUniqFilter {
    /** Logger used by flush() to report per-flush statistics at INFO level. */
    private static Logger LOGGER = Logger.getLogger(FPMergeUriUniqFilter.class.getName());
    /** Destination handed every CandidateURI that passes the filter; assigned by setDestination(). */
    protected UriUniqFilter.HasUriReceiver receiver;
    /** Optional writer; when non-null, every string passed to add() is printed to it. */
    protected PrintWriter profileLog;
    /** Initial value of {@link #maxPending}. */
    public static final int DEFAULT_MAX_PENDING = 10000;
    /** Delay multiplier: flush() pushes the next allowable requested flush out by 100x the flush duration. */
    public static final long FLUSH_DELAY_FACTOR = 100;
    /** Running count of adds rejected because quickCheck() reported the fingerprint as already cached. */
    protected long quickDuplicateCount = 0;
    /** Value of quickDuplicateCount recorded at the end of the previous flush(). */
    protected long quickDupAtLast = 0;
    /** Running count of adds rejected because pendingSet already held the fingerprint. */
    protected long pendDuplicateCount = 0;
    /** Value of pendDuplicateCount recorded at the end of the previous flush(). */
    protected long pendDupAtLast = 0;
    /** Running count of pending fingerprints found equal to an already-stored fingerprint during flush(). */
    protected long mergeDuplicateCount = 0;
    /** Value of mergeDuplicateCount recorded at the end of the previous flush(). */
    protected long mergeDupAtLast = 0;
    /** Items awaiting the next flush(), ordered (and de-duplicated) by fingerprint via PendingItem.compareTo(). */
    protected TreeSet<PendingItem> pendingSet = new TreeSet<>();
    /** Size of pendingSet at which add() triggers a flush(). */
    protected int maxPending = DEFAULT_MAX_PENDING;
    /** Wall-clock time (ms) that must have passed before requestFlush() will actually flush. */
    protected long nextFlushAllowableAfter = 0;
    /** Fingerprint cache consulted by quickCheck(); a false result from its add() is treated as a duplicate. */
    protected ArrayLongFPCache quickCache = new ArrayLongFPCache();

    /* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/util/FPMergeUriUniqFilter$PendingItem.class */
    /**
     * A fingerprint waiting to be merged, paired with the CandidateURI (possibly
     * null) that should be released to the receiver if the fingerprint survives
     * the merge. Instances order themselves by fingerprint only.
     */
    public class PendingItem implements Comparable {
        long fp;
        CandidateURI caUri;

        /**
         * @param j fingerprint of the URI
         * @param candidateURI URI to release on a successful merge; may be null
         */
        public PendingItem(long j, CandidateURI candidateURI) {
            fp = j;
            caUri = candidateURI;
        }

        /**
         * Orders items by ascending fingerprint.
         *
         * @param obj the other PendingItem
         * @return -1, 0 or 1 as this fingerprint is less than, equal to or
         *         greater than the other's
         * @throws ClassCastException if obj is not a PendingItem
         */
        @Override // java.lang.Comparable
        public int compareTo(Object obj) {
            long otherFp = ((PendingItem) obj).fp;
            if (fp == otherFp) {
                return 0;
            }
            return fp < otherFp ? -1 : 1;
        }
    }

    public FPMergeUriUniqFilter() {
        String property = System.getProperty(FPMergeUriUniqFilter.class.getName() + ".profileLogFile");
        if (property != null) {
            setProfileLog(new File(property));
        }
    }

    /**
     * Sets the pending-set size at which add() triggers a flush.
     *
     * @param i new threshold
     */
    public void setMaxPending(int i) {
        maxPending = i;
    }

    /**
     * @return number of items currently waiting in the pending set
     */
    @Override // org.archive.crawler.datamodel.UriUniqFilter
    public long pending() {
        long queued = pendingSet.size();
        return queued;
    }

    /**
     * Sets the receiver that is handed every CandidateURI passing the filter.
     *
     * @param hasUriReceiver destination for passed URIs
     */
    @Override // org.archive.crawler.datamodel.UriUniqFilter
    public void setDestination(UriUniqFilter.HasUriReceiver hasUriReceiver) {
        receiver = hasUriReceiver;
    }

    /**
     * Writes one line to the profile log; does nothing when no log is configured.
     *
     * @param str line to record
     */
    protected void profileLog(String str) {
        if (this.profileLog == null) {
            return;
        }
        this.profileLog.println(str);
    }

    /**
     * Offers a URI key to the filter. The key is profile-logged, fingerprinted
     * and checked against the quick cache; keys the cache has already seen are
     * counted as quick duplicates and dropped. Others are queued via pend(),
     * and a flush is run once the pending set reaches maxPending.
     *
     * @param str canonical key of the URI
     * @param candidateURI URI to release if the key proves new; may be null
     */
    @Override // org.archive.crawler.datamodel.UriUniqFilter
    public synchronized void add(String str, CandidateURI candidateURI) {
        profileLog(str);
        long fingerprint = createFp(str);
        if (quickCheck(fingerprint)) {
            pend(fingerprint, candidateURI);
            if (pendingSet.size() >= maxPending) {
                flush();
            }
        } else {
            quickDuplicateCount++;
        }
    }

    /**
     * Queues a fingerprint for the next merge.
     * <p>
     * While count() reports zero, the fingerprint is queued without its URI
     * and the URI (if any) is handed to the receiver immediately. Otherwise
     * the URI is queued with the fingerprint and released by flush(). A
     * fingerprint already present in the pending set is counted as a pending
     * duplicate and nothing else happens.
     *
     * @param j fingerprint to queue
     * @param candidateURI associated URI; may be null
     */
    protected void pend(long j, CandidateURI candidateURI) {
        boolean nothingStoredYet = count() == 0;
        CandidateURI queuedUri = nothingStoredYet ? null : candidateURI;
        boolean newlyQueued = pendingSet.add(new PendingItem(j, queuedUri));
        if (!newlyQueued) {
            pendDuplicateCount++;
            return;
        }
        if (nothingStoredYet && candidateURI != null) {
            receiver.receive(candidateURI);
        }
    }

    /**
     * Adds the fingerprint to the quick cache.
     *
     * @param j fingerprint to test
     * @return the result of the cache's add(); add() treats false as "duplicate"
     */
    private boolean quickCheck(long j) {
        boolean accepted = quickCache.add(j);
        return accepted;
    }

    /**
     * Computes the 64-bit fingerprint of a key using FPGenerator.std64.
     *
     * @param charSequence key to fingerprint
     * @return the fingerprint
     */
    public static long createFp(CharSequence charSequence) {
        long fingerprint = FPGenerator.std64.fp(charSequence);
        return fingerprint;
    }

    /**
     * Adds the key and then flushes immediately, so the decision on this URI
     * is not left waiting in the pending set.
     *
     * @param str canonical key of the URI
     * @param candidateURI URI to release if the key proves new
     */
    @Override // org.archive.crawler.datamodel.UriUniqFilter
    public void addNow(String str, CandidateURI candidateURI) {
        this.add(str, candidateURI);
        this.flush();
    }

    /**
     * Records the key's fingerprint (without an attached URI) and then hands
     * the URI to the receiver unconditionally, whether or not the key was
     * already known.
     *
     * @param str canonical key of the URI
     * @param candidateURI URI to deliver
     */
    @Override // org.archive.crawler.datamodel.UriUniqFilter
    public void addForce(String str, CandidateURI candidateURI) {
        this.add(str, null);
        receiver.receive(candidateURI);
    }

    /**
     * Records the key's fingerprint without associating any URI with it.
     *
     * @param str canonical key to remember
     */
    @Override // org.archive.crawler.datamodel.UriUniqFilter
    public void note(String str) {
        this.add(str, null);
    }

    /**
     * Not supported by this filter.
     *
     * @param str ignored
     * @param candidateURI ignored
     * @throws UnsupportedOperationException always
     */
    @Override // org.archive.crawler.datamodel.UriUniqFilter
    public void forget(String str, CandidateURI candidateURI) {
        throw new UnsupportedOperationException();
    }

    /**
     * Flushes only if the current time is past nextFlushAllowableAfter.
     *
     * @return the result of flush(), or -1 if the request came too soon
     */
    @Override // org.archive.crawler.datamodel.UriUniqFilter
    public synchronized long requestFlush() {
        boolean tooSoon = System.currentTimeMillis() <= nextFlushAllowableAfter;
        return tooSoon ? -1L : flush();
    }

    /**
     * Merges the pending set into the stored fingerprints.
     * <p>
     * The stored fingerprints (from beginFpMerge()) and the pending items
     * (ascending by fingerprint) are walked in step, and every surviving
     * fingerprint is written through addNewFp() in merged order. A pending
     * item whose fingerprint is not already stored is written and, if it
     * carries a URI, that URI is handed to the receiver. A pending item whose
     * fingerprint is already stored is counted as a merge duplicate and
     * discarded: it is neither written a second time nor released.
     * <p>
     * Afterwards the next allowable requested flush is scheduled
     * FLUSH_DELAY_FACTOR times the flush duration after the flush start, the
     * statistics are logged at INFO, the pending set is cleared and
     * finishFpMerge() is called.
     *
     * @return number of URIs handed to the receiver, or 0 if nothing was pending
     */
    public synchronized long flush() {
        if (pending() == 0) {
            return 0L;
        }
        long flushStart = System.currentTimeMillis();
        long adds = 0;
        long fpOnlyAdds = 0;
        Iterator<PendingItem> pendIter = this.pendingSet.iterator();
        LongIterator fpIter = beginFpMerge();
        PendingItem currPend = pendIter.hasNext() ? pendIter.next() : null;
        Long currFp = fpIter.hasNext() ? fpIter.next() : null;
        while (currFp != null || currPend != null) {
            if (currFp != null && (currPend == null || currFp.longValue() <= currPend.fp)) {
                // Stored fingerprint sorts first (or ties): carry it over.
                addNewFp(currFp.longValue());
                if (currPend != null && currFp.longValue() == currPend.fp) {
                    // Already stored: drop the pending item so the fingerprint is
                    // not written twice and its URI is not released as if new.
                    this.mergeDuplicateCount++;
                    currPend = pendIter.hasNext() ? pendIter.next() : null;
                }
                currFp = fpIter.hasNext() ? fpIter.next() : null;
            } else {
                // Here currPend is non-null and sorts strictly before currFp
                // (or the stored fingerprints are exhausted): it is new.
                addNewFp(currPend.fp);
                if (currPend.caUri != null) {
                    adds++;
                    this.receiver.receive(currPend.caUri);
                } else {
                    fpOnlyAdds++;
                }
                currPend = pendIter.hasNext() ? pendIter.next() : null;
            }
        }
        long flushDuration = System.currentTimeMillis() - flushStart;
        this.nextFlushAllowableAfter = flushStart + (FLUSH_DELAY_FACTOR * flushDuration);
        if (LOGGER.isLoggable(Level.INFO)) {
            long mergeDups = this.mergeDuplicateCount - this.mergeDupAtLast;
            long pendDups = this.pendDuplicateCount - this.pendDupAtLast;
            long quickDups = this.quickDuplicateCount - this.quickDupAtLast;
            LOGGER.info("flush took " + flushDuration + "ms: " + adds + " adds, " + fpOnlyAdds + " fpOnlydds, " + mergeDups + " mergeDups, " + pendDups + " pendDups, " + quickDups + " quickDups ");
            if (adds == 0 && fpOnlyAdds == 0 && mergeDups == 0 && pendDups == 0 && quickDups == 0) {
                LOGGER.info("that's odd");
            }
        }
        // Snapshot the counters so the next flush reports deltas.
        this.mergeDupAtLast = this.mergeDuplicateCount;
        this.pendDupAtLast = this.pendDuplicateCount;
        this.quickDupAtLast = this.quickDuplicateCount;
        this.pendingSet.clear();
        finishFpMerge();
        return adds;
    }

    /**
     * Called once at the start of each non-empty flush().
     *
     * @return iterator over the already-stored fingerprints; flush() walks it in
     *         step with the ascending pending set, so it presumably must yield
     *         values in ascending order — confirm against implementations
     */
    protected abstract LongIterator beginFpMerge();

    /**
     * Called by flush() for each fingerprint that belongs in the merged result,
     * both those read back from beginFpMerge()'s iterator and those taken from
     * the pending set.
     *
     * @param j fingerprint to record
     */
    protected abstract void addNewFp(long j);

    /**
     * Called once at the end of flush(), after the pending set has been cleared.
     */
    protected abstract void finishFpMerge();

    /**
     * Closes the profile log, if one is open.
     */
    @Override // org.archive.crawler.datamodel.UriUniqFilter
    public void close() {
        PrintWriter log = this.profileLog;
        if (log == null) {
            return;
        }
        log.close();
    }

    /**
     * Directs profile logging to the given file, replacing any log that is
     * already open.
     * <p>
     * The new writer is opened first; only if that succeeds is the previous
     * writer closed, so its buffered output is flushed and its file handle is
     * released instead of being leaked. If the new file cannot be opened, the
     * existing log is left in place.
     *
     * @param file file to write profile lines to
     * @throws RuntimeException wrapping the FileNotFoundException if the file
     *         cannot be opened for writing
     */
    @Override // org.archive.crawler.datamodel.UriUniqFilter
    public void setProfileLog(File file) {
        PrintWriter replacement;
        try {
            replacement = new PrintWriter(new BufferedOutputStream(new FileOutputStream(file)));
        } catch (FileNotFoundException e) {
            throw new RuntimeException(e);
        }
        PrintWriter previous = this.profileLog;
        this.profileLog = replacement;
        if (previous != null) {
            previous.close();
        }
    }
}
