package org.archive.extractor;

import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.extractor.Link;
import org.archive.net.UURIFactory;
import org.archive.util.TextUtils;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/extractor/RegexpJSLinkExtractor.class */
/**
 * Link extractor that scans character content for quoted string literals and
 * reports every literal that looks like a URI as a {@code Link.JS_MISC} link.
 *
 * <p>A literal that does not look like a URI is itself searched for further
 * quoted literals, so URIs nested inside other strings (for example inside
 * escaped quotes) are also discovered. The search state is kept in fields so
 * that each call to {@link #findNextLink()} resumes where the previous call
 * stopped.
 */
public class RegexpJSLinkExtractor extends CharSequenceLinkExtractor {
    static final String AMP = "&";
    static final String ESCAPED_AMP = "&amp;";
    static final String WHITESPACE = "\\s";

    /**
     * Matches a quoted string literal. Group 1 is the opening delimiter (up to
     * eight backslashes followed by a single or double quote), group 2 is the
     * non-empty, lazily matched content, and the literal ends at the next
     * occurrence of the exact same delimiter.
     */
    static final Pattern JAVASCRIPT_STRING_EXTRACTOR = Pattern.compile("(\\\\{0,8}+(?:\"|'))(.+?)(?:\\1)");

    /**
     * Matches a whole string that resembles a URI: it starts with a word
     * character or with up to two dots followed by a slash, contains no
     * whitespace or angle brackets, contains at least one dot or slash, and
     * ends with a word character or a slash.
     */
    static final Pattern STRING_URI_DETECTOR = Pattern.compile("(?:\\w|[\\.]{0,2}/)[\\S&&[^<>]]*(?:\\.|/)[\\S&&[^<>]]*(?:\\w|/)");

    /**
     * Longest string that is still considered a URI candidate; anything longer
     * is only searched for nested literals.
     * NOTE(review): presumably mirrors the project's maximum URL length - confirm.
     */
    private static final int MAX_URI_CANDIDATE_LENGTH = 2083;

    /** Matcher currently being advanced; {@code null} when no scan is in progress. */
    Matcher strings;

    /** Suspended enclosing matchers, innermost first, resumed once a nested scan is exhausted. */
    LinkedList<Matcher> matcherStack = new LinkedList<>();

    /**
     * Advances the scan until one more link has been added to {@code next} or
     * the content is exhausted.
     *
     * @return {@code true} if a link was added to {@code next}; {@code false}
     *         if every matcher, including all suspended ones, is exhausted
     */
    @Override // org.archive.extractor.CharSequenceLinkExtractor
    protected boolean findNextLink() {
        if (this.strings == null) {
            this.strings = JAVASCRIPT_STRING_EXTRACTOR.matcher(this.sourceContent);
        }
        while (this.strings != null) {
            while (this.strings.find()) {
                // Take the literal's content from the matcher itself. A nested
                // matcher runs over an inner string, so its start/end offsets
                // are relative to that string and must not be used to index
                // into sourceContent.
                String candidate = this.strings.group(2);
                Matcher uri = STRING_URI_DETECTOR.matcher(candidate);
                if (candidate.length() > MAX_URI_CANDIDATE_LENGTH || !uri.matches()) {
                    // Not a URI: suspend the current scan and look for quoted
                    // literals inside this string. The inner string is strictly
                    // shorter than the enclosing match, so nesting terminates.
                    this.matcherStack.addFirst(this.strings);
                    this.strings = JAVASCRIPT_STRING_EXTRACTOR.matcher(candidate);
                } else {
                    // Presumably turns every "&amp;" in the candidate back into "&".
                    String replaceAll = TextUtils.replaceAll(ESCAPED_AMP, uri.group(), AMP);
                    try {
                        // NOTE(review): 'X' is the hop-type character passed to Link; meaning not visible here.
                        this.next.add(new Link(this.source, UURIFactory.getInstance(this.source, replaceAll), Link.JS_MISC, 'X'));
                        return true;
                    } catch (URIException e) {
                        // Report the unusable candidate and keep scanning.
                        this.extractErrorListener.noteExtractError(e, this.source, replaceAll);
                    }
                }
            }
            // Current matcher is exhausted: resume the enclosing one, if any.
            this.strings = this.matcherStack.isEmpty() ? null : this.matcherStack.removeFirst();
        }
        return false;
    }

    /**
     * Resets the superclass state and discards any in-progress scan so the
     * next {@link #findNextLink()} call starts from the beginning of the content.
     */
    @Override // org.archive.extractor.CharSequenceLinkExtractor, org.archive.extractor.LinkExtractor
    public void reset() {
        super.reset();
        this.matcherStack.clear();
        this.strings = null;
    }

    /**
     * Creates a new extractor instance.
     *
     * @return a fresh {@code RegexpJSLinkExtractor}
     */
    protected static CharSequenceLinkExtractor newDefaultInstance() {
        return new RegexpJSLinkExtractor();
    }
}
