package org.archive.crawler.url.canonicalize;

import java.util.regex.Pattern;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/url/canonicalize/StripSessionIDs.class */
/**
 * Canonicalization rule that removes well-known session-id parameters from a
 * URL string.
 *
 * <p>Three patterns are applied in sequence, each to the output of the
 * previous one: {@code jsessionid}/{@code phpsessid}, then {@code sid}, then
 * {@code ASPSESSIONID...}. All patterns are case-insensitive and must match
 * the whole input, so each pass can remove at most one session-id occurrence.
 *
 * <p>The actual rewriting is delegated to {@code doStripRegexMatch}, which is
 * not declared in this class (presumably inherited from {@link BaseRule});
 * it is assumed to return the input unchanged when the matcher does not match
 * -- confirm against {@code BaseRule}.
 */
public class StripSessionIDs extends BaseRule {
    private static final long serialVersionUID = -3737115200690525641L;

    /*
     * Human-readable description handed to the superclass constructor.
     * NOTE(review): the example session id in this text appears to be 31
     * characters long, while the patterns below require exactly 32; the text
     * is kept verbatim because it is a runtime-visible string -- confirm
     * before editing.
     */
    private static final String DESCRIPTION = "Strip known session IDs. Use this rule to remove all of a set of known session IDs. For example, this rule will strip JSESSIONID and its value from 'http://archive.org/index.html?JSESSIONID=DDDSSE233232333355FFSXXXXDSDSDS'.  The resulting canonicalization returns 'http://archive.org/index.html'. This rule strips JSESSIONID, ASPSESSIONID, PHPSESSID, and 'sid' session ids.";

    /**
     * Matches {@code jsessionid=} or {@code phpsessid=} followed by exactly 32
     * alphanumeric characters. Group 1 captures everything before the
     * parameter name; group 2 (optional) captures everything after a
     * following {@code &}.
     */
    private static final Pattern BASE_PATTERN = Pattern.compile(
            "^(.+)(?:(?:(?:jsessionid)|(?:phpsessid))=[0-9a-zA-Z]{32})(?:&(.*))?$",
            Pattern.CASE_INSENSITIVE);

    /**
     * Matches {@code sid=} followed by exactly 32 alphanumeric characters.
     * Groups are laid out as in {@link #BASE_PATTERN}. Because group 1 is an
     * unconstrained {@code .+}, any parameter whose name merely ends in
     * {@code sid} also matches.
     */
    private static final Pattern SID_PATTERN = Pattern.compile(
            "^(.+)(?:sid=[0-9a-zA-Z]{32})(?:&(.*))?$",
            Pattern.CASE_INSENSITIVE);

    /**
     * Matches {@code ASPSESSIONID} followed by 8 letters, {@code =}, and 24
     * letters. Groups are laid out as in {@link #BASE_PATTERN}.
     */
    private static final Pattern ASPSESSION_PATTERN = Pattern.compile(
            "^(.+)(?:ASPSESSIONID[a-zA-Z]{8}=[a-zA-Z]{24})(?:&(.*))?$",
            Pattern.CASE_INSENSITIVE);

    /**
     * Creates the rule, passing the given value and the fixed description
     * text to the {@link BaseRule} constructor.
     *
     * @param str value forwarded as the first superclass constructor argument
     *            (presumably the rule name -- confirm against {@code BaseRule})
     */
    public StripSessionIDs(String str) {
        super(str, DESCRIPTION);
    }

    /**
     * Runs the three session-id patterns over the URL, one after another.
     *
     * @param str the URL string to canonicalize; must not be {@code null},
     *            since it is passed directly to {@link Pattern#matcher}
     * @param obj context object; not used by this rule
     * @return the result of the last {@code doStripRegexMatch} call
     */
    @Override // org.archive.crawler.url.CanonicalizationRule
    public String canonicalize(String str, Object obj) {
        // Each pass works on the output of the previous one, so the order of
        // application is significant and is kept as BASE -> SID -> ASPSESSION.
        String stripped = doStripRegexMatch(str, BASE_PATTERN.matcher(str));
        stripped = doStripRegexMatch(stripped, SID_PATTERN.matcher(stripped));
        return doStripRegexMatch(stripped, ASPSESSION_PATTERN.matcher(stripped));
    }
}
