package org.archive.crawler.url;

import java.io.File;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CrawlOrder;
import org.archive.crawler.settings.MapType;
import org.archive.crawler.settings.XMLSettingsHandler;
import org.archive.crawler.url.canonicalize.FixupQueryStr;
import org.archive.crawler.url.canonicalize.LowercaseRule;
import org.archive.crawler.url.canonicalize.StripSessionIDs;
import org.archive.crawler.url.canonicalize.StripUserinfoRule;
import org.archive.crawler.url.canonicalize.StripWWWRule;
import org.archive.net.UURIFactory;
import org.archive.util.TmpDirTestCase;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/url/CanonicalizerTest.class */
/**
 * Exercises {@link Canonicalizer#canonicalize} against a rule chain that is
 * registered on a freshly initialized settings handler.
 *
 * <p>The rule chain is rebuilt for every test in {@link #setUp()} and consists
 * of, in registration order: {@link LowercaseRule}, {@link StripUserinfoRule},
 * {@link StripWWWRule}, {@link StripSessionIDs} and {@link FixupQueryStr}.
 */
public class CanonicalizerTest extends TmpDirTestCase {
    /** URL every input in this test is expected to canonicalize to. */
    private static final String CANONICAL_URL = "http://archive.org/index.html";

    /** Same document addressed through the {@code www.} host alias. */
    private static final String WWW_URL = "http://www.archive.org/index.html";

    /** {@code www.} URL carrying a 32-character {@code jsessionid} query parameter. */
    private static final String SESSION_ID_URL =
            WWW_URL + "?jsessionid=01234567890123456789012345678901";

    /** Order file handed to the settings handler; created under the test tmp dir. */
    private File orderFile;

    /** Settings handler the canonicalization rules are looked up from. */
    protected XMLSettingsHandler settingsHandler;

    /** Canonicalization rule map read from the crawl order module. */
    private MapType rules;

    /**
     * Builds the settings handler backed by a per-class order file in the tmp
     * directory, fetches the rules map from the crawl order module and appends
     * the five canonicalization rules under test.
     *
     * @throws Exception if the superclass set-up, settings initialization or
     *         rule registration fails
     */
    @Override // org.archive.util.TmpDirTestCase, junit.framework.TestCase
    public void setUp() throws Exception {
        super.setUp();
        this.orderFile = new File(getTmpDir(), getClass().getName() + ".order.xml");
        this.settingsHandler = new XMLSettingsHandler(this.orderFile);
        this.settingsHandler.initialize();
        this.rules = (MapType) this.settingsHandler.getSettingsObject(null)
                .getModule(CrawlOrder.ATTR_NAME)
                .getAttribute(CrawlOrder.ATTR_RULES);
        this.rules.addElement(null, new LowercaseRule("lowercase"));
        this.rules.addElement(null, new StripUserinfoRule("userinfo"));
        this.rules.addElement(null, new StripWWWRule("www"));
        this.rules.addElement(null, new StripSessionIDs("ids"));
        this.rules.addElement(null, new FixupQueryStr("querystr"));
    }

    /**
     * Runs {@code url} through the configured rule chain.
     *
     * <p>The rule iterator is always requested with {@link #CANONICAL_URL} as
     * its context object, matching what every assertion in this test did
     * before the call was factored out.
     *
     * @param url URL string to canonicalize
     * @return the value returned by {@link Canonicalizer#canonicalize}
     * @throws URIException if either URL cannot be turned into a UURI
     */
    private String canonicalizeWithRules(String url) throws URIException {
        return Canonicalizer.canonicalize(
                UURIFactory.getInstance(url),
                this.rules.iterator(UURIFactory.getInstance(CANONICAL_URL)));
    }

    /**
     * Checks that an already-canonical URL is left untouched, and that the
     * {@code www.} prefix and a {@code jsessionid} query string are removed.
     *
     * <p>Uses {@code assertEquals} rather than {@code assertTrue(a.equals(b))}
     * so a failure reports the actual canonicalized value.
     *
     * @throws URIException if a test URL cannot be parsed
     */
    public void testCanonicalize() throws URIException {
        assertEquals("Mangled original", CANONICAL_URL, canonicalizeWithRules(CANONICAL_URL));
        assertEquals("Mangled www", CANONICAL_URL, canonicalizeWithRules(WWW_URL));
        // The original repeated this exact assertion twice; one copy is enough.
        assertEquals("Mangled sessionid", CANONICAL_URL, canonicalizeWithRules(SESSION_ID_URL));
    }
}
