package org.sakaiproject.search.component.adapter.contenthosting;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.sakaiproject.content.api.ContentResource;
import org.sakaiproject.exception.ServerOverloadException;
import org.sakaiproject.search.api.SearchUtils;
import org.sakaiproject.search.component.adapter.util.DigestHtml;
import org.sakaiproject.search.util.HTMLParser;
import org.w3c.tidy.Tidy;

/* loaded from: input_file:WEB-INF/lib/search-impl-1.4.0-b07.jar:org/sakaiproject/search/component/adapter/contenthosting/HtmlContentDigester.class */
/**
 * Content digester that turns an HTML {@link ContentResource} into plain text
 * for the search index.
 *
 * <p>Two strategies are implemented. When {@code useDirectParser} is set (the
 * value it is initialised to here), the resource bytes are decoded as UTF-8 and
 * tokenised with {@link HTMLParser}. Otherwise the resource stream is run through
 * JTidy and the tidied markup is passed to {@link DigestHtml#digest}.
 */
public class HtmlContentDigester extends BaseContentDigester {
    private static final Log log = LogFactory.getLog(HtmlContentDigester.class);

    /** Selects the HTMLParser based strategy over the JTidy based one. */
    private boolean useDirectParser = true;

    /**
     * Extracts the searchable text of an HTML resource.
     *
     * @param contentResource the resource to digest; must not be {@code null}
     * @return the extracted text
     * @throws RuntimeException if {@code contentResource} is {@code null}, or if
     *         reading the resource fails with a {@link ServerOverloadException}
     *         or an {@link UnsupportedEncodingException} (the original exception
     *         is preserved as the cause)
     */
    @Override // org.sakaiproject.search.component.adapter.contenthosting.ContentDigester
    public String getContent(ContentResource contentResource) {
        if (contentResource == null) {
            throw new RuntimeException("null contentResource passed to getContent");
        }
        try {
            if (this.useDirectParser) {
                return digestWithDirectParser(contentResource);
            }
            return digestWithTidy(contentResource);
        } catch (ServerOverloadException e) {
            throw new RuntimeException("Failed get Resource Content ", e);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("Failed get Resource Content ", e);
        }
    }

    /**
     * Decodes the resource body as UTF-8, walks it with {@link HTMLParser} and
     * appends every non-empty token through {@link SearchUtils#appendCleanString}.
     */
    private String digestWithDirectParser(ContentResource contentResource)
            throws ServerOverloadException, UnsupportedEncodingException {
        byte[] rawBytes = contentResource.getContent();
        if (rawBytes == null) {
            // NOTE(review): assumes getContent() can return null for a resource
            // without a body - confirm against the ContentResource contract.
            // new String((byte[]) null, ...) would fail with a NullPointerException,
            // so such a resource is treated as having no text.
            return "";
        }
        String html = new String(rawBytes, "UTF-8");
        StringBuilder text = new StringBuilder();
        HTMLParser parser = new HTMLParser(html);
        while (parser.hasNext()) {
            String token = parser.next();
            if (token.length() > 0) {
                SearchUtils.appendCleanString(token, text);
            }
        }
        return text.toString();
    }

    /**
     * Streams the resource through JTidy, cleans the tidied output with
     * {@link SearchUtils#appendCleanString} and digests it with {@link DigestHtml}.
     * The resource stream is closed on every exit path.
     */
    private String digestWithTidy(ContentResource contentResource)
            throws ServerOverloadException, UnsupportedEncodingException {
        InputStream contentStream = null;
        Tidy tidy = new Tidy();
        // ByteArrayOutputStream.close() has no effect, so the buffer needs no cleanup.
        ByteArrayOutputStream tidyBuffer = new ByteArrayOutputStream();
        try {
            contentStream = contentResource.streamContent();
            log.info("Raw Content was " + contentStream);
            tidy.setQuiet(true);
            tidy.setShowWarnings(false);
            tidy.setOnlyErrors(true);
            tidy.parse(contentStream, tidyBuffer);
            String tidyOutput = SearchUtils.appendCleanString(
                    new String(tidyBuffer.toByteArray(), "UTF-8"), (StringBuilder) null).toString();
            log.info(contentResource.getReference() + " Tidy Output was " + tidyOutput);
            log.debug("Tidy Output was " + tidyOutput);
            return DigestHtml.digest(tidyOutput);
        } finally {
            if (contentStream != null) {
                try {
                    contentStream.close();
                } catch (IOException closeFailure) {
                    // Best effort: a failed close must not mask the digest result
                    // or the exception that is already propagating.
                    log.debug(closeFailure);
                }
            }
        }
    }

    /**
     * Returns the digested text of the resource wrapped in a {@link Reader}.
     *
     * @param contentResource the resource to digest; must not be {@code null}
     * @return a reader over the string produced by {@link #getContent(ContentResource)}
     */
    @Override // org.sakaiproject.search.component.adapter.contenthosting.ContentDigester
    public Reader getContentReader(ContentResource contentResource) {
        return new StringReader(getContent(contentResource));
    }
}
