package net.sf.okapi.filters.html;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.EnumSet;
import java.util.Locale;
import net.htmlparser.jericho.Attribute;
import net.htmlparser.jericho.EndTag;
import net.htmlparser.jericho.EndTagType;
import net.htmlparser.jericho.Segment;
import net.htmlparser.jericho.StartTag;
import net.htmlparser.jericho.StartTagType;
import net.htmlparser.jericho.Tag;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.MimeTypeMapper;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.encoder.HtmlEncoder;
import net.sf.okapi.common.exceptions.OkapiIOException;
import net.sf.okapi.common.filters.FilterConfiguration;
import net.sf.okapi.common.filters.PropertyTextUnitPlaceholder;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.Property;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.skeleton.GenericSkeleton;
import net.sf.okapi.common.skeleton.ISkeletonWriter;
import net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter;
import net.sf.okapi.filters.abstractmarkup.ExtractionRuleState;
import net.sf.okapi.filters.abstractmarkup.config.TaggedFilterConfiguration;
import net.sf.okapi.lib.xliff2.Const;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.its.Main;

@UsingParameters(Parameters.class)
/* loaded from: input_file:net/sf/okapi/filters/html/HtmlFilter.class */
public class HtmlFilter extends AbstractMarkupFilter {
    // Per-instance logger keyed on the runtime class (getClass()), so a subclass logs under its own name.
    private final Logger LOGGER = LoggerFactory.getLogger(getClass());
    // Active filter parameters; getConfig() reads the tagged configuration from this object.
    private Parameters parameters;
    // Tidied copy of the input, assigned in open() when HTML cleanup is enabled; null until then.
    private RawDocument tempSourceInput;
    // File located at tempSourceInput's input URI; close() deletes it.
    private File tempSourceFile;

    public HtmlFilter() {
        setMimeType(MimeTypeMapper.HTML_MIME_TYPE);
        setFilterWriter(createFilterWriter());
        setParameters(new Parameters());
        setName("okf_html");
        setDisplayName("HTML/XHTML Filter");
        addConfiguration(new FilterConfiguration(getName(), MimeTypeMapper.HTML_MIME_TYPE, getClass().getName(), "HTML", "HTML or XHTML documents", Parameters.NONWELLFORMED_PARAMETERS, ".html;.htm;"));
        addConfiguration(new FilterConfiguration(getName() + "-wellFormed", MimeTypeMapper.XHTML_MIME_TYPE, getClass().getName(), "HTML (Well-Formed)", "XHTML and well-formed HTML documents", Parameters.WELLFORMED_PARAMETERS, ".xhtml"));
    }

    /**
     * Supplies the skeleton writer used for this filter's output.
     *
     * @return a fresh {@link HtmlSkeletonWriter}
     */
    @Override // net.sf.okapi.common.filters.AbstractFilter, net.sf.okapi.common.filters.IFilter
    public ISkeletonWriter createSkeletonWriter() {
        final ISkeletonWriter htmlWriter = new HtmlSkeletonWriter();
        return htmlWriter;
    }

    /**
     * Opens the document for filtering. When the configuration requests HTML
     * cleanup, the input is first rewritten into a tidied temporary document
     * and that copy is what gets opened; otherwise the input is opened as is.
     *
     * @param rawDocument the document to filter
     * @param z flag forwarded unchanged to the superclass {@code open}
     * @throws OkapiIOException if producing or opening the tidied copy raises an {@link IOException}
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter, net.sf.okapi.common.filters.AbstractFilter, net.sf.okapi.common.filters.IFilter
    public void open(RawDocument rawDocument, boolean z) {
        final String encoding = detectEncoding(rawDocument);

        // The document name is the URI path, or empty when the input has no URI.
        String documentName = "";
        if (rawDocument.getInputURI() != null) {
            documentName = rawDocument.getInputURI().getPath();
        }
        setCurrentDocName(documentName);

        if (getConfig().shouldCleanupHtml()) {
            try {
                this.tempSourceInput = StreamedSourceCopy.htmlTidiedRewrite(rawDocument, isDocumentEncoding(), encoding, isBOM());
                this.tempSourceFile = new File(this.tempSourceInput.getInputURI());
                super.open(this.tempSourceInput, z);
            } catch (IOException ioFailure) {
                throw new OkapiIOException("Error generating tidied source temp file", ioFailure);
            }
        } else {
            super.open(rawDocument, z);
        }
    }

    /**
     * Closes the filter and removes the tidied temporary file, if one was
     * created by {@code open}.
     *
     * <p>A failed deletion is logged instead of being silently ignored. Once the
     * file is gone (deleted here or already removed elsewhere) the reference is
     * dropped so later calls do not touch the file system again.
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter, net.sf.okapi.common.filters.AbstractFilter, net.sf.okapi.common.filters.IFilter, java.lang.AutoCloseable
    public void close() {
        super.close();
        if (this.tempSourceFile == null) {
            return;
        }
        // endFilter() may already have removed the file; only a file that still
        // exists and cannot be deleted is worth a warning.
        if (this.tempSourceFile.exists() && !this.tempSourceFile.delete()) {
            this.LOGGER.warn("Couldn't delete HTML Filter tidied temp file: {}", this.tempSourceFile);
            return; // keep the reference so a later close() can retry
        }
        this.tempSourceFile = null;
    }

    /**
     * Runs the superclass start-up, notes the whitespace-collapsing default in
     * the debug log, and initializes the event builder's code finder from the
     * current configuration.
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    public void startFilter() {
        super.startFilter();

        final boolean preserveWhitespace = getConfig().isGlobalPreserveWhitespace();
        if (!preserveWhitespace) {
            this.LOGGER.debug("By default the HTML filter will collapse whitespace unless overridden in the configuration");
        }

        getEventBuilder().initializeCodeFinder(
                getConfig().isUseCodeFinder(),
                getConfig().getCodeFinderRules());
    }

    /**
     * Finishes filtering and disposes of the tidied temporary document, if any:
     * the document is closed and the file at its input URI is deleted. A failed
     * deletion is logged as a warning.
     *
     * <p>The field is cleared before disposal so that a later run which does not
     * create a new tidied copy (the non-cleanup path in {@code open}) does not
     * close and delete the same document again and log a spurious warning.
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    public void endFilter() {
        super.endFilter();

        final RawDocument tidied = this.tempSourceInput;
        if (tidied == null) {
            return;
        }
        // Forget the document first: it must be disposed of at most once.
        this.tempSourceInput = null;

        tidied.close();
        if (!new File(tidied.getInputURI()).delete()) {
            this.LOGGER.warn("Couldn't delete HTML Filter tidied temp file");
        }
    }

    /**
     * Pre-processes a parsed segment. For configurations that are not
     * well-formed, a non-inline start tag whose main rule is a text-unit or
     * group element ends the text unit that is currently open.
     *
     * <p>A {@code null} rule from {@code getMainElementRule} is treated as
     * "no rule", matching how {@code handleEndTag} and
     * {@code updateEndTagRuleState} guard against it.
     *
     * @param segment the segment about to be handled
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    public void preProcess(Segment segment) {
        super.preProcess(segment);
        if (getConfig().isWellformed()) {
            return;
        }

        boolean isInline = isInline(segment);
        if (!getEventBuilder().isCurrentTextUnit() || isInline) {
            return;
        }

        // Non-tag segments have no element rule, so they never close a text unit here.
        if (!(segment instanceof Tag)) {
            return;
        }

        ExtractionRuleState.ExtractionRule extractionRule = getMainElementRule((Tag) segment);
        if (extractionRule == null) {
            return;
        }

        boolean opensBlock = extractionRule.ruleType == TaggedFilterConfiguration.RULE_TYPE.TEXT_UNIT_ELEMENT
                || extractionRule.ruleType == TaggedFilterConfiguration.RULE_TYPE.GROUP_ELEMENT;
        if (opensBlock && (segment instanceof StartTag)) {
            getEventBuilder().endTextUnit();
        }
    }

    /**
     * Updates the rule state for a start tag. For inline elements an extra
     * included/excluded marker rule is pushed first: the element is marked
     * excluded only when the state is already inline-excluded and the tag does
     * not carry a {@code translate} attribute equal (ignoring case) to
     * {@code Const.VALUE_YES}; otherwise it is marked included.
     *
     * @param startTag the start tag being processed
     * @param extractionRule the rule resolved for that tag
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    public void updateStartTagRuleState(StartTag startTag, ExtractionRuleState.ExtractionRule extractionRule) {
        if (extractionRule.ruleType == TaggedFilterConfiguration.RULE_TYPE.INLINE_ELEMENT) {
            boolean markExcluded = false;
            if (getRuleState().isInlineExcludedState()) {
                String translate = startTag.getAttributeValue("translate");
                markExcluded = translate == null || !translate.equalsIgnoreCase(Const.VALUE_YES);
            }

            TaggedFilterConfiguration.RULE_TYPE markerType = markExcluded
                    ? TaggedFilterConfiguration.RULE_TYPE.INLINE_EXCLUDED_ELEMENT
                    : TaggedFilterConfiguration.RULE_TYPE.INLINE_INCLUDED_ELEMENT;
            getRuleState().pushInlineExcludedIncludedRule(
                    new ExtractionRuleState.ExtractionRule(startTag.getName(), markerType, true));
        }
        super.updateStartTagRuleState(startTag, extractionRule);
    }

    /**
     * Resolves the rule for an end tag from its start tag. When the superclass
     * finds none and the configuration is not well-formed, a fallback rule is
     * synthesized: {@code INLINE_ELEMENT} if {@code enumSet} contains that type,
     * otherwise {@code RULE_NOT_FOUND}.
     *
     * <p>The fallback rule name is the lower-cased tag text. Lower-casing uses
     * {@link Locale#ROOT} so the result does not depend on the JVM default
     * locale (under a Turkish locale {@code "I"} would otherwise become a
     * dotless {@code "ı"}).
     *
     * @param endTag the end tag being processed
     * @param enumSet the rule types of interest
     * @return the resolved rule, a synthesized fallback, or {@code null} when
     *         the configuration is well-formed and the superclass found no rule
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    public ExtractionRuleState.ExtractionRule getRuleTypeFromStartTag(EndTag endTag, EnumSet<TaggedFilterConfiguration.RULE_TYPE> enumSet) {
        ExtractionRuleState.ExtractionRule ruleTypeFromStartTag = super.getRuleTypeFromStartTag(endTag, enumSet);
        if (getConfig().isWellformed() || ruleTypeFromStartTag != null) {
            return ruleTypeFromStartTag;
        }

        TaggedFilterConfiguration.RULE_TYPE fallbackType = enumSet.contains(TaggedFilterConfiguration.RULE_TYPE.INLINE_ELEMENT)
                ? TaggedFilterConfiguration.RULE_TYPE.INLINE_ELEMENT
                : TaggedFilterConfiguration.RULE_TYPE.RULE_NOT_FOUND;
        String ruleName = endTag.toString().toLowerCase(Locale.ROOT);
        return new ExtractionRuleState.ExtractionRule(ruleName, fallbackType, true);
    }

    /**
     * Updates the rule state for an end tag. For inline elements: in the
     * inline-excluded state the rule handed to the superclass is replaced by
     * the top included/excluded marker rule; otherwise that marker, if present,
     * is popped. In both cases the top inline rule, if present, is popped.
     *
     * @param endTag the end tag being processed
     * @param extractionRule the rule resolved for that tag; may be {@code null}
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    public void updateEndTagRuleState(EndTag endTag, ExtractionRuleState.ExtractionRule extractionRule) {
        ExtractionRuleState.ExtractionRule ruleForSuper = extractionRule;

        final boolean closesInline = extractionRule != null
                && extractionRule.ruleType == TaggedFilterConfiguration.RULE_TYPE.INLINE_ELEMENT;
        if (closesInline) {
            if (getRuleState().isInlineExcludedState()) {
                ruleForSuper = getRuleState().peekExcludedIncludedInlineRule();
            } else {
                if (getRuleState().peekExcludedIncludedInlineRule() != null) {
                    getRuleState().popInlineExcludedIncludedRule();
                }
            }

            if (getRuleState().peekInlineRule() != null) {
                getRuleState().popInlineRule();
            }
        }

        super.updateEndTagRuleState(endTag, ruleForSuper);
    }

    /**
     * Handles an end tag. In the excluded state the tag text goes to the
     * current document part; in the inline-excluded state it is appended as
     * inline-excluded code data (after the rule state is updated). Otherwise
     * the tag is dispatched on its main rule type, with a missing or
     * non-applying rule treated as {@code RULE_NOT_FOUND}.
     *
     * @param endTag the end tag to handle
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    protected void handleEndTag(EndTag endTag) {
        final ExtractionRuleState.ExtractionRule rule = getMainElementRule(endTag);
        final String tagText = endTag.toString();

        if (getRuleState().isExcludedState()) {
            addToDocumentPart(tagText);
            updateEndTagRuleState(endTag, rule);
            return;
        }

        if (getRuleState().isInlineExcludedState()) {
            // The state is updated before the code data is appended.
            updateEndTagRuleState(endTag, rule);
            getEventBuilder().appendCodeInlineExcludedData(tagText);
            return;
        }

        TaggedFilterConfiguration.RULE_TYPE effectiveType = TaggedFilterConfiguration.RULE_TYPE.RULE_NOT_FOUND;
        if (rule != null && rule.ruleApplies) {
            effectiveType = rule.ruleType;
        }

        switch (effectiveType) {
            case INLINE_ELEMENT:
            case INLINE_INCLUDED_ELEMENT:
                handleInlineElement(endTag);
                break;
            case GROUP_ELEMENT:
                handleGroupElement(endTag);
                break;
            case TEXT_UNIT_ELEMENT:
                handleTextUnitElement(endTag);
                break;
            default:
                addToDocumentPart(tagText);
                break;
        }

        updateEndTagRuleState(endTag, rule);
    }

    /**
     * Handles the end tag of a text-unit element. Inside a text run whose
     * pending text unit has neither codes nor text, the temporary text unit is
     * converted to a document part and the tag is added to it; in every other
     * case the text unit is ended with the tag as its skeleton.
     *
     * @param endTag the end tag closing the text-unit element
     */
    private void handleTextUnitElement(EndTag endTag) {
        if (isInsideTextRun()) {
            ITextUnit pending = peekTempEvent().getTextUnit();
            boolean hasContent = pending.getSource().hasCode()
                    || pending.getSource().hasText(isPreserveWhitespace());
            if (!hasContent) {
                // Nothing extractable was collected: demote to a document part.
                getEventBuilder().convertTempTextUnitToDocumentPart();
                addToDocumentPart(endTag.toString());
                return;
            }
        }
        endTextUnit(new GenericSkeleton(endTag.toString()));
    }

    /**
     * Handles the end tag of a group element: ends any open text run first,
     * then either closes the current group with the tag as skeleton or, when
     * no group is current, adds the tag to the document part.
     *
     * @param endTag the end tag closing the group element
     */
    private void handleGroupElement(EndTag endTag) {
        if (isInsideTextRun()) {
            getEventBuilder().endTextUnit();
        }

        if (!getEventBuilder().isCurrentGroup()) {
            addToDocumentPart(endTag.toString());
            return;
        }
        endGroup(new GenericSkeleton(endTag.toString()));
    }

    /**
     * Handles the end tag of an inline element by adding it as a code to the
     * current text unit, starting a new text unit first when one can be started.
     *
     * @param endTag the inline end tag
     */
    private void handleInlineElement(EndTag endTag) {
        final boolean startFresh = canStartNewTextUnit();
        if (startFresh) {
            startTextUnit();
        }
        addCodeToCurrentTextUnit(endTag);
    }

    /**
     * Creates the placeholder for an attribute. For a meta content-type
     * attribute whose value contains {@code charset=}, the placeholder covers
     * only the text after the last {@code charset=} up to the end of the value,
     * with all offsets expressed relative to the start of the tag. Any other
     * attribute is delegated to the superclass with its value normalized by the
     * event builder.
     *
     * @param placeholderAccessType access type for the placeholder
     * @param str attribute name
     * @param str2 attribute value
     * @param tag tag that owns the attribute
     * @param attribute parsed attribute, used for source offsets
     * @return the placeholder describing the attribute
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    public PropertyTextUnitPlaceholder createPropertyTextUnitPlaceholder(PropertyTextUnitPlaceholder.PlaceholderAccessType placeholderAccessType, String str, String str2, Tag tag, Attribute attribute) {
        final String propertyName = normalizeAttributeName(str, str2, tag);

        final boolean charsetDeclaration = isMetaCharset(str, str2, tag) && str2.toLowerCase().contains("charset=");
        if (charsetDeclaration) {
            final String marker = "charset=";
            // Offsets are relative to the beginning of the tag.
            int attributeStart = attribute.getBegin() - tag.getBegin();
            int attributeEnd = attribute.getEnd() - tag.getBegin();
            int charsetOffset = str2.toLowerCase().lastIndexOf(marker) + marker.length();
            int valueStart = (attribute.getValueSegment().getBegin() + charsetOffset) - tag.getBegin();
            int valueEnd = attribute.getValueSegment().getEnd() - tag.getBegin();
            String charsetName = tag.toString().substring(valueStart, valueEnd);
            return new PropertyTextUnitPlaceholder(placeholderAccessType, propertyName, charsetName, attributeStart, attributeEnd, valueStart, valueEnd);
        }

        String normalizedValue = getEventBuilder().normalizeHtmlText(str2, true, isPreserveWhitespace());
        return super.createPropertyTextUnitPlaceholder(placeholderAccessType, str, normalizedValue, tag, attribute);
    }

    /**
     * Maps an attribute to a property name. Returns {@link Property#ENCODING}
     * for a meta content-type charset declaration or for an attribute named
     * {@code HtmlEncoder.CHARSET} on a {@code Const.ELEM_CUSTPROP} tag;
     * returns {@link Property#LANGUAGE} for the {@code content} attribute of
     * such a tag whose {@code http-equiv} is {@code Content-Language}, and for
     * attributes named {@code Main.DC_LANGUAGEINFORMATION} or {@code xml:lang};
     * otherwise returns the attribute name unchanged. All comparisons ignore case.
     *
     * @param str attribute name
     * @param str2 attribute value
     * @param tag tag that owns the attribute
     * @return the normalized property name
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    protected String normalizeAttributeName(String str, String str2, Tag tag) {
        if (isMetaCharset(str, str2, tag)) {
            return Property.ENCODING;
        }

        if (tag.getName().equalsIgnoreCase(Const.ELEM_CUSTPROP)) {
            if (str.equalsIgnoreCase(HtmlEncoder.CHARSET)) {
                return Property.ENCODING;
            }
            if (str.equalsIgnoreCase("content")) {
                String httpEquiv = ((StartTag) tag).getAttributeValue("http-equiv");
                if (httpEquiv != null && httpEquiv.equalsIgnoreCase("Content-Language")) {
                    return Property.LANGUAGE;
                }
            }
        }

        if (str.equalsIgnoreCase(Main.DC_LANGUAGEINFORMATION) || str.equalsIgnoreCase("xml:lang")) {
            return Property.LANGUAGE;
        }
        return str;
    }

    /**
     * Tells whether the attribute is the {@code content} attribute of a
     * {@code Const.ELEM_CUSTPROP} tag whose {@code http-equiv} is
     * {@code Content-Type} (case-insensitive) and whose {@code content} value
     * contains {@code charset=}.
     *
     * @param str attribute name
     * @param str2 attribute value (not consulted; the value is read from the tag)
     * @param tag tag that owns the attribute
     * @return {@code true} for a content-type charset declaration
     */
    private boolean isMetaCharset(String str, String str2, Tag tag) {
        if (!tag.getName().equalsIgnoreCase(Const.ELEM_CUSTPROP)) {
            return false;
        }
        if (!str.equalsIgnoreCase("content")) {
            return false;
        }

        StartTag metaTag = (StartTag) tag;
        String httpEquiv = metaTag.getAttributeValue("http-equiv");
        if (httpEquiv == null) {
            return false;
        }
        String content = metaTag.getAttributeValue("content");
        if (content == null) {
            return false;
        }
        return httpEquiv.equalsIgnoreCase("Content-Type") && content.toLowerCase().contains("charset=");
    }

    /**
     * Returns the tagged configuration held by the current parameters.
     *
     * @return the configuration obtained from {@code parameters.getTaggedConfig()}
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    protected TaggedFilterConfiguration getConfig() {
        final Parameters current = parameters;
        return current.getTaggedConfig();
    }

    /**
     * Replaces the filter parameters.
     *
     * @param iParameters the new parameters; cast to {@link Parameters}, so any
     *        other implementation fails with a {@link ClassCastException}
     */
    @Override // net.sf.okapi.common.filters.AbstractFilter, net.sf.okapi.common.filters.IFilter
    public void setParameters(IParameters iParameters) {
        final Parameters htmlParameters = (Parameters) iParameters;
        parameters = htmlParameters;
    }

    /**
     * Returns the parameters currently used by this filter.
     *
     * @return the current {@link Parameters} instance
     */
    @Override // net.sf.okapi.common.filters.AbstractFilter, net.sf.okapi.common.filters.IFilter
    public Parameters getParameters() {
        return parameters;
    }

    /**
     * Replaces the filter parameters with a new {@link Parameters} built from a URL.
     *
     * @param url source passed to the {@link Parameters} constructor
     */
    public void setParametersFromURL(URL url) {
        final Parameters loaded = new Parameters(url);
        parameters = loaded;
    }

    /**
     * Replaces the filter parameters with a new {@link Parameters} built from a file.
     *
     * @param file source passed to the {@link Parameters} constructor
     */
    public void setParametersFromFile(File file) {
        final Parameters loaded = new Parameters(file);
        parameters = loaded;
    }

    /**
     * Replaces the filter parameters with a new {@link Parameters} built from a string.
     *
     * @param str source passed to the {@link Parameters} constructor
     */
    public void setParametersFromString(String str) {
        final Parameters loaded = new Parameters(str);
        parameters = loaded;
    }

    /**
     * Classifies a tag as an opening, closing, or placeholder code.
     *
     * <p>Normal or unregistered start tags: a syntactically empty element is a
     * placeholder; a tag that requires an end tag is a placeholder in the
     * inline-excluded state and opening otherwise; any other start tag is
     * opening only for well-formed configurations. Normal or unregistered end
     * tags are closing. Every other tag type is a placeholder.
     *
     * @param tag the tag to classify
     * @return the tag type to use for the inline code
     */
    @Override // net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter
    protected TextFragment.TagType determineTagType(Tag tag) {
        final boolean isStartTag = tag.getTagType() == StartTagType.NORMAL
                || tag.getTagType() == StartTagType.UNREGISTERED;

        if (!isStartTag) {
            final boolean isEndTag = tag.getTagType() == EndTagType.NORMAL
                    || tag.getTagType() == EndTagType.UNREGISTERED;
            return isEndTag ? TextFragment.TagType.CLOSING : TextFragment.TagType.PLACEHOLDER;
        }

        final StartTag startTag = (StartTag) tag;
        if (startTag.isSyntacticalEmptyElementTag()) {
            return TextFragment.TagType.PLACEHOLDER;
        }
        if (startTag.isEndTagRequired()) {
            if (getRuleState().isInlineExcludedState()) {
                return TextFragment.TagType.PLACEHOLDER;
            }
            return TextFragment.TagType.OPENING;
        }
        if (getConfig().isWellformed()) {
            return TextFragment.TagType.OPENING;
        }
        return TextFragment.TagType.PLACEHOLDER;
    }
}
