package net.sf.okapi.steps.encodingconversion;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.BOMNewlineEncodingDetector;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.HTMLCharacterEntities;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.exceptions.OkapiException;
import net.sf.okapi.common.exceptions.OkapiIOException;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.RawDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Pipeline step that re-encodes a text-based raw document.
 *
 * <p>The input is read in chunks through the reader of the incoming {@link RawDocument}. For each
 * chunk the step optionally un-escapes numeric character references, character entity references
 * and Java-style {@code \}{@code uHHHH} sequences, rewrites (or inserts) the XML/HTML encoding
 * declaration found in the first chunk, and writes the characters in the output encoding.
 * Characters that the output encoding cannot represent (or, optionally, every character above
 * U+007F) are written in the configured escape notation.
 *
 * <p>The step keeps per-batch and per-document state in instance fields; nothing in this class
 * synchronizes access to them.
 */
@UsingParameters(Parameters.class)
public class EncodingConversionStep extends BasePipelineStep {
    /** Initial capacity, in chars, of the working buffer. */
    private static final int MAXBUF = 1024;

    /** Number of trailing chars inspected for an escape sequence cut by a chunk boundary. */
    private static final int SPLIT_LOOKBACK = 10;

    /** Escape notation used when a character has no entity name or cannot be encoded as bytes. */
    private static final String FALLBACK_FORMAT = "&#x%X;";

    /** Index of the group capturing the charset value in {@link #HTML_ENC_DECL}. */
    private static final int HTML_CHARSET_GROUP = 11;

    /**
     * XML declaration up to and including the opening quote of the encoding value. The text
     * between {@code <?xml} and {@code encoding} may not contain {@code >}, so the match cannot
     * run past the end of the declaration into a later element.
     */
    private static final Pattern XML_ENC_DECL = Pattern.compile(
            "((<\\?xml)([^>]*?)(encoding(\\s*?)=(\\s*?)(\\'|\\\")))", Pattern.DOTALL);

    /** XML declaration up to and including the opening quote of the version value. */
    private static final Pattern XML_DECL = Pattern.compile(
            "((<\\?xml)([^>]*?)(version(\\s*?)=(\\s*?)(\\'|\\\")))", Pattern.DOTALL);

    /** HTML {@code <meta ... content="text/html; charset=...">} declaration. */
    private static final Pattern HTML_ENC_DECL = Pattern.compile(
            "(<meta)([^>]*?)(content)(\\s*?)=(\\s*?)[\\'|\\\"](\\s*?)text/html(\\s*?);(\\s*?)charset(\\s*?)=(\\s*?)([^\\s]+?)(\\s|\\\"|\\')",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Start of an {@code <html} tag. */
    private static final Pattern HTML_DECL = Pattern.compile("(<html)", Pattern.CASE_INSENSITIVE);

    /** A {@code <head>} tag without attributes. */
    private static final Pattern HTML_HEAD = Pattern.compile("<head>", Pattern.CASE_INSENSITIVE);

    private final Logger logger = LoggerFactory.getLogger(getClass());
    private final HTMLCharacterEntities entities = new HTMLCharacterEntities();
    private Parameters params = new Parameters();

    // Injected by the pipeline through the annotated setters below.
    private URI outputURI;
    private URI inputURI;
    private String outputEncoding;

    // Per-batch state, set up in handleStartBatch().
    private String outFormat;
    private boolean useCER;
    private CharBuffer buffer;
    /** Pattern matching the escape sequences to un-escape, or null when un-escaping is off. */
    private Pattern pattern;

    // Per-document state.
    private CharsetEncoder outputEncoder;
    /** Tail of the previous chunk that may be the start of a split escape sequence, or null. */
    private String prevBuf;
    private boolean isXML;
    private boolean isHTML;

    @StepParameterMapping(parameterType = StepParameterType.OUTPUT_URI)
    public void setOutputURI(URI uri) {
        this.outputURI = uri;
    }

    @StepParameterMapping(parameterType = StepParameterType.INPUT_URI)
    public void setInputURI(URI uri) {
        this.inputURI = uri;
    }

    @StepParameterMapping(parameterType = StepParameterType.OUTPUT_ENCODING)
    public void setOutputEncoding(String str) {
        this.outputEncoding = str;
    }

    @Override
    public String getDescription() {
        return "Convert the character set encoding of a text-based file. Expects: raw document. Sends back: raw document.";
    }

    @Override
    public String getName() {
        return "Encoding Conversion";
    }

    @Override
    public Parameters getParameters() {
        return this.params;
    }

    @Override
    public void setParameters(IParameters iParameters) {
        this.params = (Parameters) iParameters;
    }

    /**
     * Prepares the per-batch state: allocates the working buffer, builds the un-escape pattern
     * from the enabled options and selects the output escape format.
     *
     * @param event the start-batch event
     * @return the same event, unchanged
     */
    @Override
    public Event handleStartBatch(Event event) {
        this.buffer = CharBuffer.allocate(MAXBUF);
        this.prevBuf = null;

        StringBuilder regex = new StringBuilder();
        if (this.params.getUnescapeNCR()) {
            // At least one digit is required so that "&#;" and "&#x;" are not treated as NCRs.
            regex.append("&#([0-9]+?);|&#[xX]([0-9a-fA-F]+?);");
        }
        if (this.params.getUnescapeCER()) {
            appendAlternative(regex, "(&\\w*?;)");
        }
        if (this.params.getUnescapeJava()) {
            appendAlternative(regex, "(\\\\[Uu]([0-9a-fA-F]{1,4}))");
        }
        if (regex.length() > 0) {
            this.pattern = Pattern.compile(regex.toString(), Pattern.CASE_INSENSITIVE);
            this.entities.ensureInitialization(false);
        } else {
            this.pattern = null;
        }

        this.useCER = false;
        switch (this.params.getEscapeNotation()) {
            case 1:
                this.outFormat = "&#x%x;";
                break;
            case 2:
                this.outFormat = "&#%d;";
                break;
            case 3:
                // Entity names are used when available; the format is the per-character fallback.
                this.useCER = true;
                this.entities.ensureInitialization(false);
                this.outFormat = "&#x%X;";
                break;
            case 4:
                this.outFormat = "\\u%04X";
                break;
            case 5:
                this.outFormat = "\\u%04x";
                break;
            case 6:
                this.outFormat = this.params.getUserFormat();
                break;
            case 0:
            default:
                this.outFormat = "&#x%X;";
                break;
        }
        return event;
    }

    /** Appends {@code alternative} to {@code regex}, separated by {@code |} when needed. */
    private static void appendAlternative(StringBuilder regex, String alternative) {
        if (regex.length() > 0) {
            regex.append('|');
        }
        regex.append(alternative);
    }

    /**
     * Converts the raw document carried by {@code event} and replaces the event's resource with a
     * new {@link RawDocument} pointing at the converted file.
     *
     * <p>The output goes to the file created from the output URI when this is the last output
     * step, otherwise to a temporary file. The reader and writer are closed whether or not the
     * conversion succeeds.
     *
     * @param event the raw-document event
     * @return the same event, now carrying the converted document
     * @throws OkapiException if an I/O error occurs
     * @throws OkapiIOException if the temporary output file cannot be created
     */
    @Override
    protected Event handleRawDocument(Event event) {
        RawDocument rawDocument = (RawDocument) event.getResource();
        // A previous document that failed mid-way must not leak its carry-over into this one.
        this.prevBuf = null;
        detectTypeFromExtension();
        try {
            BOMNewlineEncodingDetector detector =
                    new BOMNewlineEncodingDetector(rawDocument.getStream(), rawDocument.getEncoding());
            detector.detectAndRemoveBom();
            rawDocument.setEncoding(detector.getEncoding());
            String inputEncoding = rawDocument.getEncoding();
            if (!detector.isAutodetected()) {
                // NOTE(review): the declared encoding is only used for the log message below; the
                // reader is still opened with the encoding set on the RawDocument above. Confirm
                // whether the declaration is meant to be applied to the reader.
                inputEncoding = probeDeclaredEncoding(rawDocument, inputEncoding);
            }

            File outputFile;
            try (BufferedReader reader = new BufferedReader(rawDocument.getReader())) {
                this.logger.info("Input encoding: {}", inputEncoding);
                outputFile = resolveOutputFile(rawDocument);
                try (FileOutputStream fileOut = new FileOutputStream(outputFile);
                        OutputStreamWriter writer = new OutputStreamWriter(
                                new BufferedOutputStream(fileOut), this.outputEncoding)) {
                    this.outputEncoder = Charset.forName(this.outputEncoding).newEncoder();
                    this.logger.info("Output encoding: {}", this.outputEncoding);
                    Util.writeBOMIfNeeded(writer, this.params.getBOMonUTF8(), this.outputEncoding);
                    convert(reader, writer, inputEncoding);
                }
            }

            rawDocument.finalizeOutput();
            event.setResource(new RawDocument(outputFile.toURI(), this.outputEncoding,
                    rawDocument.getSourceLocale(), rawDocument.getTargetLocale()));
            return event;
        } catch (IOException e) {
            throw new OkapiException(e);
        }
    }

    /**
     * Initializes the XML/HTML flags from the extension of the input URI. Both flags are false
     * when no input URI, path or extension is available.
     */
    private void detectTypeFromExtension() {
        this.isXML = false;
        this.isHTML = false;
        if (this.inputURI == null || this.inputURI.getPath() == null) {
            return;
        }
        String extension = Util.getExtension(this.inputURI.getPath());
        if (Util.isEmpty(extension)) {
            return;
        }
        // Case-insensitive "starts with .htm", independent of the default locale.
        this.isHTML = extension.regionMatches(true, 0, ".htm", 0, 4);
        this.isXML = extension.equalsIgnoreCase(".xml");
    }

    /**
     * Reads the first chunk of the document and looks for an XML/HTML encoding declaration.
     *
     * @param rawDocument the document to probe
     * @param currentEncoding the encoding known so far
     * @return {@code currentEncoding} when the declaration names the same encoding (ignoring
     *     case) or none is found, otherwise the value returned by
     *     {@link #checkDeclaration(String)}
     * @throws IOException if reading fails
     */
    private String probeDeclaredEncoding(RawDocument rawDocument, String currentEncoding)
            throws IOException {
        // Start from a clean buffer: position and limit may still reflect the previous document.
        this.buffer.clear();
        try (BufferedReader probe = new BufferedReader(rawDocument.getReader())) {
            probe.read(this.buffer);
        }
        String declared = checkDeclaration(currentEncoding);
        return declared.equalsIgnoreCase(currentEncoding) ? currentEncoding : declared;
    }

    /**
     * Returns the file to write to: the document's output file for the last output step,
     * otherwise a new temporary file.
     *
     * @throws OkapiIOException if the temporary file cannot be created
     */
    private File resolveOutputFile(RawDocument rawDocument) throws IOException {
        if (isLastOutputStep()) {
            return rawDocument.createOutputFile(this.outputURI);
        }
        try {
            return File.createTempFile("~okapi-40_okp-enc_", ".tmp");
        } catch (IOException | SecurityException e) {
            throw new OkapiIOException("Cannot create temporary output.", e);
        }
    }

    /**
     * Copies {@code reader} to {@code writer} chunk by chunk, applying the declaration rewrite to
     * the first chunk and the optional un-escaping to every chunk.
     */
    private void convert(BufferedReader reader, OutputStreamWriter writer, String inputEncoding)
            throws IOException {
        CharBuffer single = CharBuffer.allocate(1);
        boolean firstChunk = true;
        while (true) {
            this.buffer.clear();
            if (this.prevBuf != null) {
                // Re-process the tail that was held back from the previous chunk.
                this.buffer.append(this.prevBuf);
            }
            boolean moreExpected = true;
            if (reader.read(this.buffer) == -1) {
                if (this.prevBuf == null) {
                    return;
                }
                // End of input, but the held-back tail still has to be written.
                moreExpected = false;
                this.prevBuf = null;
                this.buffer.limit(this.buffer.position());
            }
            if (firstChunk) {
                checkDeclaration(inputEncoding);
                firstChunk = false;
            }
            if (this.pattern != null) {
                if (moreExpected) {
                    checkSplitSequence();
                }
                unescape();
            }

            int count = this.buffer.position();
            this.buffer.position(0);
            for (int i = 0; i < count; i++) {
                writeChar(writer, this.buffer.get(i), single);
            }
        }
    }

    /**
     * Writes one char, either as-is or in the configured escape notation.
     *
     * <p>NOTE(review): {@link CharsetEncoder#canEncode(char)} returns false for surrogates, so
     * each half of a surrogate pair is escaped on its own — confirm whether supplementary
     * characters need to be supported.
     *
     * @param writer the output
     * @param ch the char to write
     * @param single one-char scratch buffer used for the byte-escape mode
     */
    private void writeChar(OutputStreamWriter writer, char ch, CharBuffer single)
            throws IOException {
        boolean encodable = this.outputEncoder.canEncode(ch);
        if (!encodable && this.params.getReportUnsupported()) {
            this.logger.warn(String.format("Un-supported character: U+%04X ('%c')", (int) ch, ch));
        }
        boolean escapeRequested = this.params.getEscapeAll() && ch > 127;
        if (encodable && !escapeRequested) {
            writer.write(ch);
            return;
        }

        if (this.useCER) {
            String name = this.entities.getName(ch);
            if (name != null) {
                writer.write("&" + name + Code.EXTENDED_CODE_TYPE_DELIMITER);
                return;
            }
        } else if (!this.params.getUseBytes()) {
            writer.write(String.format(this.outFormat, (int) ch));
            return;
        } else if (encodable) {
            // Byte mode: escape every byte of the char's encoded form, as an unsigned value.
            single.put(0, ch);
            single.position(0);
            ByteBuffer encoded = this.outputEncoder.encode(single);
            for (int i = 0; i < encoded.limit(); i++) {
                writer.write(String.format(this.outFormat, encoded.get(i) & 0xFF));
            }
            return;
        }
        // No entity name, or bytes requested for a char that cannot be encoded.
        writer.write(String.format(FALLBACK_FORMAT, (int) ch));
    }

    /**
     * Looks for an XML or HTML encoding declaration in the chars read so far (buffer start up to
     * the current position), rewrites it to name the output encoding, or inserts one when the
     * content is known to be XML/HTML and has none. The buffer is replaced by the edited text,
     * with position and limit at its end; it is re-allocated when the text no longer fits.
     *
     * @param str the encoding to return when no declaration is found
     * @return the encoding named by the declaration; UTF-8 for XML content without an encoding
     *     pseudo-attribute; otherwise {@code str}
     */
    private String checkDeclaration(String str) {
        this.buffer.limit(this.buffer.position());
        this.buffer.position(0);
        StringBuilder text = new StringBuilder(this.buffer.toString());
        String declared = str;

        Matcher xmlEnc = XML_ENC_DECL.matcher(text);
        if (xmlEnc.find()) {
            this.isXML = true;
            // The match ends right after the opening quote; the value runs to the same quote.
            int valueStart = xmlEnc.end();
            int valueEnd = text.indexOf(String.valueOf(text.charAt(valueStart - 1)), valueStart);
            if (valueEnd != -1) {
                declared = text.substring(valueStart, valueEnd);
                text.replace(valueStart, valueEnd, this.outputEncoding);
            }
        } else {
            Matcher xmlVersion = XML_DECL.matcher(text);
            if (xmlVersion.find()) {
                this.isXML = true;
                declared = BOMNewlineEncodingDetector.UTF_8;
                int versionStart = xmlVersion.end();
                int versionEnd =
                        text.indexOf(String.valueOf(text.charAt(versionStart - 1)), versionStart);
                if (versionEnd != -1) {
                    text.insert(versionEnd + 1, " encoding=\"" + this.outputEncoding + "\"");
                }
            } else if (this.isXML) {
                declared = BOMNewlineEncodingDetector.UTF_8;
                text.insert(0, "<?xml version=\"1.0\" encoding=\"" + this.outputEncoding + "\" ?>");
            }
        }

        Matcher htmlEnc = HTML_ENC_DECL.matcher(text);
        if (htmlEnc.find()) {
            this.isHTML = true;
            declared = htmlEnc.group(HTML_CHARSET_GROUP);
            // Replace exactly the captured value; searching for its text could hit an earlier
            // occurrence of the same characters inside the <meta> tag.
            text.replace(htmlEnc.start(HTML_CHARSET_GROUP), htmlEnc.end(HTML_CHARSET_GROUP),
                    this.outputEncoding);
        } else if (this.isHTML) {
            Matcher head = HTML_HEAD.matcher(text);
            if (head.find()) {
                text.insert(head.end(), String.format(
                        "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"></meta>",
                        this.outputEncoding));
            } else {
                Matcher html = HTML_DECL.matcher(text);
                if (html.find()) {
                    int tagEnd = text.indexOf(">", html.end());
                    if (tagEnd != -1) {
                        text.insert(tagEnd + 1, String.format(
                                "<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"></meta></head>",
                                this.outputEncoding));
                    }
                }
            }
        }

        int length = text.length();
        if (length > this.buffer.capacity()) {
            this.buffer = CharBuffer.allocate(length);
        } else {
            this.buffer.clear();
        }
        this.buffer.append(text.toString());
        this.buffer.limit(length);
        return declared;
    }

    /**
     * Holds back the tail of the current chunk when it may contain an escape sequence cut by the
     * chunk boundary: if one of the last {@value #SPLIT_LOOKBACK} chars is {@code &} or
     * {@code \}, everything from the last such char on is moved to {@link #prevBuf} and the
     * buffer's position and limit are set just before it.
     */
    private void checkSplitSequence() {
        int end = this.buffer.position();
        this.buffer.position(0);
        this.prevBuf = null;
        int lowest = Math.max(0, end - SPLIT_LOOKBACK);
        for (int i = end - 1; i >= lowest; i--) {
            char ch = this.buffer.charAt(i);
            if (ch == '&' || ch == '\\') {
                this.prevBuf = this.buffer.subSequence(i, end).toString();
                end = i;
                break;
            }
        }
        this.buffer.position(end);
        this.buffer.limit(end);
    }

    /**
     * Replaces the escape sequences matched by {@link #pattern} in the buffer (start up to the
     * current position) with the characters they denote. Sequences that are unknown, malformed,
     * outside the range of a single {@code char}, or that denote {@code " & ' < >} are left as
     * they are. On return the buffer position is at the end of the resulting text.
     */
    private void unescape() {
        int length = this.buffer.position();
        this.buffer.position(0);
        Matcher matcher = this.pattern.matcher(this.buffer);
        StringBuilder result = new StringBuilder(length);
        int copied = 0;
        boolean found = false;
        while (matcher.find()) {
            found = true;
            if (matcher.start() > copied) {
                result.append(this.buffer.subSequence(copied, matcher.start()));
            }
            copied = matcher.end();
            String sequence = matcher.group();
            int value = decodeEscape(sequence);
            if (mustStayEscaped(value)) {
                result.append(sequence);
            } else {
                result.append((char) value);
            }
        }
        if (!found) {
            this.buffer.position(length);
            return;
        }
        if (copied < length) {
            result.append(this.buffer.subSequence(copied, length));
        }
        this.buffer.clear();
        this.buffer.append(result.toString());
    }

    /**
     * Decodes one matched escape sequence.
     *
     * @param sequence the full matched text, e.g. {@code &#233;}, {@code &#xE9;}, {@code &eacute;}
     *     or a backslash followed by {@code u00E9}
     * @return the denoted code, or -1 when the number cannot be parsed or the sequence is a
     *     backslash followed by an upper-case {@code U} (matched by the pattern but left
     *     untouched, as before); for entity names, whatever the entity lookup returns
     */
    private int decodeEscape(String sequence) {
        try {
            if (sequence.startsWith("&#")) {
                String digits = sequence.substring(2, sequence.length() - 1);
                if (!digits.isEmpty() && (digits.charAt(0) == 'x' || digits.charAt(0) == 'X')) {
                    return Integer.parseInt(digits.substring(1), 16);
                }
                return Integer.parseInt(digits);
            }
            if (sequence.startsWith("\\u")) {
                return Integer.parseInt(sequence.substring(2), 16);
            }
            if (sequence.startsWith("\\")) {
                return -1;
            }
            return this.entities.lookupName(sequence.substring(1, sequence.length() - 1));
        } catch (NumberFormatException e) {
            // Empty or overlong digit run: not a usable reference, keep the original text.
            return -1;
        }
    }

    /**
     * Tells whether a decoded value must be kept in its escaped form: markup-significant
     * characters, and values that do not fit in a single {@code char}.
     */
    private static boolean mustStayEscaped(int value) {
        switch (value) {
            case '"':
            case '&':
            case '\'':
            case '<':
            case '>':
                return true;
            default:
                return value < 0 || value > Character.MAX_VALUE;
        }
    }
}
