package ch.epfl.bbp.uima.pdf.cleanup;

import ch.epfl.bbp.StringUtils;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.StringReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:ch/epfl/bbp/uima/pdf/cleanup/HyphenRemover.class */
public class HyphenRemover {
    private static final String WORD_SEPARATOR = "\\W+";
    private static Logger LOG = LoggerFactory.getLogger(HyphenRemover.class);
    public static final Pattern patternHyphen = Pattern.compile("[\\u002D\\u00AD\\u2010]\\s*$");
    private static final Pattern numberEnd = Pattern.compile("[0-9]-$");
    private static final Pattern numberStart = Pattern.compile("^[0-9]");
    private static final Pattern greekEnd = Pattern.compile("\\p{InGreek}-$");
    private static final Pattern greekStart = Pattern.compile("^\\p{InGreek}+");

    private static boolean shouldDehyphenate(String str, String str2) {
        boolean z = true;
        String str3 = "DH";
        if (str.length() <= 1) {
            z = false;
            str3 = "N11";
        } else if (str2.length() <= 1) {
            z = false;
            str3 = "N12";
        } else if (numberEnd.matcher(str).find()) {
            z = false;
            str3 = "N21";
        } else if (numberStart.matcher(str2).find()) {
            z = false;
            str3 = "N22";
        } else if (greekEnd.matcher(str).find()) {
            z = false;
            str3 = "N31";
        } else if (greekStart.matcher(str2).find()) {
            z = false;
            str3 = "N32";
        }
        if (z) {
            LOG.debug("\"" + str + "\",\"" + str2 + "\",\"" + str3 + "\",\"" + (str.substring(0, str.length() - 1) + str2) + "\"\n");
        } else {
            LOG.debug("\"" + str + "\",\"" + str2 + "\",\"" + str3 + "\",\"" + str + str2 + "\"\n");
        }
        return z;
    }

    public static String dehyphenate(LineNumberReader lineNumberReader, String str) {
        String readLine;
        StringBuilder sb = new StringBuilder();
        while (true) {
            try {
                String readLine2 = lineNumberReader.readLine();
                if (readLine2 == null) {
                    break;
                }
                String str2 = readLine2;
                Matcher matcher = patternHyphen.matcher(readLine2);
                while (matcher.find() && (readLine = lineNumberReader.readLine()) != null) {
                    String[] split = str2.split(WORD_SEPARATOR);
                    String[] split2 = readLine.trim().split(WORD_SEPARATOR);
                    if (split.length == 0) {
                        str2 = str2 + "\n" + readLine;
                    } else {
                        str2 = shouldDehyphenate(split[split.length - 1], split2[0]) ? str2.substring(0, str2.length() - 1).trim() + readLine.trim() : str2 + readLine;
                        matcher = patternHyphen.matcher(str2);
                    }
                }
                sb.append(str2.trim());
                sb.append("\n");
            } catch (Throwable th) {
                LOG.warn("failed to dehyphenate, docId " + str, StringUtils.print(th));
            }
        }
        return sb.toString();
    }

    public static String dehyphenate(String str, String str2) {
        return dehyphenate(new LineNumberReader(new StringReader(str)), str2);
    }
}
