package net.sf.okapi.steps.tokenization;

import net.sf.okapi.common.Event;
import net.sf.okapi.common.EventType;
import net.sf.okapi.common.LocaleFilter;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.StartDocument;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnitUtil;
import net.sf.okapi.steps.tokenization.common.TokensAnnotation;
import net.sf.okapi.steps.tokenization.tokens.Tokens;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:net/sf/okapi/steps/tokenization/Tokenizer.class */
/**
 * Static convenience entry points for tokenizing a piece of content outside of a pipeline.
 *
 * <p>All public {@code tokenize} overloads funnel into {@link #doTokenize}, which reduces the
 * given object to a plain string and then drives the shared {@link TokenizationStep} with a
 * minimal START_BATCH / START_DOCUMENT / TEXT_UNIT / END_BATCH event sequence.
 *
 * <p>The overloads return {@code null} (not an empty {@link Tokens}) when the input is
 * {@code null}, when the locale is null or empty, or when the input type is not supported.
 */
public class Tokenizer {
    /**
     * Step instance shared by every call. Its parameters are reset and reconfigured on each
     * invocation of {@link #tokenizeString}, which is why that method is synchronized.
     */
    protected static TokenizationStep ts = new TokenizationStep();

    /**
     * Tokenizes a plain string by pushing it through the shared tokenization step.
     *
     * @param str the text to tokenize
     * @param localeId the locale of the text; used both as the step's locale filter and as the
     *     locale of the synthetic document
     * @param strArr token names forwarded to the step's parameters via {@code setTokenNames}
     * @return the tokens found in the source annotation of the synthetic text unit; empty when
     *     the shared step is {@code null} or no tokens annotation was produced
     */
    protected static synchronized Tokens tokenizeString(String str, LocaleId localeId, String... strArr) {
        Tokens result = new Tokens();

        // Read the mutable static field once so the rest of the method works on a stable reference.
        final TokenizationStep step = ts;
        if (step == null) {
            return result;
        }

        // Reconfigure the shared step for a source-only run restricted to the requested locale.
        Parameters params = step.getParameters();
        params.reset();
        params.tokenizeSource = true;
        params.tokenizeTargets = false;
        params.setLocaleFilter(LocaleFilter.anyOf(localeId));
        params.setTokenNames(strArr);

        step.handleEvent(new Event(EventType.START_BATCH));
        try {
            StartDocument startDoc = new StartDocument("tokenization");
            startDoc.setLocale(localeId);
            startDoc.setMultilingual(false);
            step.handleEvent(new Event(EventType.START_DOCUMENT, startDoc));

            ITextUnit textUnit = TextUnitUtil.buildTU(str);
            step.handleEvent(new Event(EventType.TEXT_UNIT, textUnit));

            // The step reports its result as an annotation on the text unit's source.
            TokensAnnotation annotation =
                    (TokensAnnotation) TextUnitUtil.getSourceAnnotation(textUnit, TokensAnnotation.class);
            if (annotation != null) {
                result.addAll(annotation.getTokens());
            }
        } finally {
            // Always close the batch opened above, even when an event handler throws, so the
            // shared step is not left mid-batch for the next caller.
            step.handleEvent(new Event(EventType.END_BATCH));
        }
        return result;
    }

    /**
     * Reduces the supported input types step by step to a string and tokenizes it.
     *
     * <ul>
     *   <li>{@link ITextUnit}: the target for {@code localeId} if the unit has one, otherwise the source</li>
     *   <li>{@link TextContainer}: its first content when it is a single segment, otherwise an
     *       unsegmented copy of its content</li>
     *   <li>{@link TextFragment}: the text returned by {@code TextUnitUtil.getText}</li>
     *   <li>{@link String}: tokenized directly</li>
     * </ul>
     *
     * @param obj the content to tokenize
     * @param localeId the locale to tokenize for
     * @param strArr token names forwarded to {@link #tokenizeString}
     * @return the tokens, or {@code null} when {@code obj} is {@code null}, the locale is null or
     *     empty, or {@code obj} is of an unsupported type
     */
    private static Tokens doTokenize(Object obj, LocaleId localeId, String... strArr) {
        if (obj == null) {
            return null;
        }
        if (Util.isNullOrEmpty(localeId)) {
            LoggerFactory.getLogger(Tokenizer.class).warn("Language is not set, cannot tokenize.");
            return null;
        }

        if (obj instanceof ITextUnit) {
            ITextUnit unit = (ITextUnit) obj;
            if (unit.hasTarget(localeId)) {
                return doTokenize(unit.getTarget(localeId), localeId, strArr);
            }
            return doTokenize(unit.getSource(), localeId, strArr);
        }

        if (obj instanceof TextContainer) {
            TextContainer container = (TextContainer) obj;
            if (container.contentIsOneSegment()) {
                return doTokenize(container.getFirstContent(), localeId, strArr);
            }
            return doTokenize(container.getUnSegmentedContentCopy(), localeId, strArr);
        }

        if (obj instanceof TextFragment) {
            String text = TextUnitUtil.getText((TextFragment) obj);
            return doTokenize(text, localeId, strArr);
        }

        if (obj instanceof String) {
            return tokenizeString((String) obj, localeId, strArr);
        }

        // Unsupported input type.
        return null;
    }

    /**
     * Tokenizes a text unit: its target for {@code localeId} when present, otherwise its source.
     *
     * @param iTextUnit the text unit to tokenize
     * @param localeId the locale to tokenize for
     * @param strArr token names forwarded to the tokenization step
     * @return the tokens, or {@code null} when the text unit is {@code null} or the locale is null or empty
     */
    public static Tokens tokenize(ITextUnit iTextUnit, LocaleId localeId, String... strArr) {
        return doTokenize(iTextUnit, localeId, strArr);
    }

    /**
     * Tokenizes the content of a text container.
     *
     * @param textContainer the container to tokenize
     * @param localeId the locale to tokenize for
     * @param strArr token names forwarded to the tokenization step
     * @return the tokens, or {@code null} when the container is {@code null} or the locale is null or empty
     */
    public static Tokens tokenize(TextContainer textContainer, LocaleId localeId, String... strArr) {
        return doTokenize(textContainer, localeId, strArr);
    }

    /**
     * Tokenizes the text of a text fragment.
     *
     * @param textFragment the fragment to tokenize
     * @param localeId the locale to tokenize for
     * @param strArr token names forwarded to the tokenization step
     * @return the tokens, or {@code null} when the fragment is {@code null} or the locale is null or empty
     */
    public static Tokens tokenize(TextFragment textFragment, LocaleId localeId, String... strArr) {
        return doTokenize(textFragment, localeId, strArr);
    }

    /**
     * Tokenizes a plain string.
     *
     * @param str the text to tokenize
     * @param localeId the locale to tokenize for
     * @param strArr token names forwarded to the tokenization step
     * @return the tokens, or {@code null} when the string is {@code null} or the locale is null or empty
     */
    public static Tokens tokenize(String str, LocaleId localeId, String... strArr) {
        return doTokenize(str, localeId, strArr);
    }
}
