package banner.tagging;

import banner.BannerProperties;
import banner.Sentence;
import banner.tagging.TaggedToken;
import dragon.nlp.tool.Lemmatiser;
import edu.umass.cs.mallet.base.fst.CRF4;
import edu.umass.cs.mallet.base.fst.MultiSegmentationEvaluator;
import edu.umass.cs.mallet.base.fst.TransducerEvaluator;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.pipe.SerialPipes;
import edu.umass.cs.mallet.base.pipe.TokenSequence2FeatureVectorSequence;
import edu.umass.cs.mallet.base.pipe.tsf.OffsetConjunctions;
import edu.umass.cs.mallet.base.pipe.tsf.RegexMatches;
import edu.umass.cs.mallet.base.pipe.tsf.TokenTextCharNGrams;
import edu.umass.cs.mallet.base.pipe.tsf.TokenTextCharPrefix;
import edu.umass.cs.mallet.base.pipe.tsf.TokenTextCharSuffix;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.types.Sequence;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/* loaded from: input_file:banner/tagging/CRFTagger.class */
public class CRFTagger implements Tagger {
    /** Alternation of spelled-out Greek letter names; compiled case-insensitively for the GREEK feature. */
    private static final String GREEK = "(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)";

    /** Model trained on text in its natural order; {@code null} when the text direction does not include forward. */
    private final CRF4 forwardCRF;

    /** Model trained on reversed text; {@code null} when the text direction does not include reverse. */
    private final CRF4 reverseCRF;

    /** First pipe of the feature pipeline; persisted with the model and re-wired with a lemmatiser and POS tagger on load. */
    private final String2TokenSequencePipe basePipe;

    /** CRF order used for training; training accepts only 1 or 2. */
    private final int order;

    /** Whether the model was trained with feature induction. */
    private final boolean useFeatureInduction;

    /** Tag format used to render sentences as training/tagging text. */
    private final TaggedToken.TagFormat format;

    /** Which direction(s) are trained, persisted and applied when tagging. */
    private final BannerProperties.TextDirection textDirection;

    /**
     * Assembles a tagger from already-built parts. Instances are obtained through
     * {@link #load} or {@link #train}; this constructor only stores its arguments.
     *
     * @param crf4 forward model, stored as-is (may be {@code null})
     * @param crf42 reverse model, stored as-is (may be {@code null})
     * @param string2TokenSequencePipe base pipe of the feature pipeline
     * @param i CRF order
     * @param z whether feature induction was used for training
     * @param tagFormat tag format used to render sentences
     * @param textDirection direction(s) handled by this tagger
     */
    private CRFTagger(CRF4 crf4, CRF4 crf42, String2TokenSequencePipe string2TokenSequencePipe, int i, boolean z, TaggedToken.TagFormat tagFormat, BannerProperties.TextDirection textDirection) {
        // Configuration first, then the models; the assignments are independent of each other.
        this.textDirection = textDirection;
        this.format = tagFormat;
        this.order = i;
        this.useFeatureInduction = z;
        this.basePipe = string2TokenSequencePipe;
        this.forwardCRF = crf4;
        this.reverseCRF = crf42;
    }

    /**
     * Reads a tagger previously stored with {@link #write(File)}.
     *
     * <p>The file is a GZIP-compressed Java object stream. Fields are read in the same
     * order that {@code write} emits them: text direction, forward model (only if the
     * direction includes forward), reverse model (only if it includes reverse), base
     * pipe, order, feature-induction flag and tag format.
     *
     * <p>NOTE(review): this uses Java native deserialization, so only model files from a
     * trusted source should be loaded.
     *
     * @param file model file to read
     * @param lemmatiser lemmatiser installed on the restored base pipe
     * @param tagger part-of-speech tagger installed on the restored base pipe
     * @return the restored tagger
     * @throws IOException if the file cannot be opened, read or closed
     * @throws RuntimeException if the stream references a class that cannot be found
     */
    public static CRFTagger load(File file, Lemmatiser lemmatiser, dragon.nlp.tool.Tagger tagger) throws IOException {
        FileInputStream fileStream = new FileInputStream(file);
        try {
            ObjectInputStream objectInputStream = new ObjectInputStream(new GZIPInputStream(fileStream));
            BannerProperties.TextDirection textDirection = (BannerProperties.TextDirection) objectInputStream.readObject();
            CRF4 crf4 = null;
            if (textDirection.doForward()) {
                crf4 = (CRF4) objectInputStream.readObject();
            }
            CRF4 crf42 = null;
            if (textDirection.doReverse()) {
                crf42 = (CRF4) objectInputStream.readObject();
            }
            String2TokenSequencePipe string2TokenSequencePipe = (String2TokenSequencePipe) objectInputStream.readObject();
            // The lemmatiser and POS tagger are supplied by the caller rather than taken from the file.
            string2TokenSequencePipe.setLemmatiser(lemmatiser);
            string2TokenSequencePipe.setPosTagger(tagger);
            int readInt = objectInputStream.readInt();
            boolean readBoolean = objectInputStream.readBoolean();
            TaggedToken.TagFormat tagFormat = (TaggedToken.TagFormat) objectInputStream.readObject();
            objectInputStream.close();
            return new CRFTagger(crf4, crf42, string2TokenSequencePipe, readInt, readBoolean, tagFormat, textDirection);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Model file " + file + " references an unknown class", e);
        } finally {
            // Releases the file handle when a read fails part-way; harmless after a normal close.
            fileStream.close();
        }
    }

    /**
     * Appends the token-level feature pipes to the given pipeline, in a fixed order:
     * orthographic regex features, character prefixes/suffixes/n-grams, Roman-numeral,
     * Greek-letter and punctuation features, offset conjunctions, and finally the
     * conversion to feature vector sequences.
     *
     * <p>Two feature names (ALPHANUMERIC and REALNUMBER) are registered twice with
     * different patterns, so either pattern can set the feature.
     *
     * @param arrayList pipeline to append to; existing entries are left untouched
     */
    private static void setupPipes(ArrayList<Pipe> arrayList) {
        // Letter-case shape of the token.
        arrayList.add(new RegexMatches("ALPHA", Pattern.compile("[A-Za-z]+")));
        arrayList.add(new RegexMatches("INITCAPS", Pattern.compile("[A-Z].*")));
        arrayList.add(new RegexMatches("UPPER-LOWER", Pattern.compile("[A-Z][a-z].*")));
        arrayList.add(new RegexMatches("LOWER-UPPER", Pattern.compile("[a-z]+[A-Z]+.*")));
        arrayList.add(new RegexMatches("ALLCAPS", Pattern.compile("[A-Z]+")));
        arrayList.add(new RegexMatches("MIXEDCAPS", Pattern.compile("[A-Z][a-z]+[A-Z][A-Za-z]*")));
        arrayList.add(new RegexMatches("SINGLECHAR", Pattern.compile("[A-Za-z]")));

        // Digits and letter/digit mixtures.
        arrayList.add(new RegexMatches("SINGLEDIGIT", Pattern.compile("[0-9]")));
        arrayList.add(new RegexMatches("DOUBLEDIGIT", Pattern.compile("[0-9][0-9]")));
        arrayList.add(new RegexMatches("NUMBER", Pattern.compile("[0-9,]+")));
        arrayList.add(new RegexMatches("HASDIGIT", Pattern.compile(".*[0-9].*")));
        arrayList.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[0-9].*[A-Za-z].*")));
        arrayList.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[A-Za-z].*[0-9].*")));
        // NOTE(review): the LETTERS_NUMBERS pattern is digits-then-letters and NUMBERS_LETTERS is
        // letters-then-digits. The names are kept as they are because they are feature identifiers.
        arrayList.add(new RegexMatches("LETTERS_NUMBERS", Pattern.compile("[0-9]+[A-Za-z]+")));
        arrayList.add(new RegexMatches("NUMBERS_LETTERS", Pattern.compile("[A-Za-z]+[0-9]+")));

        // Embedded symbols and numeric notation.
        arrayList.add(new RegexMatches("HAS_DASH", Pattern.compile(".*-.*")));
        arrayList.add(new RegexMatches("HAS_QUOTE", Pattern.compile(".*'.*")));
        arrayList.add(new RegexMatches("HAS_SLASH", Pattern.compile(".*/.*")));
        arrayList.add(new RegexMatches("REALNUMBER", Pattern.compile("(-|\\+)?[0-9,]+(\\.[0-9]*)?%?")));
        arrayList.add(new RegexMatches("REALNUMBER", Pattern.compile("(-|\\+)?[0-9,]*(\\.[0-9]+)?%?")));
        arrayList.add(new RegexMatches("START_MINUS", Pattern.compile("-.*")));
        arrayList.add(new RegexMatches("START_PLUS", Pattern.compile("\\+.*")));
        arrayList.add(new RegexMatches("END_PERCENT", Pattern.compile(".*%")));

        // Character-level affixes and n-grams.
        arrayList.add(new TokenTextCharPrefix("2PREFIX=", 2));
        arrayList.add(new TokenTextCharPrefix("3PREFIX=", 3));
        arrayList.add(new TokenTextCharPrefix("4PREFIX=", 4));
        arrayList.add(new TokenTextCharSuffix("2SUFFIX=", 2));
        arrayList.add(new TokenTextCharSuffix("3SUFFIX=", 3));
        arrayList.add(new TokenTextCharSuffix("4SUFFIX=", 4));
        arrayList.add(new TokenTextCharNGrams("CHARNGRAM=", new int[] {2, 3}, true));

        // Pattern.CASE_INSENSITIVE is the named form of the flag value 2.
        arrayList.add(new RegexMatches("ROMAN", Pattern.compile("[IVXDLCM]+", Pattern.CASE_INSENSITIVE)));
        arrayList.add(new RegexMatches("GREEK", Pattern.compile(GREEK, Pattern.CASE_INSENSITIVE)));

        // NOTE(review): inside this character class ")-=" is a range from ')' to '=', which also
        // covers the digits 0-9, so digit-only tokens can presumably match ISPUNCT as well.
        // Left unchanged on purpose: altering the pattern changes the feature set of newly
        // trained models. Confirm the intent before escaping the dash.
        arrayList.add(new RegexMatches("ISPUNCT", Pattern.compile("[`~!@#$%^&*()-=_+\\[\\]\\\\{}|;':\\\",./<>?]+")));

        // One conjunction with offset -2 and one with offset +2.
        arrayList.add(new OffsetConjunctions(new int[][] {{-2}, {2}}));
        arrayList.add(new TokenSequence2FeatureVectorSequence(true, true));
    }

    /**
     * Trains a new tagger on the given sentences.
     *
     * <p>A single feature pipeline is built and shared by every model that is trained.
     * A forward model is trained when the direction includes forward, and a reverse
     * model (on reversed training text) when it includes reverse.
     *
     * @param list training sentences; must not be empty
     * @param i CRF order; only 1 and 2 are accepted by the underlying training step
     * @param z whether to train with feature induction
     * @param tagFormat tag format used to render the training text
     * @param textDirection direction(s) to train; {@code Intersection} is not supported
     * @param lemmatiser lemmatiser handed to the base pipe
     * @param tagger part-of-speech tagger handed to the base pipe
     * @param z2 flag forwarded unchanged as the third argument of the
     *     {@code String2TokenSequencePipe} constructor
     * @return the trained tagger
     * @throws RuntimeException if {@code list} is empty
     * @throws UnsupportedOperationException if {@code textDirection} is {@code Intersection}
     */
    public static CRFTagger train(List<Sentence> list, int i, boolean z, TaggedToken.TagFormat tagFormat, BannerProperties.TextDirection textDirection, Lemmatiser lemmatiser, dragon.nlp.tool.Tagger tagger, boolean z2) {
        if (list.isEmpty()) {
            throw new RuntimeException("Number of sentences must be greater than zero");
        }
        // Reject the unsupported direction before any pipeline objects are built.
        if (textDirection == BannerProperties.TextDirection.Intersection) {
            throw new UnsupportedOperationException("TextDirection.Intersection not yet supported");
        }
        String2TokenSequencePipe string2TokenSequencePipe = new String2TokenSequencePipe(lemmatiser, tagger, z2);
        // Declared as ArrayList because that is the type setupPipes accepts.
        ArrayList<Pipe> pipes = new ArrayList<Pipe>();
        pipes.add(string2TokenSequencePipe);
        setupPipes(pipes);
        SerialPipes serialPipes = new SerialPipes(pipes);
        CRF4 forward = null;
        if (textDirection.doForward()) {
            forward = train(list, i, z, tagFormat, serialPipes, false);
        }
        CRF4 reverse = null;
        if (textDirection.doReverse()) {
            reverse = train(list, i, z, tagFormat, serialPipes, true);
        }
        return new CRFTagger(forward, reverse, string2TokenSequencePipe, i, z, tagFormat, textDirection);
    }

    /**
     * Trains one CRF on the given sentences through the supplied pipeline.
     *
     * @param list training sentences
     * @param i CRF order: 1 adds states for single labels, 2 adds states for label pairs
     * @param z {@code true} to train with feature induction, {@code false} for plain training
     * @param tagFormat tag format used to render each sentence's training text
     * @param pipe feature pipeline applied to every instance and used as the CRF input pipe
     * @param z2 passed to {@code Sentence.getTrainingText}; the public {@code train} passes
     *     {@code true} for the reverse model and {@code false} for the forward model
     * @return the trained model
     * @throws IllegalArgumentException if {@code i} is neither 1 nor 2
     */
    private static CRF4 train(List<Sentence> list, int i, boolean z, TaggedToken.TagFormat tagFormat, Pipe pipe, boolean z2) {
        InstanceList trainingData = new InstanceList(pipe);
        for (Sentence current : list) {
            Instance example = new Instance(current.getTrainingText(tagFormat, z2), (Object) null, current.getTag(), (Object) null, pipe);
            trainingData.add(example);
        }

        CRF4 model = new CRF4(pipe, (Pipe) null);
        switch (i) {
            case 1:
                model.addStatesForLabelsConnectedAsIn(trainingData);
                break;
            case 2:
                model.addStatesForBiLabelsConnectedAsIn(trainingData);
                break;
            default:
                throw new IllegalArgumentException("Order must be equal to 1 or 2");
        }

        if (!z) {
            model.train(trainingData, (InstanceList) null, (InstanceList) null, (MultiSegmentationEvaluator) null, 99999, 10, new double[] {0.2d, 0.5d, 0.8d});
            return model;
        }
        model.trainWithFeatureInduction(trainingData, (InstanceList) null, (InstanceList) null, (TransducerEvaluator) null, 99999, 100, 10, 1000, 0.5d, false, new double[] {0.2d, 0.5d, 0.8d});
        return model;
    }

    /**
     * Stores this tagger in the given file as a GZIP-compressed Java object stream.
     *
     * <p>Fields are written in the order {@link #load} reads them: text direction,
     * forward model (only if the direction includes forward), reverse model (only if it
     * includes reverse), base pipe, order, feature-induction flag and tag format.
     *
     * <p>Writing is best-effort: an {@link IOException} is reported on standard error
     * and not propagated, so a failed write may leave an incomplete file behind.
     *
     * @param file destination file
     */
    public void write(File file) {
        try {
            FileOutputStream fileStream = new FileOutputStream(file);
            try {
                ObjectOutputStream objectOutputStream = new ObjectOutputStream(new GZIPOutputStream(fileStream));
                objectOutputStream.writeObject(this.textDirection);
                if (this.textDirection.doForward()) {
                    objectOutputStream.writeObject(this.forwardCRF);
                }
                if (this.textDirection.doReverse()) {
                    objectOutputStream.writeObject(this.reverseCRF);
                }
                objectOutputStream.writeObject(this.basePipe);
                objectOutputStream.writeInt(this.order);
                objectOutputStream.writeBoolean(this.useFeatureInduction);
                objectOutputStream.writeObject(this.format);
                // Closing the outer stream flushes the object data and finishes the GZIP stream.
                objectOutputStream.close();
            } finally {
                // Releases the file handle when a write fails part-way; harmless after a normal close.
                fileStream.close();
            }
        } catch (IOException e) {
            System.err.println("Exception writing file " + file + ": " + e);
        }
    }

    /**
     * Tags the sentence in every direction enabled by the text direction, forward first
     * and then reverse. Each enabled pass computes one tag position and one mention type
     * per token and adds the result to the sentence via {@code addMentions}.
     *
     * @param sentence sentence to tag; it receives the mentions
     */
    @Override // banner.tagging.Tagger
    public void tag(Sentence sentence) {
        final int tokenCount = sentence.getTokens().size();
        for (boolean reversed : new boolean[] {false, true}) {
            boolean enabled = reversed ? this.textDirection.doReverse() : this.textDirection.doForward();
            if (!enabled) {
                continue;
            }
            // Fresh arrays per pass so the passes do not share state.
            TaggedToken.TagPosition[] positions = new TaggedToken.TagPosition[tokenCount];
            MentionType[] types = new MentionType[tokenCount];
            getPositionsAndTypes(sentence, positions, types, reversed);
            sentence.addMentions(positions, types);
        }
    }

    /**
     * Reverses the order of the array's elements in place.
     *
     * @param objArr array to reverse; empty and single-element arrays are left as they are
     */
    private static void reverse(Object[] objArr) {
        // Swap from both ends towards the middle.
        for (int low = 0, high = objArr.length - 1; low < high; low++, high--) {
            Object held = objArr[low];
            objArr[low] = objArr[high];
            objArr[high] = held;
        }
    }

    /**
     * Decodes the sentence with the model for the requested direction and fills the two
     * output arrays with one entry per token.
     *
     * <p>Each output label is split on {@code "-"}: the first part is parsed as a
     * {@code TagPosition}; when there are exactly two parts, the second is resolved to a
     * {@code MentionType}, otherwise the mention type slot is left untouched. For the
     * reverse direction both arrays are reversed before returning.
     *
     * @param sentence sentence to decode
     * @param tagPositionArr output array for tag positions; length must equal the number of output labels
     * @param mentionTypeArr output array for mention types; length must equal the number of output labels
     * @param z {@code true} to decode reversed text with the reverse model, {@code false}
     *     to decode with the forward model
     * @throws IllegalStateException if no model exists for the requested direction
     * @throws IllegalArgumentException if either array length differs from the number of output labels
     */
    private void getPositionsAndTypes(Sentence sentence, TaggedToken.TagPosition[] tagPositionArr, MentionType[] mentionTypeArr, boolean z) {
        // Use the model that was trained for this direction: the reverse model is trained on
        // reversed text, and the forward model is null for a reverse-only tagger.
        CRF4 crf = z ? this.reverseCRF : this.forwardCRF;
        if (crf == null) {
            throw new IllegalStateException("No CRF model available for " + (z ? "reverse" : "forward") + " tagging");
        }
        Instance instance = new Instance(sentence.getTrainingText(this.format, z), (Object) null, sentence.getTag(), (Object) null, crf.getInputPipe());
        Sequence output = crf.viterbiPath((Sequence) instance.getData()).output();
        if (tagPositionArr.length != output.size()) {
            throw new IllegalArgumentException("Expected " + output.size() + " tag positions but the array holds " + tagPositionArr.length);
        }
        if (mentionTypeArr.length != output.size()) {
            throw new IllegalArgumentException("Expected " + output.size() + " mention types but the array holds " + mentionTypeArr.length);
        }
        for (int i = 0; i < output.size(); i++) {
            String[] split = output.get(i).toString().split("-");
            tagPositionArr[i] = TaggedToken.TagPosition.valueOf(split[0]);
            if (split.length == 2) {
                mentionTypeArr[i] = MentionType.getType(split[1]);
            }
        }
        if (z) {
            // Labels were produced for reversed text; restore the original token order.
            reverse(tagPositionArr);
            reverse(mentionTypeArr);
        }
    }

    /**
     * @return the tag format this tagger was created with
     */
    public TaggedToken.TagFormat getFormat() {
        return format;
    }

    /**
     * @return the CRF order this tagger was created with
     */
    public int getOrder() {
        return order;
    }

    /**
     * @return the feature-induction flag this tagger was created with
     */
    public boolean isUseFeatureInduction() {
        return useFeatureInduction;
    }

    /**
     * @return the text direction this tagger was created with
     */
    public BannerProperties.TextDirection getTextDirection() {
        return textDirection;
    }
}
