package com.hankcs.hanlp.model.crf;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.model.crf.crfpp.Encoder;
import com.hankcs.hanlp.model.crf.crfpp.FeatureIndex;
import com.hankcs.hanlp.model.crf.crfpp.crf_learn;
import com.hankcs.hanlp.model.perceptron.PerceptronPOSTagger;
import com.hankcs.hanlp.model.perceptron.feature.FeatureMap;
import com.hankcs.hanlp.model.perceptron.instance.POSInstance;
import com.hankcs.hanlp.tokenizer.lexical.POSTagger;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/* loaded from: input_file:com/hankcs/hanlp/model/crf/CRFPOSTagger.class */
/**
 * Part-of-speech tagger driven by a CRF model.
 *
 * <p>Each word is expanded into five string columns (the word itself plus its
 * one- and two-character prefixes and suffixes). Training corpora are written
 * in that tab-separated column layout with the label appended as a sixth
 * column; at tagging time the same columns are combined according to the
 * model's feature templates and decoding is delegated to a
 * {@link PerceptronPOSTagger} built from the loaded model.
 */
public class CRFPOSTagger extends CRFTagger implements POSTagger {
    /** Number of feature columns produced per word by {@link #extractFeature(String, String[])}. */
    private static final int FEATURE_COLUMNS = 5;

    /** Index of the label column in a training row (directly after the feature columns). */
    private static final int LABEL_COLUMN = FEATURE_COLUMNS;

    /** Value used for a prefix/suffix column when the word is too short to provide it. */
    private static final String PLACEHOLDER = "_";

    /** Decoder backed by the loaded model; stays {@code null} when no model path was given. */
    private PerceptronPOSTagger perceptronPOSTagger;

    /**
     * Creates a tagger from the model at {@code HanLP.Config.CRFPOSModelPath}.
     *
     * @throws IOException propagated from the path-based constructor
     */
    public CRFPOSTagger() throws IOException {
        this(HanLP.Config.CRFPOSModelPath);
    }

    /**
     * Creates a tagger from the given model path.
     *
     * @param str model path handed to the superclass constructor; when {@code null}
     *            no decoder is created, so the instance cannot {@code tag} (it can
     *            still be used for the training-side methods)
     * @throws IOException propagated from the superclass constructor
     */
    public CRFPOSTagger(String str) throws IOException {
        super(str);
        if (str != null) {
            this.perceptronPOSTagger = new PerceptronPOSTagger(this.model);
        }
    }

    /**
     * Trains with the defaults of {@code crf_learn.Option}, except for the fourth
     * argument, which is fixed to 10.
     *
     * @param str  first argument forwarded to the full {@code train} overload
     * @param str2 second argument forwarded to the full {@code train} overload
     * @throws IOException propagated from the full {@code train} overload
     */
    @Override // com.hankcs.hanlp.model.crf.CRFTagger
    public void train(String str, String str2) throws IOException {
        crf_learn.Option option = new crf_learn.Option();
        // NOTE(review): the literal 10 is presumably the minimum feature frequency
        // -- confirm against the parameter list of CRFTagger.train.
        train(
            str,
            str2,
            option.maxiter.intValue(),
            10,
            option.eta.doubleValue(),
            option.cost.doubleValue(),
            option.thread.intValue(),
            option.shrinking_size.intValue(),
            Encoder.Algorithm.fromString(option.algorithm));
    }

    /**
     * Writes one tab-separated row per word of {@code sentence}: the five feature
     * columns followed by the word's label, each row terminated by a newline.
     *
     * @param sentence       sentence whose simple word list is converted
     * @param bufferedWriter destination of the rows
     * @throws IOException if writing fails
     */
    @Override // com.hankcs.hanlp.model.crf.CRFTagger
    protected void convertCorpus(Sentence sentence, BufferedWriter bufferedWriter) throws IOException {
        for (Word word : sentence.toSimpleWordList()) {
            String[] cells = createCells(true);
            extractFeature(word.value, cells);
            cells[LABEL_COLUMN] = word.label;
            for (int column = 0; column < cells.length; column++) {
                if (column > 0) {
                    bufferedWriter.write('\t');
                }
                bufferedWriter.write(cells[column]);
            }
            bufferedWriter.newLine();
        }
    }

    /**
     * Allocates an empty row of columns.
     *
     * @param z {@code true} to reserve an extra trailing column for the label
     * @return a new array of 6 columns when {@code z} is set, otherwise 5
     */
    private String[] createCells(boolean z) {
        return new String[z ? FEATURE_COLUMNS + 1 : FEATURE_COLUMNS];
    }

    /**
     * Fills the first five columns of {@code strArr} for one word: the word, its
     * first character, its first two characters, its last character and its last
     * two characters. A column the word is too short to provide is set to
     * {@code "_"}, which also covers the empty word.
     *
     * @param str    the word
     * @param strArr row to fill; must have at least five slots
     */
    private void extractFeature(String str, String[] strArr) {
        int length = str.length();
        boolean hasOne = length > 0;
        boolean hasTwo = length > 1;
        strArr[0] = str;
        strArr[1] = hasOne ? str.substring(0, 1) : PLACEHOLDER;
        strArr[2] = hasTwo ? str.substring(0, 2) : PLACEHOLDER;
        strArr[3] = hasOne ? str.substring(length - 1) : PLACEHOLDER;
        strArr[4] = hasTwo ? str.substring(length - 2) : PLACEHOLDER;
    }

    /**
     * Returns the built-in feature template text: unigram templates over the
     * previous, current and next word (column 0) and over columns 1-4 of the
     * current word, followed by a bare bigram template.
     *
     * @return the template text
     */
    @Override // com.hankcs.hanlp.model.crf.CRFTagger
    protected String getDefaultFeatureTemplate() {
        return "# Unigram\n"
            + "U0:%x[-1,0]\n"
            + "U1:%x[0,0]\n"
            + "U2:%x[1,0]\n"
            + "U3:%x[0,1]\n"
            + "U4:%x[0,2]\n"
            + "U5:%x[0,3]\n"
            + "U6:%x[0,4]\n"
            + "\n"
            + "# Bigram\n"
            + "B";
    }

    /**
     * Tags a list of words.
     *
     * @param list the words, in sentence order
     * @return the result of {@link #tag(String...)} for the same words
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.POSTagger
    public String[] tag(List<String> list) {
        return tag(list.toArray(new String[0]));
    }

    /**
     * Tags the given words by delegating to the perceptron decoder.
     *
     * @param strArr the words, in sentence order
     * @return whatever the decoder returns for the built instance
     * @throws IllegalStateException if this tagger was created with a {@code null}
     *                               model path and therefore has no decoder
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.POSTagger
    public String[] tag(String... strArr) {
        if (this.perceptronPOSTagger == null) {
            throw new IllegalStateException(
                "CRFPOSTagger was created without a model path, so it cannot tag");
        }
        return this.perceptronPOSTagger.tag(createInstance(strArr));
    }

    /**
     * Builds a {@link POSInstance} whose per-position features are produced by
     * expanding the model's feature templates over the precomputed word columns.
     *
     * @param strArr the words, in sentence order
     * @return the instance handed to the decoder
     */
    private POSInstance createInstance(String[] strArr) {
        final FeatureTemplate[] templates = this.model.getFeatureTemplateArray();
        final String[][] table = new String[strArr.length][FEATURE_COLUMNS];
        for (int row = 0; row < strArr.length; row++) {
            extractFeature(strArr[row], table[row]);
        }
        return new POSInstance(strArr, this.model.featureMap) { // from class: com.hankcs.hanlp.model.crf.CRFPOSTagger.1
            @Override // com.hankcs.hanlp.model.perceptron.instance.POSInstance
            protected int[] extractFeature(String[] words, FeatureMap featureMap, int position) {
                StringBuilder rawFeature = new StringBuilder();
                LinkedList<Integer> featureIds = new LinkedList<Integer>();
                for (int t = 0; t < templates.length; t++) {
                    Iterator<int[]> offsets = templates[t].offsetList.iterator();
                    Iterator<String> delimiters = templates[t].delimiterList.iterator();
                    // The first delimiter is consumed without being appended.
                    delimiters.next();
                    while (offsets.hasNext()) {
                        int[] offset = offsets.next();
                        int row = offset[0] + position;
                        int column = offset[1];
                        if (row < 0) {
                            // Before the first word: use the matching FeatureIndex.BOS entry.
                            rawFeature.append(FeatureIndex.BOS[-(row + 1)]);
                        } else if (row >= words.length) {
                            // Past the last word: use the matching FeatureIndex.EOS entry.
                            rawFeature.append(FeatureIndex.EOS[row - words.length]);
                        } else {
                            rawFeature.append(table[row][column]);
                        }
                        if (delimiters.hasNext()) {
                            rawFeature.append(delimiters.next());
                        } else {
                            // No delimiter left: append the template's index instead.
                            rawFeature.append(t);
                        }
                    }
                    addFeatureThenClear(rawFeature, featureIds, featureMap);
                }
                return toFeatureArray(featureIds);
            }
        };
    }
}
