package org.apache.lucene.benchmark.byTask.feeds;

import java.io.IOException;
import java.util.Date;
import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser;

/* loaded from: input_file:org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.class */
/**
 * {@link TrecDocParser} implementation for documents that wrap their content in
 * {@code <TEXT>} ... {@code </TEXT>} and may carry a {@code <DATE>} ... {@code </DATE>} element.
 *
 * <p>The helpers {@code extract} and {@code stripTags} are not defined in this class;
 * presumably they are inherited from {@link TrecDocParser} -- confirm there for their exact
 * contracts.
 */
public class TrecFR94Parser extends TrecDocParser {
    /** Opening marker of the text section; the body starts right after it. */
    private static final String TEXT = "<TEXT>";
    /** Number of characters to skip to get past {@link #TEXT}. */
    private static final int TEXT_LENGTH = TEXT.length();
    /** Closing marker of the text section. */
    private static final String TEXT_END = "</TEXT>";

    /** Opening marker of the date element. */
    private static final String DATE = "<DATE>";
    /** Closing marker of the date element. */
    private static final String DATE_END = "</DATE>";
    /** Prefixes handed to {@code extract} as the "noise" argument for the date element. */
    private static final String[] DATE_NOISE_PREFIXES = {"DATE:", "date:", "t.c."};

    /**
     * Fills {@code docData} from the raw document held in {@code sb}.
     *
     * <p>When {@code sb} contains {@code <TEXT>}, the body starts immediately after that tag and
     * a date is looked up; otherwise the body starts at offset 0 and the date stays {@code null}.
     * The target is then cleared and receives the name, the date and the tag-stripped body.
     *
     * @param docData           document holder to reset and populate; also the return value
     * @param str               name assigned to the document
     * @param trecContentSource source whose {@code parseDate} turns the date text into a {@link Date}
     * @param sb                buffer holding the raw document text
     * @param parsePathType     not used by this parser
     * @return the same {@code docData} instance that was passed in
     * @throws IOException declared by the overridden method
     */
    @Override // org.apache.lucene.benchmark.byTask.feeds.TrecDocParser
    public DocData parse(DocData docData, String str, TrecContentSource trecContentSource, StringBuilder sb, TrecDocParser.ParsePathType parsePathType) throws IOException {
        final int textOpen = sb.indexOf(TEXT);
        final boolean hasTextSection = textOpen >= 0;

        // The date is resolved before docData is touched, and only when a text section exists.
        final Date when = hasTextSection ? parseDateForTextSection(sb, textOpen, trecContentSource) : null;
        final int bodyStart = hasTextSection ? textOpen + TEXT_LENGTH : 0;

        docData.clear();
        docData.setName(str);
        docData.setDate(when);
        docData.setBody(stripTags(sb, bodyStart));
        return docData;
    }

    /**
     * Looks up the date element and parses it.
     *
     * <p>The index of the first {@code </TEXT>} at or after {@code textOpen} (or -1 when there is
     * none) is forwarded to {@code extract} as its position argument; how {@code extract}
     * interprets that value is not visible in this class.
     *
     * @param buffer   raw document text
     * @param textOpen index of the opening {@code <TEXT>} tag in {@code buffer}
     * @param source   provider of {@code parseDate}
     * @return the parsed date, or {@code null} when {@code extract} yields no date text
     * @throws IOException kept in line with the signature of {@code parse}
     */
    private Date parseDateForTextSection(StringBuilder buffer, int textOpen, TrecContentSource source) throws IOException {
        final int textClose = buffer.indexOf(TEXT_END, textOpen);
        final String rawDate = extract(buffer, DATE, DATE_END, textClose, DATE_NOISE_PREFIXES);
        if (rawDate == null) {
            return null;
        }
        // Remove any markup left inside the date text and trim it before parsing.
        return source.parseDate(stripTags(rawDate, 0).trim());
    }
}
