package org.apache.lucene.benchmark.byTask.feeds;

import java.io.IOException;
import java.util.Date;
import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser;

/* loaded from: input_file:org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.class */
/**
 * {@link TrecDocParser} implementation that populates a {@link DocData} from a
 * document buffer using the {@code <DATE>}, {@code <SUBJECT>} and
 * {@code <HEADLINE>} tags found in it.
 */
public class TrecLATimesParser extends TrecDocParser {
    // Opening/closing markers of the date section.
    private static final String DATE = "<DATE>";
    private static final String DATE_END = "</DATE>";
    // Marker inside the date text; everything from its comma onward is dropped.
    private static final String DATE_NOISE = "day,";
    // Opening/closing markers of the two candidate title sections.
    private static final String SUBJECT = "<SUBJECT>";
    private static final String SUBJECT_END = "</SUBJECT>";
    private static final String HEADLINE = "<HEADLINE>";
    private static final String HEADLINE_END = "</HEADLINE>";

    /**
     * Fills {@code docData} with the name, date, title and body taken from the
     * document buffer.
     *
     * @param docData           instance that is cleared and then populated
     * @param str               value stored as the document name
     * @param trecContentSource used to turn the extracted date text into a {@link Date}
     * @param sb                buffer holding the raw document text
     * @param parsePathType     not used by this implementation
     * @return the same {@code docData} instance that was passed in
     * @throws IOException declared by the overridden method
     */
    @Override // org.apache.lucene.benchmark.byTask.feeds.TrecDocParser
    public DocData parse(DocData docData, String str, TrecContentSource trecContentSource, StringBuilder sb, TrecDocParser.ParsePathType parsePathType) throws IOException {
        // --- date: stays null when no <DATE> section is found ---
        Date parsedDate = null;
        String rawDate = extract(sb, DATE, DATE_END, -1, null);
        if (rawDate != null) {
            int noiseStart = rawDate.indexOf(DATE_NOISE);
            // Keep the text up to and including "day" (3 chars of the marker),
            // discarding the comma and whatever follows it.
            String dateText = noiseStart > 0 ? rawDate.substring(0, noiseStart + 3) : rawDate;
            parsedDate = trecContentSource.parseDate(stripTags(dateText, 0).trim());
        }

        // --- title: <SUBJECT> is preferred, <HEADLINE> is the fallback ---
        String rawTitle = extract(sb, SUBJECT, SUBJECT_END, -1, null);
        if (rawTitle == null) {
            rawTitle = extract(sb, HEADLINE, HEADLINE_END, -1, null);
        }
        String title = rawTitle == null ? null : stripTags(rawTitle, 0).trim();

        // --- populate the result, discarding any previous content first ---
        docData.clear();
        docData.setName(str);
        docData.setDate(parsedDate);
        docData.setTitle(title);
        docData.setBody(stripTags(sb, 0));
        return docData;
    }
}
