package gov.sandia.cognition.text.document.extractor;

import gov.sandia.cognition.io.FileUtil;
import gov.sandia.cognition.text.convert.DocumentFieldConcatenator;
import gov.sandia.cognition.text.document.DefaultDocument;
import gov.sandia.cognition.text.document.Document;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URLConnection;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/* loaded from: input_file:gov/sandia/cognition/text/document/extractor/TextDocumentExtractor.class */
/**
 * Document extractor for plain-text resources. It recognizes candidates
 * either by file extension or by reported content type, and builds a single
 * {@link Document} whose body is the text read from the connection.
 */
public class TextDocumentExtractor extends AbstractSingleDocumentExtractor {
    /** Content type string that {@link #canExtract(URLConnection)} matches against. */
    public static final String CONTENT_TYPE = "text/plain";

    /** Unmodifiable list of the lower-case file extensions accepted by {@link #canExtract(URI)}. */
    public static final List<String> DEFAULT_TEXT_FILE_EXTENSIONS = Collections.unmodifiableList(Arrays.asList("txt", "text"));

    /**
     * Decides whether the given URI looks like a text file, based only on the
     * extension of its path.
     *
     * @param uri the URI whose path extension is inspected
     * @return {@code true} if the lower-cased extension is one of
     *         {@link #DEFAULT_TEXT_FILE_EXTENSIONS}; {@code false} if the
     *         extension is {@code null} or not in that list
     * @throws IOException declared by the overridden method; not thrown by the
     *         code in this body itself
     */
    @Override // gov.sandia.cognition.text.document.extractor.DocumentExtractor
    public boolean canExtract(URI uri) throws IOException {
        String extension = FileUtil.getExtension(uri.getPath());
        if (extension == null) {
            return false;
        }
        return DEFAULT_TEXT_FILE_EXTENSIONS.contains(extension.toLowerCase());
    }

    /**
     * Decides whether the given connection reports plain-text content.
     *
     * @param uRLConnection the connection whose content type is inspected
     * @return {@code true} only if the reported content type is exactly equal
     *         to {@link #CONTENT_TYPE}; a {@code null} content type yields
     *         {@code false}
     * @throws IOException declared by the overridden method
     */
    @Override // gov.sandia.cognition.text.document.extractor.DocumentExtractor
    public boolean canExtract(URLConnection uRLConnection) throws IOException {
        return CONTENT_TYPE.equals(uRLConnection.getContentType());
    }

    /**
     * Reads the connection's input stream line by line and returns a
     * {@link DefaultDocument} whose body is every line followed by
     * {@link DocumentFieldConcatenator#DEFAULT_FIELD_SEPARATOR}.
     *
     * <p>The reader is created with the platform default charset and is closed
     * exactly once, after all lines have been read or when reading fails.
     *
     * @param uRLConnection the connection to read the text from
     * @return the document populated with metadata read from the connection
     *         and the accumulated text as its body
     * @throws IOException if opening or reading the stream fails
     */
    @Override // gov.sandia.cognition.text.document.extractor.SingleDocumentExtractor
    public Document extractDocument(URLConnection uRLConnection) throws IOException {
        // Method-local buffer, never shared between threads, so the
        // unsynchronized StringBuilder is sufficient.
        StringBuilder text = new StringBuilder();
        BufferedReader reader = new BufferedReader(new InputStreamReader(uRLConnection.getInputStream()));
        try {
            // readLine() strips the line terminator, so a separator is
            // re-appended after every line.
            String line = reader.readLine();
            while (line != null) {
                text.append(line);
                text.append(DocumentFieldConcatenator.DEFAULT_FIELD_SEPARATOR);
                line = reader.readLine();
            }

            // Metadata is read before the reader is closed, preserving the
            // original ordering of these calls.
            DefaultDocument document = new DefaultDocument();
            document.readMetaData(uRLConnection);
            document.setBody(text.toString());
            return document;
        } finally {
            // The try/finally wraps the whole loop: closing inside the loop
            // would make the second readLine() fail on a closed stream.
            reader.close();
        }
    }
}
