package org.apache.vxquery.metadata;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hyracks.api.client.NodeControllerInfo;
import org.apache.hyracks.api.comm.VSizeFrame;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
import org.apache.hyracks.dataflow.common.comm.io.FrameFixedFieldTupleAppender;
import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
import org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
import org.apache.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
import org.apache.hyracks.hdfs.ContextFactory;
import org.apache.hyracks.hdfs2.dataflow.FileSplitsFactory;
import org.apache.vxquery.context.DynamicContext;
import org.apache.vxquery.hdfs2.HDFSFunctions;
import org.apache.vxquery.xmlparser.TreeNodeIdProvider;
import org.apache.vxquery.xmlparser.XMLParser;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.class */
/**
 * Operator descriptor that reads the XML documents of a collection and hands them to an
 * {@link XMLParser}, which writes the parsed result to the operator's output writer.
 *
 * <p>The collection location is picked from the data source's partitions by task partition index.
 * A location containing {@code hdfs:/} is read through HDFS, either record-by-record via the
 * Hadoop input format (when the data source supplies a tag) or file-by-file (when it does not).
 * Any other location is treated as a directory on the local file system.
 */
public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
    private static final long serialVersionUID = 1;
    protected static final Logger LOGGER = Logger.getLogger(VXQueryCollectionOperatorDescriptor.class.getName());

    /** XML declaration prepended to every record obtained through the Hadoop record reader. */
    private static final String START_TAG = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
    /** Marker whose presence in a collection path selects the HDFS code path. */
    private static final String HDFS_PREFIX = "hdfs:/";

    private final short dataSourceId;
    private final short totalDataSources;
    private final String[] collectionPartitions;
    private final List<Integer> childSeq;
    /** Element name supplied by the data source; never modified after construction. */
    private final String tag;
    private final String hdfsConf;
    private final Map<String, NodeControllerInfo> nodeControllerInfos;
    // NOTE(review): created in the pushable's open() but stored on the descriptor, so it is shared by
    // every runtime created from this descriptor instance - confirm that this sharing is intended.
    private HDFSFunctions hdfs;

    /**
     * Creates the descriptor with one input and one output.
     *
     * @param spec registry the operator is registered with
     * @param dataSource source of the partitions, ids, child sequence and optional tag
     * @param recordDescriptor descriptor of the single output
     * @param hdfsConf value handed to {@link HDFSFunctions} when the runtime is opened
     * @param nodeControllerInfos node information handed to {@link HDFSFunctions} when the runtime is opened
     */
    public VXQueryCollectionOperatorDescriptor(IOperatorDescriptorRegistry spec, VXQueryCollectionDataSource dataSource, RecordDescriptor recordDescriptor, String hdfsConf, Map<String, NodeControllerInfo> nodeControllerInfos) {
        super(spec, 1, 1);
        this.collectionPartitions = dataSource.getPartitions();
        this.dataSourceId = (short) dataSource.getDataSourceId();
        this.totalDataSources = (short) dataSource.getTotalDataSources();
        this.childSeq = dataSource.getChildSeq();
        this.recordDescriptors[0] = recordDescriptor;
        this.tag = dataSource.getTag();
        this.hdfsConf = hdfsConf;
        this.nodeControllerInfos = nodeControllerInfos;
    }

    /**
     * Builds the runtime that parses the collection assigned to the given partition.
     *
     * @param ctx task context used to allocate the output frame and to look up node/job data
     * @param recordDescProvider provider of the input and output record descriptors
     * @param partition index of this task; selects the collection path modulo the number of paths
     * @param nPartitions total number of partitions (not used)
     * @return the push runtime for this partition
     * @throws HyracksDataException if the runtime cannot be set up
     */
    public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
        final FrameTupleAccessor fta = new FrameTupleAccessor(recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
        final int outputFieldCount = recordDescProvider.getOutputRecordDescriptor(getActivityId(), 0).getFieldCount();
        final VSizeFrame frame = new VSizeFrame(ctx);
        final FrameFixedFieldTupleAppender appender = new FrameFixedFieldTupleAppender(outputFieldCount);
        final short partitionId = (short) ctx.getTaskAttemptId().getTaskId().getPartition();
        final TreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider(partitionId, this.dataSourceId, this.totalDataSources);
        final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId();
        final DynamicContext dynamicContext = (DynamicContext) ctx.getJobletContext().getGlobalJobData();
        final String collectionName = this.collectionPartitions[partition % this.collectionPartitions.length];
        final XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId, appender, this.childSeq, dynamicContext.getStaticContext());

        return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
            public void open() throws HyracksDataException {
                appender.reset(frame, true);
                this.writer.open();
                VXQueryCollectionOperatorDescriptor.this.hdfs = new HDFSFunctions(VXQueryCollectionOperatorDescriptor.this.nodeControllerInfos, VXQueryCollectionOperatorDescriptor.this.hdfsConf);
            }

            public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
                fta.reset(buffer);
                String collectionPath = collectionName.replace("${nodeId}", nodeId);
                if (!collectionPath.contains(HDFS_PREFIX)) {
                    readLocalCollection(new File(collectionPath));
                    return;
                }
                FileSystem fileSystem = VXQueryCollectionOperatorDescriptor.this.hdfs.getFileSystem();
                if (fileSystem == null) {
                    return;
                }
                Path directory = new Path(collectionPath.replace(HDFS_PREFIX, ""));
                try {
                    if (VXQueryCollectionOperatorDescriptor.this.tag != null) {
                        readTaggedHdfsCollection(directory);
                    } else {
                        readHdfsDirectory(fileSystem, directory);
                    }
                } finally {
                    // Close the file system even when reading fails with a propagated exception.
                    try {
                        fileSystem.close();
                    } catch (IOException closeError) {
                        logSevere(closeError);
                    }
                }
            }

            /**
             * Parses every file found below a local directory, once per input tuple.
             * A path that does not exist is skipped silently; an existing non-directory is an error.
             */
            private void readLocalCollection(File directory) throws HyracksDataException {
                if (!directory.exists()) {
                    return;
                }
                if (!directory.isDirectory()) {
                    throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":" + directory.getAbsolutePath() + ") passed to collection.");
                }
                for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); tupleIndex++) {
                    Iterator<File> documents = FileUtils.iterateFiles(directory, new VXQueryIOFileFilter(), TrueFileFilter.INSTANCE);
                    while (documents.hasNext()) {
                        File document = documents.next();
                        if (LOGGER.isLoggable(Level.FINE)) {
                            LOGGER.fine("Starting to read XML document: " + document.getAbsolutePath());
                        }
                        parser.parseElements(document, this.writer, tupleIndex);
                    }
                }
            }

            /**
             * Reads the splits scheduled for this host through the Hadoop input format and parses
             * each record, splitting records that contain more than one opening tag.
             */
            @SuppressWarnings({ "rawtypes", "unchecked" }) // the decompiled Hadoop/Hyracks split types are raw
            private void readTaggedHdfsCollection(Path directory) throws HyracksDataException {
                HDFSFunctions hdfsFunctions = VXQueryCollectionOperatorDescriptor.this.hdfs;
                String elementName = VXQueryCollectionOperatorDescriptor.this.tag;
                hdfsFunctions.setJob(directory.toString(), elementName);
                // Kept local: writing the wrapped form back to the field would re-wrap it on every frame.
                String openingTag = "<" + elementName + ">";
                Job job = hdfsFunctions.getJob();
                InputFormat inputFormat = hdfsFunctions.getinputFormat();
                try {
                    hdfsFunctions.scheduleSplits();
                    ArrayList<Integer> schedule = hdfsFunctions.getScheduleForNode(InetAddress.getLocalHost().getHostAddress());
                    List<InputSplit> allSplits = hdfsFunctions.getSplits();
                    ArrayList scheduledSplits = new ArrayList();
                    for (Integer splitIndex : schedule) {
                        scheduledSplits.add(allSplits.get(splitIndex.intValue()));
                    }
                    List nodeSplits = new FileSplitsFactory(scheduledSplits).getSplits();
                    ContextFactory contextFactory = new ContextFactory();
                    int splitCount = nodeSplits.size();
                    for (int split = 0; split < splitCount; split++) {
                        InputSplit inputSplit = (InputSplit) nodeSplits.get(split);
                        TaskAttemptContext attemptContext = contextFactory.createContext(job.getConfiguration(), split);
                        RecordReader reader = null;
                        try {
                            reader = inputFormat.createRecordReader(inputSplit, attemptContext);
                            reader.initialize(inputSplit, attemptContext);
                            while (reader.nextKeyValue()) {
                                String record = reader.getCurrentValue().toString();
                                // NOTE(review): the split index is passed where the other branches pass a
                                // tuple index - confirm this is what parseHDFSElements expects.
                                if (StringUtils.countMatches(record, openingTag) > 1) {
                                    // Quote the tag: split() takes a regex, countMatches() a literal.
                                    for (String fragment : record.split(Pattern.quote(openingTag))) {
                                        if (fragment.length() > 0) {
                                            parser.parseHDFSElements(toXmlStream(openingTag + fragment), this.writer, fta, split);
                                        }
                                    }
                                } else {
                                    parser.parseHDFSElements(toXmlStream(record), this.writer, fta, split);
                                }
                            }
                        } catch (InterruptedException interrupted) {
                            // Restore the flag for the caller and stop reading further splits.
                            Thread.currentThread().interrupt();
                            logSevere(interrupted);
                            return;
                        } finally {
                            if (reader != null) {
                                try {
                                    reader.close();
                                } catch (IOException closeError) {
                                    logSevere(closeError);
                                }
                            }
                        }
                    }
                } catch (HyracksDataException dataError) {
                    // HyracksDataException is an IOException; rethrow so it is not swallowed below.
                    throw dataError;
                } catch (IOException | ParserConfigurationException | SAXException readError) {
                    logSevere(readError);
                }
            }

            /**
             * Parses every file found recursively below an HDFS directory, once per input tuple.
             * A missing path or a non-directory is reported as a {@link HyracksDataException}.
             */
            private void readHdfsDirectory(FileSystem fileSystem, Path directory) throws HyracksDataException {
                try {
                    if (!fileSystem.exists(directory) || !fileSystem.isDirectory(directory)) {
                        throw new HyracksDataException("Invalid HDFS directory parameter (" + nodeId + ":" + directory + ") passed to collection.");
                    }
                    for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); tupleIndex++) {
                        RemoteIterator<LocatedFileStatus> entries = fileSystem.listFiles(directory, true);
                        while (entries.hasNext()) {
                            Path document = entries.next().getPath();
                            if (!fileSystem.isFile(document)) {
                                continue;
                            }
                            if (LOGGER.isLoggable(Level.FINE)) {
                                LOGGER.fine("Starting to read XML document: " + document.getName());
                            }
                            // Close each document stream once it has been parsed.
                            try (InputStream in = fileSystem.open(document).getWrappedStream()) {
                                parser.parseHDFSElements(in, this.writer, fta, tupleIndex);
                            }
                        }
                    }
                } catch (HyracksDataException dataError) {
                    // HyracksDataException is an IOException; rethrow so the invalid-directory
                    // error above actually reaches the caller.
                    throw dataError;
                } catch (IOException readError) {
                    logSevere(readError);
                }
            }

            public void fail() throws HyracksDataException {
                this.writer.fail();
            }

            public void close() throws HyracksDataException {
                // Push out tuples still buffered in the appender before closing downstream.
                if (appender.getTupleCount() > 0) {
                    appender.flush(this.writer, true);
                }
                this.writer.close();
            }
        };
    }

    /** Wraps {@code body} in the XML declaration and exposes it as a UTF-8 byte stream. */
    private static ByteArrayInputStream toXmlStream(String body) {
        return new ByteArrayInputStream((START_TAG + body).getBytes(StandardCharsets.UTF_8));
    }

    /** Logs the error's message at SEVERE level, with the throwable attached for its stack trace. */
    private static void logSevere(Throwable error) {
        if (LOGGER.isLoggable(Level.SEVERE)) {
            LOGGER.log(Level.SEVERE, error.getMessage(), error);
        }
    }
}
