package org.apache.pig.data;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.net.bsd.RCommandClient;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;

/* loaded from: input_file:WEB-INF/lib/pig-0.9.1.jar:org/apache/pig/data/InternalDistinctBag.class */
public class InternalDistinctBag extends SortedSpillBag {
    /** Serialization version identifier for this class. */
    private static final long serialVersionUID = 2;
    /** Logger shared by the bag and its iterator. */
    private static final Log log = LogFactory.getLog(InternalDistinctBag.class);
    /** Factory used to create the empty tuples that spill-file records are deserialized into. */
    private static TupleFactory gTupleFactory = TupleFactory.getInstance();
    /** Set to true when the first iterator is constructed; add() rejects tuples from then on. */
    private transient boolean mReadStarted;
    /** Number of in-memory tuples above which add() triggers a spill. */
    private transient int cacheLimit;
    /** Memory budget for this bag, computed in init() from the JVM max memory. */
    private transient long maxMemUsage;
    /** Running sum of getMemorySize() over the tuples sampled by add(). */
    private transient long memUsage;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/pig-0.9.1.jar:org/apache/pig/data/InternalDistinctBag$DistinctDataBagIterator.class */
    public class DistinctDataBagIterator implements Iterator<Tuple> {
        /** Tuple fetched ahead by hasNext() and held until the following next() call returns it. */
        private Tuple mBuf = null;
        /** Index of the next element of the in-memory contents that readFromMemory() will return. */
        private int mMemoryPtr = 0;
        /** Ordered set of pending tuples used to merge the sources; created lazily by readFromTree() or per pass by preMerge(). */
        private TreeSet<TContainer> mMergeTree = null;
        /** Open spill-file streams indexed by file number; an entry is set to null once its file is exhausted. */
        private ArrayList<DataInputStream> mStreams = null;
        /** Number of next() calls so far, used to throttle progress reporting. */
        private int mCntr = 0;

        /* JADX INFO: Access modifiers changed from: private */
        /* loaded from: input_file:WEB-INF/lib/pig-0.9.1.jar:org/apache/pig/data/InternalDistinctBag$DistinctDataBagIterator$TContainer.class */
        /**
         * Merge-tree entry: a tuple together with the number of the source it was read from.
         * Ordering, equality and hashing are all delegated to the wrapped tuple, so two
         * containers holding equal tuples collapse into one entry in the merge tree.
         */
        public class TContainer implements Comparable<TContainer> {
            /** Tuple currently held for this source. */
            public Tuple tuple;
            /** Index into the stream list, or -1 when the tuple came from the in-memory contents. */
            public int fileNum;

            private TContainer() {
            }

            /** Orders containers by their tuples only; the source number is ignored. */
            @Override // java.lang.Comparable
            public int compareTo(TContainer other) {
                return this.tuple.compareTo(other.tuple);
            }

            /** Two containers are equal when their tuples compare as equal. */
            @Override
            public boolean equals(Object obj) {
                if (!(obj instanceof TContainer)) {
                    return false;
                }
                TContainer that = (TContainer) obj;
                return compareTo(that) == 0;
            }

            /** Hash of the wrapped tuple. */
            @Override
            public int hashCode() {
                return this.tuple.hashCode();
            }
        }

        /**
         * Prepares the bag for reading. The first iterator created on a bag pre-merges
         * excess spill files, replaces the in-memory contents with a sorted list, and
         * marks the bag as read-started; later iterators skip all of that.
         */
        DistinctDataBagIterator() {
            if (!InternalDistinctBag.this.mReadStarted) {
                preMerge();
                // Copy the in-memory contents into a list so they can be sorted and read by index.
                ArrayList sorted = new ArrayList(InternalDistinctBag.this.mContents);
                Collections.sort(sorted);
                InternalDistinctBag.this.mContents = sorted;
                InternalDistinctBag.this.mReadStarted = true;
            }
        }

        /**
         * Fetches the next tuple ahead of time and parks it in the look-ahead buffer.
         *
         * @return true when a tuple is available for the following next() call
         */
        @Override // java.util.Iterator
        public boolean hasNext() {
            // next() hands back an already-buffered tuple, so repeated calls do not skip data.
            Tuple lookahead = next();
            this.mBuf = lookahead;
            return lookahead != null;
        }

        /* JADX WARN: Can't rename method to resolve collision */
        /**
         * Returns the next tuple of the bag.
         *
         * <p>If hasNext() already buffered a tuple, that tuple is returned and the buffer is
         * cleared. Otherwise the tuple is read from the in-memory contents when nothing has
         * been spilled, or from the merge of spill files and memory when spill files exist.
         *
         * @return the next tuple, or null when the bag is exhausted (no exception is thrown)
         */
        @Override // java.util.Iterator
        public Tuple next() {
            // 0x3ff == 1023: report progress on the first call and on every 1024th call after it.
            // A plain literal is used instead of an unrelated library constant with the same value.
            final int progressMask = 0x3ff;
            int callIndex = this.mCntr;
            this.mCntr = callIndex + 1;
            if ((callIndex & progressMask) == 0) {
                InternalDistinctBag.this.reportProgress();
            }

            if (this.mBuf != null) {
                Tuple buffered = this.mBuf;
                this.mBuf = null;
                return buffered;
            }

            boolean nothingSpilled = InternalDistinctBag.this.mSpillFiles == null
                    || InternalDistinctBag.this.mSpillFiles.size() == 0;
            return nothingSpilled ? readFromMemory() : readFromTree();
        }

        /**
         * Does nothing: removal through the iterator is not implemented, and the call
         * returns silently without modifying the bag.
         */
        @Override // java.util.Iterator
        public void remove() {
            // NOTE(review): the java.util.Iterator contract suggests throwing
            // UnsupportedOperationException here; left as a silent no-op in case callers rely on it.
        }

        /**
         * Returns the smallest pending tuple across all sources and refills the merge tree
         * from the source that tuple came from.
         *
         * <p>On the first call (merge tree not yet built) every spill file is opened and
         * contributes one tuple, followed by the in-memory contents if there are any.
         *
         * @return the next tuple in merge order, or null when every source is exhausted
         * @throws RuntimeException if a spill file cannot be opened
         */
        private Tuple readFromTree() {
            if (this.mMergeTree == null) {
                this.mMergeTree = new TreeSet<>();
                this.mStreams = new ArrayList<>(InternalDistinctBag.this.mSpillFiles.size() + 1);

                for (Iterator<File> spillFiles = InternalDistinctBag.this.mSpillFiles.iterator(); spillFiles.hasNext();) {
                    try {
                        File spill = spillFiles.next();
                        DataInputStream in = new DataInputStream(new BufferedInputStream(new FileInputStream(spill)));
                        this.mStreams.add(in);
                        // Seed the tree with the first tuple of the stream that was just opened.
                        addToQueue(null, this.mStreams.size() - 1);
                    } catch (FileNotFoundException missing) {
                        InternalDistinctBag.log.fatal("Unable to find our spill file.", missing);
                        throw new RuntimeException("Unable to find our spill file.", missing);
                    }
                }

                // File number -1 denotes the in-memory contents.
                if (!InternalDistinctBag.this.mContents.isEmpty()) {
                    addToQueue(null, -1);
                }
            }

            // pollFirst() removes and returns the lowest entry, or null when the tree is empty.
            TContainer smallest = this.mMergeTree.pollFirst();
            if (smallest == null) {
                return null;
            }
            Tuple result = smallest.tuple;
            // Reuse the container to pull the next tuple from the same source.
            addToQueue(smallest, smallest.fileNum);
            return result;
        }

        /**
         * Reads tuples from one source until one is accepted by the merge tree, i.e. until
         * a tuple is found that is not already present there. Nothing is added when the
         * source runs out first.
         *
         * @param tContainer container to reuse, or null to allocate a new one
         * @param i index of the spill-file stream to read from, or -1 for the in-memory contents
         * @throws RuntimeException if reading a spill file fails with anything other than end-of-file
         */
        private void addToQueue(TContainer tContainer, int i) {
            TContainer slot = (tContainer != null) ? tContainer : new TContainer();
            slot.fileNum = i;

            if (i == -1) {
                // In-memory source: skip tuples the tree already holds, stop when memory is exhausted.
                while (true) {
                    slot.tuple = readFromMemory();
                    if (slot.tuple == null) {
                        return;
                    }
                    if (this.mMergeTree.add(slot)) {
                        return;
                    }
                }
            }

            DataInputStream in = this.mStreams.get(i);
            if (in == null) {
                // This spill file was already read to the end.
                return;
            }

            slot.tuple = InternalDistinctBag.gTupleFactory.newTuple();
            while (true) {
                try {
                    slot.tuple.readFields(in);
                } catch (EOFException endOfFile) {
                    // End of this spill file: close it and mark the slot as exhausted.
                    try {
                        in.close();
                    } catch (IOException closeFailure) {
                        InternalDistinctBag.log.warn("Failed to close spill file.", closeFailure);
                    }
                    this.mStreams.set(i, null);
                    return;
                } catch (IOException readFailure) {
                    InternalDistinctBag.log.fatal("Unable to find our spill file.", readFailure);
                    throw new RuntimeException("Unable to find our spill file.", readFailure);
                }
                // A rejected add means the tuple is a duplicate; read the next record into the same tuple.
                if (this.mMergeTree.add(slot)) {
                    return;
                }
            }
        }

        /**
         * Returns the next tuple of the in-memory contents and advances the read position.
         *
         * @return the tuple at the current position, or null when the contents are empty
         *         or have been fully read
         */
        private Tuple readFromMemory() {
            int available = InternalDistinctBag.this.mContents.size();
            if (available == 0 || this.mMemoryPtr >= available) {
                return null;
            }
            // The contents are read by index, so they must be the list installed by the iterator constructor.
            ArrayList inMemory = (ArrayList) InternalDistinctBag.this.mContents;
            Object element = inMemory.get(this.mMemoryPtr);
            this.mMemoryPtr++;
            return (Tuple) element;
        }

        /**
         * Reduces the number of spill files to at most 100 before reading starts.
         *
         * <p>Does nothing unless more than 100 spill files exist. Otherwise it repeatedly
         * takes the first 100 pending files, merges them (dropping duplicates via the merge
         * tree) into a freshly created spill file, and queues that new file for further
         * merging, until no more than 100 files remain. The consumed files are deleted
         * afterwards. Only spill files take part; the in-memory contents are not merged here.
         *
         * @throws RuntimeException if a spill file cannot be opened, read or written
         */
        private void preMerge() {
            if (InternalDistinctBag.this.mSpillFiles == null || InternalDistinctBag.this.mSpillFiles.size() <= 100) {
                return;
            }
            try {
                // Files still to be merged (working copy) and files already consumed (to delete later).
                LinkedList linkedList = new LinkedList(InternalDistinctBag.this.mSpillFiles);
                LinkedList linkedList2 = new LinkedList();
                while (linkedList.size() > 100) {
                    ListIterator listIterator = linkedList.listIterator();
                    // Fresh stream list and merge tree for this pass.
                    this.mStreams = new ArrayList<>(100);
                    this.mMergeTree = new TreeSet<>();
                    for (int i = 0; i < 100; i++) {
                        try {
                            File file = (File) listIterator.next();
                            this.mStreams.add(new DataInputStream(new BufferedInputStream(new FileInputStream(file))));
                            // Seed the merge tree with the first tuple of the stream just opened.
                            addToQueue(null, this.mStreams.size() - 1);
                            listIterator.remove();
                            linkedList2.add(file);
                        } catch (FileNotFoundException e) {
                            InternalDistinctBag.log.fatal("Unable to find our spill file.", e);
                            throw new RuntimeException("Unable to find our spill file.", e);
                        }
                    }
                    try {
                        DataOutputStream spillFile = InternalDistinctBag.this.getSpillFile();
                        // Presumably getSpillFile() appends the new file to mSpillFiles, so the last
                        // element is the merge output; it is queued for later passes. TODO confirm.
                        linkedList.add(InternalDistinctBag.this.mSpillFiles.get(InternalDistinctBag.this.mSpillFiles.size() - 1));
                        // mMergeTree is non-null here, so readFromTree() skips its own setup and just
                        // drains the tree built above.
                        while (true) {
                            Tuple readFromTree = readFromTree();
                            if (readFromTree == null) {
                                break;
                            } else {
                                readFromTree.write(spillFile);
                            }
                        }
                        spillFile.flush();
                        spillFile.close();
                        // NOTE(review): if write()/flush() throws, spillFile is not closed, and input
                        // streams that have not reached end-of-file stay open as well.
                    } catch (IOException e2) {
                        InternalDistinctBag.log.fatal("Unable to find our spill file.", e2);
                        throw new RuntimeException("Unable to find our spill file.", e2);
                    }
                }
                // Remove the spill files whose contents now live in merged files.
                Iterator it = linkedList2.iterator();
                while (it.hasNext()) {
                    File file2 = (File) it.next();
                    if (!file2.delete()) {
                        InternalDistinctBag.log.warn("Failed to delete spill file: " + file2.getPath());
                    }
                }
                // The old list is emptied before being replaced by the reduced one.
                // NOTE(review): presumably so discarding the old FileList cannot affect files that
                // are still needed - confirm against FileList.
                InternalDistinctBag.this.mSpillFiles.clear();
                InternalDistinctBag.this.mSpillFiles = new FileList((LinkedList<File>) linkedList);
                // Reset merge state so the first readFromTree() of the real iteration rebuilds it.
                this.mStreams = null;
                this.mMergeTree = null;
            } catch (Throwable th) {
                // Same reset on failure, then propagate the original error unchanged.
                this.mStreams = null;
                this.mMergeTree = null;
                throw th;
            }
        }
    }

    /**
     * Creates a bag with a bag count of 1. The negative fraction makes the
     * two-argument constructor pick the memory fraction itself.
     */
    public InternalDistinctBag() {
        this(1, -1.0d);
    }

    /**
     * Creates a bag whose memory budget is divided by the given bag count. The negative
     * fraction makes the two-argument constructor pick the memory fraction itself.
     *
     * @param i divisor applied to the memory budget
     */
    public InternalDistinctBag(int i) {
        this(i, -1.0d);
    }

    public InternalDistinctBag(int i, double d) {
        String str;
        this.mReadStarted = false;
        if (d < 0.0d) {
            d = 0.20000000298023224d;
            if (PigMapReduce.sJobConfInternal.get() != null && (str = PigMapReduce.sJobConfInternal.get().get("pig.cachedbag.memusage")) != null) {
                d = Float.parseFloat(str);
            }
        }
        init(i, d);
    }

    /**
     * Sets up empty hash-based contents and computes the memory budget as
     * (JVM max memory * fraction) / bag count.
     *
     * @param i divisor applied to the memory budget
     * @param d fraction of the JVM's maximum memory to use
     */
    private void init(int i, double d) {
        this.mContents = new HashSet<Tuple>();

        // The max-memory value goes through float, exactly as before, so the result is unchanged.
        float jvmMaxMemory = (float) Runtime.getRuntime().maxMemory();
        this.maxMemUsage = (long) ((jvmMaxMemory * d) / i);

        // A budget below one byte means nothing may be cached; otherwise start unlimited
        // until add() has estimated the tuple size.
        this.cacheLimit = (this.maxMemUsage < 1) ? 0 : Integer.MAX_VALUE;
    }

    /**
     * Always reports this bag as not sorted.
     *
     * @return false
     */
    @Override // org.apache.pig.data.DataBag
    public boolean isSorted() {
        return false;
    }

    /**
     * Always reports this bag as distinct.
     *
     * @return true
     */
    @Override // org.apache.pig.data.DataBag
    public boolean isDistinct() {
        return true;
    }

    /**
     * Returns the number of tuples in the bag.
     *
     * <p>When nothing has been spilled the maintained counter is returned as is. Once
     * spill files exist, the bag is iterated in full to recount and the counter is
     * updated with the result. Creating that iterator also marks the bag as read-started,
     * so no tuples can be added afterwards.
     *
     * @return the number of tuples in the bag
     */
    @Override // org.apache.pig.data.DefaultAbstractBag, org.apache.pig.data.DataBag
    public long size() {
        if (this.mSpillFiles != null && this.mSpillFiles.size() > 0) {
            Iterator<Tuple> it = iterator();
            // Counted as long: an int counter would wrap for bags with more than 2^31-1 tuples.
            long counted = 0L;
            while (it.hasNext()) {
                it.next();
                counted++;
            }
            this.mSize = counted;
        }
        return this.mSize;
    }

    /**
     * Creates an iterator over the bag. Constructing the first iterator marks the bag
     * as read-started, after which add() throws IllegalStateException.
     *
     * @return a new iterator over the tuples of this bag
     */
    @Override // org.apache.pig.data.DataBag, java.lang.Iterable
    public Iterator<Tuple> iterator() {
        return new DistinctDataBagIterator();
    }

    /**
     * Adds a tuple to the bag unless the in-memory contents already hold an equal one.
     *
     * <p>Before inserting, the in-memory contents are spilled if they exceed the current
     * cache limit. While fewer than 100 tuples have been counted and nothing has been
     * spilled yet, each newly inserted tuple refines the average tuple size and, from it,
     * the number of tuples the memory budget can hold.
     *
     * @param tuple the tuple to add
     * @throws IllegalStateException if reading from the bag has already started
     */
    @Override // org.apache.pig.data.DefaultAbstractBag, org.apache.pig.data.DataBag
    public void add(Tuple tuple) {
        if (this.mReadStarted) {
            throw new IllegalStateException("InternalDistinctBag is closed for adding new tuples");
        }
        if (this.mContents.size() > this.cacheLimit) {
            proactive_spill(null);
        }
        if (!this.mContents.add(tuple)) {
            // Already present in memory; nothing else to do.
            return;
        }
        this.mSize++;

        boolean nothingSpilled = this.mSpillFiles == null || this.mSpillFiles.isEmpty();
        if (this.mSize >= 100 || !nothingSpilled) {
            // The cache limit is only estimated from the first tuples, before any spill.
            return;
        }

        this.memUsage += tuple.getMemorySize();
        long averageTupleSize = this.memUsage / this.mContents.size();
        if (averageTupleSize > 0) {
            long capacity = this.maxMemUsage / averageTupleSize;
            // Clamp before narrowing: a capacity above Integer.MAX_VALUE would otherwise wrap,
            // possibly to a negative limit that forces a spill on every add.
            this.cacheLimit = (int) Math.min(capacity, (long) Integer.MAX_VALUE);
            log.debug("Memory can hold " + this.cacheLimit + " records.");
        }
    }

    /**
     * Adds every tuple of the given bag through add(), so duplicates are dropped and
     * spilling applies as usual.
     *
     * @param dataBag the bag whose tuples are added
     */
    @Override // org.apache.pig.data.DefaultAbstractBag, org.apache.pig.data.DataBag
    public void addAll(DataBag dataBag) {
        for (Iterator<Tuple> source = dataBag.iterator(); source.hasNext();) {
            Tuple current = source.next();
            add(current);
        }
    }

    /**
     * Adds every tuple of the given collection through add(), so duplicates are dropped
     * and spilling applies as usual.
     *
     * @param collection the tuples to add
     */
    @Override // org.apache.pig.data.DefaultAbstractBag
    public void addAll(Collection<Tuple> collection) {
        for (Tuple current : collection) {
            add(current);
        }
    }

    /**
     * Spills the bag by delegating to proactive_spill with a null argument.
     *
     * @return the value returned by proactive_spill (presumably the number of tuples
     *         spilled - confirm against the superclass)
     */
    @Override // org.apache.pig.impl.util.Spillable
    public long spill() {
        return proactive_spill(null);
    }
}
