package org.apache.pig.pen;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.IdentityHashSet;
import org.apache.pig.impl.util.Pair;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.logical.relational.LOCogroup;
import org.apache.pig.newplan.logical.relational.LOCross;
import org.apache.pig.newplan.logical.relational.LODistinct;
import org.apache.pig.newplan.logical.relational.LOFilter;
import org.apache.pig.newplan.logical.relational.LOForEach;
import org.apache.pig.newplan.logical.relational.LOJoin;
import org.apache.pig.newplan.logical.relational.LOLimit;
import org.apache.pig.newplan.logical.relational.LOLoad;
import org.apache.pig.newplan.logical.relational.LOSort;
import org.apache.pig.newplan.logical.relational.LOSplit;
import org.apache.pig.newplan.logical.relational.LOStore;
import org.apache.pig.newplan.logical.relational.LOUnion;
import org.apache.pig.newplan.logical.relational.LogicalPlan;
import org.apache.pig.newplan.logical.relational.LogicalRelationalNodesVisitor;
import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
import org.apache.pig.pen.util.ExampleTuple;
import org.apache.pig.pen.util.LineageTracer;
import org.apache.pig.pen.util.MetricEvaluation;
import org.apache.pig.pen.util.PreOrderDepthFirstWalker;

/* loaded from: input_file:WEB-INF/lib/pig-0.9.1.jar:org/apache/pig/pen/LineageTrimmingVisitor.class */
/**
 * Visitor over a {@link LogicalPlan} that tries to shrink the example base data attached to the
 * plan's {@link LOLoad} operators.
 *
 * <p>The plan is walked with a {@link PreOrderDepthFirstWalker}. For each visited operator a
 * smaller candidate base data set is built, the example data is regenerated through the
 * {@link ExampleGenerator}, and the candidate is kept only when the completeness reported by
 * {@link MetricEvaluation} is at least as high as the best value seen so far. Once a candidate
 * lowers completeness, {@link #continueTrimming} is cleared and the remaining visits are no-ops.
 */
public class LineageTrimmingVisitor extends LogicalRelationalNodesVisitor {
    LogicalPlan plan;
    /** Current base data per load; updated in place whenever a smaller candidate is accepted. */
    Map<LOLoad, DataBag> baseData;
    /** Trimmed bag already chosen for a file spec, reused by other loads of the same file spec. */
    Map<FileSpec, DataBag> inputToDataMap;
    Map<Operator, PhysicalOperator> LogToPhyMap;
    PhysicalPlan physPlan;
    /** Best completeness observed so far; candidates must reach at least this value. */
    double completeness;
    Log log;
    /** Equivalence classes recorded for operators that are still waiting to be visited. */
    Map<Operator, Collection<IdentityHashSet<Tuple>>> AffinityGroups;
    /** Lineage tracer recorded for operators that are still waiting to be visited. */
    Map<Operator, LineageTracer> Lineage;
    /** Cleared as soon as a trimming step would reduce completeness. */
    boolean continueTrimming;
    PigContext pc;
    private ExampleGenerator eg;

    /**
     * Creates the visitor and immediately runs {@link #init()}.
     *
     * @param logicalPlan plan to walk
     * @param map base data per load operator; this map is mutated by the visitor
     * @param exampleGenerator generator used to re-derive example data for candidate base data
     * @param map2 logical-to-physical operator map (replaced by the generator's map in {@link #init()})
     * @param physicalPlan physical plan, stored as-is
     * @param pigContext Pig context, stored as-is
     * @throws IOException if the walker or {@link #init()} fails
     * @throws InterruptedException if {@link #init()} is interrupted
     */
    public LineageTrimmingVisitor(LogicalPlan logicalPlan, Map<LOLoad, DataBag> map, ExampleGenerator exampleGenerator, Map<Operator, PhysicalOperator> map2, PhysicalPlan physicalPlan, PigContext pigContext) throws IOException, InterruptedException {
        super(logicalPlan, new PreOrderDepthFirstWalker(logicalPlan));
        this.completeness = 100.0d;
        this.log = LogFactory.getLog(getClass());
        this.AffinityGroups = new HashMap<>();
        this.Lineage = new HashMap<>();
        this.baseData = map;
        this.plan = logicalPlan;
        this.LogToPhyMap = map2;
        this.pc = pigContext;
        this.physPlan = physicalPlan;
        this.eg = exampleGenerator;
        this.inputToDataMap = new HashMap<>();
        // NOTE(review): init() is public and overridable yet invoked from the constructor;
        // kept as-is because changing it would alter the class's public interface.
        init();
    }

    /**
     * Seeds the per-operator state from the example generator: every sink of the plan is given
     * the generator's current lineage and equivalence classes, the starting completeness is
     * computed, and trimming is enabled.
     *
     * @throws IOException if the generator fails to produce data
     * @throws InterruptedException if data generation is interrupted
     */
    public void init() throws IOException, InterruptedException {
        Map<Operator, DataBag> derivedData = this.eg.getData();
        LineageTracer tracer = this.eg.getLineage();
        Map<LogicalRelationalOperator, Collection<IdentityHashSet<Tuple>>> eqClassesByOperator = this.eg.getLoToEqClassMap();
        for (Operator sink : this.plan.getSinks()) {
            this.Lineage.put(sink, tracer);
            this.AffinityGroups.put(sink, this.eg.getEqClasses());
        }
        this.completeness = MetricEvaluation.getCompleteness(null, derivedData, eqClassesByOperator, true);
        this.LogToPhyMap = this.eg.getLogToPhyMap();
        this.continueTrimming = true;
    }

    /**
     * Trims at a cogroup unless its first successor is a {@link LOForEach}; in that case nothing
     * is done here and the for-each visit records state for the cogroup's inputs instead.
     *
     * @throws FrontendException wrapping any failure raised while trimming
     */
    @Override
    public void visit(LOCogroup lOCogroup) throws FrontendException {
        boolean followedByForEach = this.plan.getSuccessors(lOCogroup) != null
                && this.plan.getSuccessors(lOCogroup).get(0) instanceof LOForEach;
        if (followedByForEach || !this.continueTrimming) {
            return;
        }
        try {
            this.continueTrimming = checkCompleteness(lOCogroup);
            // Inputs receive the generator's state regardless of the completeness outcome.
            for (Operator input : lOCogroup.getInputs(this.plan)) {
                this.Lineage.put(input, this.eg.getLineage());
                this.AffinityGroups.put(input, this.eg.getEqClasses());
            }
        } catch (Exception e) {
            throw new FrontendException("Exception : " + e.getMessage(), e);
        }
    }

    @Override
    public void visit(LOJoin lOJoin) throws FrontendException {
        processIfTrimming(lOJoin);
    }

    @Override
    public void visit(LOCross lOCross) throws FrontendException {
        processIfTrimming(lOCross);
    }

    @Override
    public void visit(LODistinct lODistinct) throws FrontendException {
        processIfTrimming(lODistinct);
    }

    @Override
    public void visit(LOFilter lOFilter) throws FrontendException {
        processIfTrimming(lOFilter);
    }

    @Override
    public void visit(LOStore lOStore) throws FrontendException {
        processIfTrimming(lOStore);
    }

    @Override
    public void visit(LOForEach lOForEach) throws FrontendException {
        processIfTrimming(lOForEach);
    }

    @Override
    public void visit(LOLimit lOLimit) throws FrontendException {
        processIfTrimming(lOLimit);
    }

    @Override
    public void visit(LOLoad lOLoad) throws FrontendException {
        processIfTrimming(lOLoad);
    }

    @Override
    public void visit(LOSort lOSort) throws FrontendException {
        processIfTrimming(lOSort);
    }

    @Override
    public void visit(LOSplit lOSplit) throws FrontendException {
        processIfTrimming(lOSplit);
    }

    @Override
    public void visit(LOUnion lOUnion) throws FrontendException {
        processIfTrimming(lOUnion);
    }

    /** Delegates to {@link #processOperator} while trimming is still enabled; otherwise a no-op. */
    private void processIfTrimming(LogicalRelationalOperator operator) throws FrontendException {
        if (this.continueTrimming) {
            processOperator(operator);
        }
    }

    /**
     * Builds a reduced copy of the base data that still touches every given equivalence class.
     *
     * <p>Equivalence-class members are grouped by their lineage representative. Representatives
     * are then picked greedily (most still-uncovered classes first, lower weighted count breaking
     * ties) until every class is covered. Only base tuples that belong to a picked representative
     * according to the tracer's membership map are retained.
     *
     * @param map base data per load; not modified
     * @param lineageTracer tracer supplying representatives, weights and membership
     * @param collection equivalence classes that must stay covered
     * @return a new map with one freshly created, filtered bag per load
     */
    private Map<LOLoad, DataBag> PruneBaseDataConstrainedCoverage(Map<LOLoad, DataBag> map, LineageTracer lineageTracer, Collection<IdentityHashSet<Tuple>> collection) {
        IdentityHashMap<Tuple, Collection<Tuple>> membershipMap = lineageTracer.getMembershipMap();
        IdentityHashMap<Tuple, Double> weightedCounts = lineageTracer.getWeightedCounts(2.0f, 1.0f);

        // representative tuple -> equivalence classes it still needs to cover
        IdentityHashMap<Tuple, Set<IdentityHashSet<Tuple>>> groupToEqClasses = new IdentityHashMap<>();
        for (IdentityHashSet<Tuple> eqClass : collection) {
            for (Tuple member : eqClass) {
                Tuple representative = lineageTracer.getRepresentative(member);
                Set<IdentityHashSet<Tuple>> covered = groupToEqClasses.get(representative);
                if (covered == null) {
                    covered = new HashSet<>();
                    groupToEqClasses.put(representative, covered);
                }
                covered.add(eqClass);
            }
        }

        IdentityHashSet<Tuple> selectedGroups = new IdentityHashSet<>();
        while (!groupToEqClasses.isEmpty()) {
            double bestWeight = -1.0d;
            Tuple bestGroup = null;
            Set<IdentityHashSet<Tuple>> bestCovered = null;
            int bestCount = 0;
            for (Tuple group : groupToEqClasses.keySet()) {
                double weight = weightedCounts.get(group).doubleValue();
                Set<IdentityHashSet<Tuple>> covered = groupToEqClasses.get(group);
                int count = covered.size();
                if (count > bestCount || (count == bestCount && weight < bestWeight)) {
                    bestGroup = group;
                    bestCovered = covered;
                    // A group that was already selected becomes the current pick but does not
                    // raise the thresholds later candidates are compared against.
                    if (!selectedGroups.contains(group)) {
                        bestWeight = weight;
                        bestCount = count;
                    }
                }
            }
            selectedGroups.add(bestGroup);

            // Snapshot the newly covered classes: bestCovered is itself one of the sets that is
            // emptied by the loop below.
            Set<IdentityHashSet<Tuple>> newlyCovered = new HashSet<>(bestCovered);
            LinkedList<Tuple> exhaustedGroups = new LinkedList<>();
            for (Tuple group : groupToEqClasses.keySet()) {
                Set<IdentityHashSet<Tuple>> remaining = groupToEqClasses.get(group);
                Iterator<IdentityHashSet<Tuple>> remainingIt = remaining.iterator();
                while (remainingIt.hasNext()) {
                    if (newlyCovered.contains(remainingIt.next())) {
                        remainingIt.remove();
                    }
                }
                if (remaining.size() == 0) {
                    exhaustedGroups.add(group);
                }
            }
            // Removal is deferred so the key set is not modified while it is being iterated.
            for (Tuple group : exhaustedGroups) {
                groupToEqClasses.remove(group);
            }
        }

        // Expand the picked representatives into the full set of tuples to keep.
        IdentityHashSet<Tuple> keptTuples = new IdentityHashSet<>();
        for (Tuple group : selectedGroups) {
            for (Tuple member : membershipMap.get(group)) {
                keptTuples.add(member);
            }
        }

        Map<LOLoad, DataBag> pruned = new HashMap<>();
        for (Map.Entry<LOLoad, DataBag> entry : map.entrySet()) {
            DataBag trimmed = BagFactory.getInstance().newDefaultBag();
            for (Tuple tuple : entry.getValue()) {
                if (keptTuples.contains(tuple)) {
                    trimmed.add(tuple);
                }
            }
            pruned.put(entry.getKey(), trimmed);
        }
        return pruned;
    }

    /**
     * Attempts to replace a load's bag with a smaller one built up tuple by tuple.
     *
     * <p>If a bag was already chosen for the same file spec it is reused. Bags that are missing
     * or hold fewer than two tuples are left alone. Otherwise non-synthetic tuples are tried
     * first, and synthetic ones only if the non-synthetic ones did not restore completeness.
     *
     * @param lOLoad load operator whose base data is being trimmed
     * @throws FrontendException if regenerating example data fails
     */
    private void processLoad(LOLoad lOLoad) throws FrontendException {
        FileSpec fileSpec = lOLoad.getFileSpec();
        DataBag alreadyTrimmed = this.inputToDataMap.get(fileSpec);
        if (alreadyTrimmed != null) {
            this.baseData.put(lOLoad, alreadyTrimmed);
            return;
        }
        DataBag currentBag = this.baseData.get(lOLoad);
        if (currentBag == null || currentBag.size() < 2) {
            return;
        }

        Set<Tuple> realTuples = new HashSet<>();
        Set<Tuple> syntheticTuples = new HashSet<>();
        for (Tuple tuple : currentBag) {
            if (((ExampleTuple) tuple).synthetic) {
                syntheticTuples.add(tuple);
            } else {
                realTuples.add(tuple);
            }
        }

        // Candidate base data: this load, and every other load of the same file spec, share one
        // initially empty bag; all remaining loads keep their current bags.
        Map<LOLoad, DataBag> candidateBaseData = new HashMap<>();
        DataBag candidateBag = BagFactory.getInstance().newDefaultBag();
        candidateBaseData.put(lOLoad, candidateBag);
        for (Map.Entry<LOLoad, DataBag> entry : this.baseData.entrySet()) {
            LOLoad otherLoad = entry.getKey();
            if (otherLoad == lOLoad) {
                continue;
            }
            if (otherLoad.getFileSpec().equals(fileSpec)) {
                candidateBaseData.put(otherLoad, candidateBag);
            } else {
                candidateBaseData.put(otherLoad, entry.getValue());
            }
        }

        // checkNewBaseData returns true when the offered tuples were not sufficient.
        if (checkNewBaseData(candidateBag, candidateBaseData, realTuples)) {
            checkNewBaseData(candidateBag, candidateBaseData, syntheticTuples);
        }
        this.inputToDataMap.put(lOLoad.getFileSpec(), this.baseData.get(lOLoad));
    }

    /**
     * Adds tuples from {@code set} to {@code dataBag} until completeness is restored.
     *
     * <p>A first pass adds tuples one at a time, recording the completeness reached after each
     * addition and stopping early once the current completeness is matched. The bag is then
     * reset to its initial contents and the recorded tuples are re-added in descending order of
     * recorded completeness; as soon as the current completeness is matched the candidate base
     * data is committed to {@link #baseData}.
     *
     * @param dataBag bag under construction; it is referenced from {@code map} and mutated here
     * @param map candidate base data evaluated after every addition
     * @param set tuples that may be added to the bag
     * @return {@code false} if the candidate was committed, {@code true} if the tuples of
     *         {@code set} were not enough to reach the current completeness
     * @throws FrontendException if regenerating example data fails
     */
    private boolean checkNewBaseData(DataBag dataBag, Map<LOLoad, DataBag> map, Set<Tuple> set) throws FrontendException {
        LinkedList<Pair<Tuple, Double>> ranked = new LinkedList<>();
        DataBag initialContents = BagFactory.getInstance().newDefaultBag();
        initialContents.addAll(dataBag);
        double target = this.completeness;

        for (Tuple tuple : set) {
            dataBag.add(tuple);
            try {
                double reached = MetricEvaluation.getCompleteness(null, this.eg.getData(map), this.eg.getLoToEqClassMap(), true);
                ranked.add(new Pair<Tuple, Double>(tuple, Double.valueOf(reached)));
                if (reached >= target) {
                    break;
                }
            } catch (Exception e) {
                throw new FrontendException("Exception: " + e.getMessage(), e);
            }
        }

        // Highest recorded completeness first. Double.compare is used instead of comparing the
        // boxed values with ==, which tested object identity and violated the Comparator contract.
        Collections.sort(ranked, new Comparator<Pair<Tuple, Double>>() {
            @Override
            public int compare(Pair<Tuple, Double> left, Pair<Tuple, Double> right) {
                return Double.compare(right.second, left.second);
            }
        });

        dataBag.clear();
        dataBag.addAll(initialContents);
        for (Pair<Tuple, Double> candidate : ranked) {
            dataBag.add(candidate.first);
            try {
                double reached = MetricEvaluation.getCompleteness(null, this.eg.getData(map), this.eg.getLoToEqClassMap(), true);
                if (reached >= this.completeness) {
                    this.completeness = reached;
                    this.baseData.putAll(map);
                    return false;
                }
            } catch (Exception e) {
                throw new FrontendException("Exception: " + e.getMessage(), e);
            }
        }
        return true;
    }

    /**
     * Common handling for every operator except cogroup: loads go to {@link #processLoad};
     * anything else is trimmed via {@link #checkCompleteness} and, if trimming may continue, the
     * generator's current equivalence classes and lineage are recorded for the operators that
     * feed it.
     *
     * @param logicalRelationalOperator operator being visited
     * @throws FrontendException wrapping any failure, with the original exception as cause
     */
    private void processOperator(LogicalRelationalOperator logicalRelationalOperator) throws FrontendException {
        try {
            if (logicalRelationalOperator instanceof LOLoad) {
                processLoad((LOLoad) logicalRelationalOperator);
                return;
            }
            this.continueTrimming = checkCompleteness(logicalRelationalOperator);
            if (this.plan.getPredecessors(logicalRelationalOperator) == null || !this.continueTrimming) {
                return;
            }
            Operator firstPredecessor = this.plan.getPredecessors(logicalRelationalOperator).get(0);
            if ((logicalRelationalOperator instanceof LOForEach) && (firstPredecessor instanceof LOCogroup)) {
                // visit(LOCogroup) skips a cogroup followed by a for-each, so the state is
                // recorded directly on the cogroup's inputs here.
                for (Operator cogroupInput : ((LOCogroup) firstPredecessor).getInputs(this.plan)) {
                    this.AffinityGroups.put(cogroupInput, this.eg.getEqClasses());
                    this.Lineage.put(cogroupInput, this.eg.getLineage());
                }
            } else {
                for (Operator predecessor : this.plan.getPredecessors(logicalRelationalOperator)) {
                    this.AffinityGroups.put(predecessor, this.eg.getEqClasses());
                    this.Lineage.put(predecessor, this.eg.getLineage());
                }
            }
        } catch (Exception e) {
            this.log.error("Exception: " + e.getMessage(), e);
            throw new FrontendException("Exception: " + e.getMessage(), e);
        }
    }

    /**
     * Prunes the base data using the lineage and equivalence classes recorded for the operator
     * (both entries are consumed) and accepts the result only if completeness does not drop.
     *
     * @param logicalRelationalOperator operator whose recorded state drives the pruning
     * @return the value of {@link #continueTrimming} after the check; it is set to
     *         {@code false} when the pruned data lowers completeness
     * @throws Exception if pruning or regenerating example data fails
     */
    private boolean checkCompleteness(LogicalRelationalOperator logicalRelationalOperator) throws Exception {
        LineageTracer tracer = this.Lineage.remove(logicalRelationalOperator);
        Collection<IdentityHashSet<Tuple>> eqClasses = this.AffinityGroups.remove(logicalRelationalOperator);
        Map<LOLoad, DataBag> prunedBaseData = PruneBaseDataConstrainedCoverage(this.baseData, tracer, eqClasses);
        double reached = MetricEvaluation.getCompleteness(null, this.eg.getData(prunedBaseData), this.eg.getLoToEqClassMap(), true);
        if (reached >= this.completeness) {
            this.completeness = reached;
            this.baseData.putAll(prunedBaseData);
        } else {
            this.continueTrimming = false;
        }
        return this.continueTrimming;
    }

    /**
     * Returns the base data map held by this visitor.
     *
     * @return the live map (not a copy), reflecting every trimming step accepted so far
     */
    public Map<LOLoad, DataBag> getBaseData() {
        return this.baseData;
    }
}
