package com.github.bentorfs.ai.ml.reinforcement.qlearning.strategy;

import com.github.bentorfs.ai.common.FunctionLearner;
import com.github.bentorfs.ai.ml.reinforcement.qlearning.State;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.math3.distribution.EnumeratedDistribution;
import org.apache.commons.math3.util.Pair;

/* loaded from: input_file:com/github/bentorfs/ai/ml/reinforcement/qlearning/strategy/BoltzmannStrategy.class */
/**
 * Action selection strategy that samples the next state from a Boltzmann (softmax)
 * distribution over the Q-values predicted for the candidate states.
 *
 * <p>Each candidate {@code s'} receives a weight {@code exp((Q(s, s') - maxQ) / temperature)},
 * where {@code maxQ} is the highest predicted Q-value among the candidates. Subtracting
 * {@code maxQ} does not change the resulting probabilities but keeps every exponent
 * non-positive, so the exponential cannot overflow.
 */
public class BoltzmannStrategy implements ActionSelectionStrategy {
    /** Divisor applied to the shifted Q-values; always strictly positive. */
    private final int temperature;

    /**
     * Creates a Boltzmann strategy with the given temperature.
     *
     * @param i the temperature; must be strictly positive
     * @return a new strategy instance
     * @throws IllegalArgumentException if {@code i} is zero or negative
     */
    public static ActionSelectionStrategy withTemperature(int i) {
        return new BoltzmannStrategy(i);
    }

    private BoltzmannStrategy(int i) {
        // A zero temperature makes the best candidate's exponent 0/0 = NaN, and a negative
        // one turns the exponents non-negative (overflow risk), so fail fast here instead
        // of producing an invalid distribution at sampling time.
        if (i <= 0) {
            throw new IllegalArgumentException("temperature must be positive, but was " + i);
        }
        this.temperature = i;
    }

    /**
     * Samples one of the candidate states with probability proportional to
     * {@code exp(Q / temperature)}, where {@code Q} is the value predicted by
     * {@code functionLearner} for the pair (current state, candidate).
     *
     * <p>NOTE: {@code list} is expected to be non-empty; an empty list is not handled here
     * and is passed on to {@link EnumeratedDistribution} as an empty probability mass function.
     *
     * @param state           the current state
     * @param list            the candidate next states to choose from
     * @param functionLearner the learner used to predict the Q-value of each transition
     * @return the sampled next state
     */
    @Override // com.github.bentorfs.ai.ml.reinforcement.qlearning.strategy.ActionSelectionStrategy
    public State getNextState(State state, List<State> list, FunctionLearner<Object, Double> functionLearner) {
        int candidateCount = list.size();

        // Predict every Q-value exactly once and track the maximum over the states that are
        // actually being sampled, so the shift below is guaranteed to bound the exponents.
        double[] qValues = new double[candidateCount];
        double highestQ = Double.NEGATIVE_INFINITY;
        int index = 0;
        for (State candidate : list) {
            double q = functionLearner.predictValue(getAttributesList(state, candidate)).doubleValue();
            qValues[index++] = q;
            if (q > highestQ) {
                highestQ = q;
            }
        }

        // Unnormalised Boltzmann weights; the best candidate always gets exp(0) = 1.
        double[] weights = new double[candidateCount];
        double totalWeight = 0.0d;
        for (int k = 0; k < candidateCount; k++) {
            weights[k] = Math.exp((qValues[k] - highestQ) / this.temperature);
            totalWeight += weights[k];
        }

        // Pair weights with candidates by position rather than through a map keyed by State,
        // so duplicate or equal states in the list each keep their own probability mass.
        List<Pair<State, Double>> pmf = new ArrayList<Pair<State, Double>>(candidateCount);
        index = 0;
        for (State candidate : list) {
            pmf.add(new Pair<State, Double>(candidate, Double.valueOf(weights[index++] / totalWeight)));
        }
        return new EnumeratedDistribution<State>(pmf).sample();
    }

    /**
     * Builds the two-element input passed to the function learner for a transition:
     * the current state followed by the candidate next state.
     */
    private List<Object> getAttributesList(State state, State state2) {
        List<Object> attributes = new ArrayList<Object>(2);
        attributes.add(state);
        attributes.add(state2);
        return attributes;
    }
}
