package com.github.bentorfs.ai.ml.reinforcement.qlearning;

import com.github.bentorfs.ai.common.FunctionLearner;
import com.github.bentorfs.ai.ml.reinforcement.qlearning.strategy.ActionSelectionStrategy;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/github/bentorfs/ai/ml/reinforcement/qlearning/QLearningAlgorithm.class */
/**
 * Runs Q-learning episodes against a {@link FunctionLearner} that approximates the Q-function.
 *
 * <p>Each step of an episode picks a successor state through the configured
 * {@link ActionSelectionStrategy}, performs the transition, and shows the learner one training
 * example whose target is {@code reward + discountFactor * max Q(successor, ·)}. The learner's
 * input for a transition is the two-element list {@code [fromState, toState]}.
 */
public class QLearningAlgorithm {
    protected Logger logger = LoggerFactory.getLogger(getClass());

    /** Weight applied to the best predicted value of the successor state when building a target. */
    private final double discountFactor;

    /** Strategy that chooses which of the possible next states to move to. */
    private final ActionSelectionStrategy actionSelectionStrategy;

    /** Names of action-selection approaches; not referenced by this class itself. */
    public enum ActionStrategy {
        greedy,
        boltzmann
    }

    /**
     * Creates a Q-learning algorithm.
     *
     * @param discountFactor weight of the successor state's best predicted value in the target
     * @param actionSelectionStrategy strategy used to choose the next state at every step
     */
    public QLearningAlgorithm(double discountFactor, ActionSelectionStrategy actionSelectionStrategy) {
        this.discountFactor = discountFactor;
        this.actionSelectionStrategy = actionSelectionStrategy;
    }

    /**
     * Experiences one episode, starting at {@code state} and continuing until a final state is
     * reached, training {@code functionLearner} on every transition taken.
     *
     * @param functionLearner learner that predicts, and is trained on, transition values
     * @param state state the episode starts in
     * @return the episode, holding every transition taken together with its immediate reward
     */
    public Episode learnFunction(FunctionLearner<Object, Double> functionLearner, State state) {
        Episode episode = new Episode();
        State current = state;
        while (!current.isFinalState()) {
            State next = selectNextState(current, functionLearner);
            ActionResult result = current.goToState(next);
            // Training target: immediate reward plus the discounted best value reachable from 'next'.
            double target = result.getReward()
                    + (this.discountFactor * getHighestQInState(functionLearner, next));
            functionLearner.showExample(Double.valueOf(target), getAttributesList(current, next));
            episode.addStep(current, next, result.getReward());
            current = next;
        }
        this.logger.debug("Q-Learning experienced an episode");
        return episode;
    }

    /**
     * Returns the highest value the learner predicts for any transition out of {@code state}.
     *
     * <p>When {@code state} has no possible next states the result is {@code 0.0}, so that a
     * successor without outgoing transitions contributes nothing to the target instead of turning
     * it into negative infinity (or NaN when the discount factor is zero).
     */
    private double getHighestQInState(FunctionLearner<Object, Double> functionLearner, State state) {
        double best = Double.NEGATIVE_INFINITY;
        boolean hasSuccessor = false;
        for (State candidate : state.getPossibleNextStates()) {
            hasSuccessor = true;
            double predicted = functionLearner.predictValue(getAttributesList(state, candidate)).doubleValue();
            if (predicted > best) {
                best = predicted;
            }
        }
        return hasSuccessor ? best : 0.0d;
    }

    /** Delegates the choice of the next state to the configured action-selection strategy. */
    private State selectNextState(State state, FunctionLearner<Object, Double> functionLearner) {
        return this.actionSelectionStrategy.getNextState(state, state.getPossibleNextStates(), functionLearner);
    }

    /** Builds the learner input for a transition: a list holding the origin and the destination. */
    private List<Object> getAttributesList(State state, State state2) {
        List<Object> attributes = new ArrayList<>(2);
        attributes.add(state);
        attributes.add(state2);
        return attributes;
    }
}
