/*
 * Decompiled with CFR 0.152.
 */
package org.jsoar.kernel.exploration;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.Map;
import org.jsoar.kernel.Agent;
import org.jsoar.kernel.exploration.ExplorationParameter;
import org.jsoar.kernel.exploration.ExplorationValidateEpsilon;
import org.jsoar.kernel.exploration.ExplorationValidateTemperature;
import org.jsoar.kernel.exploration.ExplorationValueFunction;
import org.jsoar.kernel.learning.rl.ReinforcementLearning;
import org.jsoar.kernel.learning.rl.ReinforcementLearningParams;
import org.jsoar.kernel.memory.Preference;
import org.jsoar.kernel.memory.PreferenceType;
import org.jsoar.kernel.memory.Slot;
import org.jsoar.kernel.symbols.DoubleSymbolImpl;
import org.jsoar.kernel.symbols.IntegerSymbolImpl;
import org.jsoar.kernel.symbols.SymbolImpl;
import org.jsoar.kernel.tracing.Trace;
import org.jsoar.util.adaptables.Adaptables;

/**
 * Holds an agent's exploration settings and selects one preference from a
 * linked list of candidates according to the configured {@link Policy}.
 *
 * <p>Two named parameters are registered at construction time:
 * {@code "epsilon"} (initial value 0.1) and {@code "temperature"} (initial
 * value 25.0). The initial policy is {@link Policy#USER_SELECT_SOFTMAX} and
 * the initial numeric-indifferent mode is
 * {@link NumericIndifferentMode#NUMERIC_INDIFFERENT_MODE_SUM}.
 *
 * <p>{@link #initialize()} must be called before
 * {@link #exploration_choose_according_to_policy(Slot, Preference)}, because
 * that method dereferences the reinforcement-learning component which is only
 * looked up in {@code initialize()}.
 *
 * <p>Method names keep their original snake_case spelling because they are
 * part of the public interface used by callers outside this file.
 */
public class Exploration {
    /** Name under which the epsilon parameter is registered. */
    private static final String EPSILON = "epsilon";
    /** Name under which the temperature parameter is registered. */
    private static final String TEMPERATURE = "temperature";

    private final Agent context;
    /** Assigned in {@link #initialize()}; {@code null} until then. */
    private ReinforcementLearning rl;
    private Policy userSelectMode = Policy.USER_SELECT_SOFTMAX;
    private boolean autoUpdate = false;
    private NumericIndifferentMode numeric_indifferent_mode = NumericIndifferentMode.NUMERIC_INDIFFERENT_MODE_SUM;
    /** Registered exploration parameters, keyed by parameter name. */
    private final Map<String, ExplorationParameter> parameters = new HashMap<String, ExplorationParameter>();

    /**
     * Creates the exploration component and registers the {@code "epsilon"}
     * and {@code "temperature"} parameters.
     *
     * @param context the owning agent
     */
    public Exploration(Agent context) {
        this.context = context;
        this.exploration_add_parameter(0.1, new ExplorationValidateEpsilon(), EPSILON);
        this.exploration_add_parameter(25.0, new ExplorationValidateTemperature(), TEMPERATURE);
    }

    /**
     * Looks up the agent's {@link ReinforcementLearning} component through
     * {@link Adaptables#adapt} and stores it for later use.
     */
    public void initialize() {
        this.rl = Adaptables.adapt(this.context, ReinforcementLearning.class);
    }

    /**
     * Sets the selection policy by its textual name.
     *
     * @param policy_name a name as returned by {@link Policy#getPolicyName()}
     * @return {@code true} if the name matched a policy and it was stored
     */
    public boolean exploration_set_policy(String policy_name) {
        // The Policy overload already rejects null, i.e. an unknown name.
        return this.exploration_set_policy(Policy.findPolicy(policy_name));
    }

    /**
     * Sets the selection policy.
     *
     * @param policy the new policy
     * @return {@code false} (leaving the policy unchanged) if {@code policy} is {@code null}
     */
    public boolean exploration_set_policy(Policy policy) {
        if (policy == null) {
            return false;
        }
        this.userSelectMode = policy;
        return true;
    }

    /** @return the currently configured selection policy */
    public Policy exploration_get_policy() {
        return this.userSelectMode;
    }

    /**
     * Sets the numeric-indifferent mode by its textual name.
     *
     * @param mode_name a name as returned by {@link NumericIndifferentMode#getModeName()}
     * @return {@code true} if the name matched a mode and it was stored
     */
    public boolean exploration_set_numeric_indifferent_mode(String mode_name) {
        // The enum overload already rejects null, i.e. an unknown name.
        return this.exploration_set_numeric_indifferent_mode(NumericIndifferentMode.findNumericIndifferentMode(mode_name));
    }

    /**
     * Sets the numeric-indifferent mode.
     *
     * @param mode the new mode
     * @return {@code false} (leaving the mode unchanged) if {@code mode} is {@code null}
     */
    public boolean exploration_set_numeric_indifferent_mode(NumericIndifferentMode mode) {
        if (mode == null) {
            return false;
        }
        this.numeric_indifferent_mode = mode;
        return true;
    }

    /** @return the currently configured numeric-indifferent mode */
    public NumericIndifferentMode exploration_get_numeric_indifferent_mode() {
        return this.numeric_indifferent_mode;
    }

    /**
     * Creates a parameter and registers it under {@code name}, replacing any
     * parameter previously registered under the same name.
     *
     * <p>The new parameter starts with the exponential reduction policy, an
     * exponential rate of 1.0 and a linear rate of 0.0.
     *
     * @param value    initial value
     * @param val_func validator consulted by the {@code exploration_valid_parameter_value} methods
     * @param name     registration key
     * @return the newly created parameter
     */
    public ExplorationParameter exploration_add_parameter(double value, ExplorationValueFunction val_func, String name) {
        ExplorationParameter created = new ExplorationParameter();
        created.value = value;
        created.name = name;
        created.reduction_policy = ExplorationParameter.ReductionPolicy.EXPLORATION_REDUCTION_EXPONENTIAL;
        created.val_func = val_func;
        created.rates.put(ExplorationParameter.ReductionPolicy.EXPLORATION_REDUCTION_EXPONENTIAL, 1.0);
        created.rates.put(ExplorationParameter.ReductionPolicy.EXPLORATION_REDUCTION_LINEAR, 0.0);
        this.parameters.put(name, created);
        return created;
    }

    /**
     * @param parameter parameter name
     * @return the parameter's current value, or 0.0 if no such parameter is registered
     */
    public double exploration_get_parameter_value(String parameter) {
        ExplorationParameter param = this.parameters.get(parameter);
        return param == null ? 0.0 : param.value;
    }

    /**
     * @param name parameter name
     * @return {@code true} if a parameter is registered under {@code name}
     */
    public boolean exploration_valid_parameter(String name) {
        return this.parameters.get(name) != null;
    }

    /**
     * Asks the named parameter's validator whether {@code value} is acceptable.
     *
     * @return the validator's answer, or {@code false} if the parameter is unknown
     */
    public boolean exploration_valid_parameter_value(String name, double value) {
        return this.exploration_valid_parameter_value(this.parameters.get(name), value);
    }

    /**
     * Asks {@code parameter}'s validator whether {@code value} is acceptable.
     *
     * @return the validator's answer, or {@code false} if {@code parameter} is {@code null}
     */
    boolean exploration_valid_parameter_value(ExplorationParameter parameter, double value) {
        return parameter != null && parameter.val_func.call(value);
    }

    /**
     * Stores {@code value} in the named parameter. The value is not validated
     * here; use {@link #exploration_valid_parameter_value(String, double)} first
     * if validation is wanted.
     *
     * @return {@code false} if the parameter is unknown
     */
    public boolean exploration_set_parameter_value(String name, double value) {
        return this.exploration_set_parameter_value(this.parameters.get(name), value);
    }

    /**
     * Stores {@code value} in {@code parameter} without validating it.
     *
     * @return {@code false} if {@code parameter} is {@code null}
     */
    boolean exploration_set_parameter_value(ExplorationParameter parameter, double value) {
        if (parameter == null) {
            return false;
        }
        parameter.value = value;
        return true;
    }

    /** @return whether {@link #exploration_update_parameters()} currently updates parameters */
    public boolean exploration_get_auto_update() {
        return this.autoUpdate;
    }

    /**
     * Enables or disables automatic parameter updates.
     *
     * @return always {@code true}
     */
    public boolean exploration_set_auto_update(boolean setting) {
        this.autoUpdate = setting;
        return true;
    }

    /**
     * Calls {@code update()} on every registered parameter, but only when
     * auto-update is enabled; otherwise does nothing.
     */
    public void exploration_update_parameters() {
        if (!this.exploration_get_auto_update()) {
            return;
        }
        for (ExplorationParameter p : this.parameters.values()) {
            p.update();
        }
    }

    /**
     * @param parameter parameter name
     * @return the parameter's reduction policy, or {@code null} if the parameter is unknown
     */
    public ExplorationParameter.ReductionPolicy exploration_get_reduction_policy(String parameter) {
        return this.exploration_get_reduction_policy(this.parameters.get(parameter));
    }

    /**
     * @return {@code parameter}'s reduction policy, or {@code null} if {@code parameter} is {@code null}
     */
    ExplorationParameter.ReductionPolicy exploration_get_reduction_policy(ExplorationParameter parameter) {
        return parameter == null ? null : parameter.reduction_policy;
    }

    /**
     * @return {@code true} if {@code parameter} is a registered parameter and
     *         {@code policy_name} names a reduction policy
     */
    public boolean exploration_valid_reduction_policy(String parameter, String policy_name) {
        if (this.parameters.get(parameter) == null) {
            return false;
        }
        return ExplorationParameter.ReductionPolicy.findPolicy(policy_name) != null;
    }

    /**
     * Sets the reduction policy of the named parameter.
     *
     * @return {@code false} if either the parameter or the policy name is unknown
     */
    public boolean exploration_set_reduction_policy(String parameter, String policy_name) {
        ExplorationParameter param = this.parameters.get(parameter);
        if (param == null) {
            return false;
        }
        ExplorationParameter.ReductionPolicy policy = ExplorationParameter.ReductionPolicy.findPolicy(policy_name);
        if (policy == null) {
            return false;
        }
        param.reduction_policy = policy;
        return true;
    }

    /**
     * @return the named parameter's reduction rate for the named policy, or
     *         0.0 if either name is unknown
     */
    public double exploration_get_reduction_rate(String parameter, String policy_name) {
        ExplorationParameter param = this.parameters.get(parameter);
        if (param == null) {
            return 0.0;
        }
        ExplorationParameter.ReductionPolicy policy = ExplorationParameter.ReductionPolicy.findPolicy(policy_name);
        if (policy == null) {
            return 0.0;
        }
        return param.getReductionRate(policy);
    }

    /**
     * Sets the named parameter's reduction rate for the named policy.
     *
     * @return {@code false} if either name is unknown; otherwise whatever
     *         {@code ExplorationParameter.setReductionRate} returns
     */
    public boolean exploration_set_reduction_rate(String parameter, String policy_name, double reduction_rate) {
        ExplorationParameter param = this.parameters.get(parameter);
        if (param == null) {
            return false;
        }
        ExplorationParameter.ReductionPolicy policy = ExplorationParameter.ReductionPolicy.findPolicy(policy_name);
        if (policy == null) {
            return false;
        }
        return param.setReductionRate(policy, reduction_rate);
    }

    /**
     * Selects one candidate according to the configured policy and, when
     * reinforcement learning is enabled, feeds the outcome to the RL component.
     *
     * <p>Steps, in order:
     * <ol>
     * <li>every candidate's {@code numeric_value} is recomputed via
     *     {@link #exploration_compute_value_of_candidate} with a default of 0.0;</li>
     * <li>if RL is enabled the learning policy is read from the agent's
     *     properties (otherwise {@code q} is assumed), and for {@code q} the
     *     largest candidate value is recorded;</li>
     * <li>a candidate is chosen by the policy-specific selector;</li>
     * <li>if RL is enabled, reward values are tabulated and an update is
     *     performed with the chosen value ({@code sarsa}) or the largest value
     *     ({@code q}); for {@code q}, {@code rl_watkins_clear} is additionally
     *     invoked when the chosen value differs from the largest one.</li>
     * </ol>
     *
     * @param s          the slot whose preferences are used to value the candidates
     * @param candidates head of the candidate list; must not be {@code null}
     * @return the selected candidate, never {@code null}
     * @throws NullPointerException if {@code candidates} is {@code null}
     */
    public Preference exploration_choose_according_to_policy(Slot s, Preference candidates) {
        if (candidates == null) {
            // Fail up front with a message instead of an anonymous NPE further down.
            throw new NullPointerException("candidates must contain at least one preference");
        }
        final Policy exploration_policy = this.exploration_get_policy();
        for (Preference cand = candidates; cand != null; cand = cand.next_candidate) {
            this.exploration_compute_value_of_candidate(cand, s, 0.0);
        }

        final boolean my_rl_enabled = this.rl.rl_enabled();
        final ReinforcementLearningParams.LearningPolicy my_learning_policy = my_rl_enabled ? this.context.getProperties().get(ReinforcementLearningParams.LEARNING_POLICY) : ReinforcementLearningParams.LearningPolicy.q;

        // For Q-learning the update uses the best available value, not the chosen one.
        double top_value = candidates.numeric_value;
        boolean top_rl = candidates.rl_contribution;
        if (my_rl_enabled && my_learning_policy == ReinforcementLearningParams.LearningPolicy.q) {
            for (Preference cand = candidates; cand != null; cand = cand.next_candidate) {
                if (cand.numeric_value > top_value) {
                    top_value = cand.numeric_value;
                    top_rl = cand.rl_contribution;
                }
            }
        }

        final Preference return_val;
        switch (exploration_policy) {
            case USER_SELECT_FIRST: {
                return_val = candidates;
                break;
            }
            case USER_SELECT_LAST: {
                Preference tail = candidates;
                while (tail.next_candidate != null) {
                    tail = tail.next_candidate;
                }
                return_val = tail;
                break;
            }
            case USER_SELECT_RANDOM: {
                return_val = this.exploration_randomly_select(candidates);
                break;
            }
            case USER_SELECT_SOFTMAX: {
                return_val = this.exploration_probabilistically_select(candidates);
                break;
            }
            case USER_SELECT_E_GREEDY: {
                return_val = this.exploration_epsilon_greedy_select(candidates);
                break;
            }
            case USER_SELECT_BOLTZMANN: {
                return_val = this.exploration_boltzmann_select(candidates);
                break;
            }
            default: {
                // Unreachable with the current enum; guards against a constant added later.
                throw new IllegalStateException("Unhandled exploration policy: " + exploration_policy);
            }
        }

        if (my_rl_enabled) {
            this.rl.rl_tabulate_reward_values();
            if (my_learning_policy == ReinforcementLearningParams.LearningPolicy.sarsa) {
                this.rl.rl_perform_update(return_val.numeric_value, return_val.rl_contribution, s.id);
            } else if (my_learning_policy == ReinforcementLearningParams.LearningPolicy.q) {
                this.rl.rl_perform_update(top_value, top_rl, s.id);
                if (return_val.numeric_value != top_value) {
                    ReinforcementLearning.rl_watkins_clear(s.id);
                }
            }
        }
        return return_val;
    }

    /**
     * Picks a candidate uniformly at random using the agent's random source.
     */
    private Preference exploration_randomly_select(Preference candidates) {
        int cand_count = Preference.countCandidates(candidates);
        int chosen_num = this.context.getRandom().nextInt(cand_count);
        return Preference.getCandidate(candidates, chosen_num);
    }

    /**
     * Picks a candidate with probability proportional to its
     * {@code numeric_value}; candidates whose value is not strictly positive
     * are never picked. If no candidate has a positive value the choice is
     * uniform over all candidates.
     *
     * @return the selected candidate, never {@code null}
     */
    private Preference exploration_probabilistically_select(Preference candidates) {
        double total_probability = 0.0;
        for (Preference cand = candidates; cand != null; cand = cand.next_candidate) {
            if (cand.numeric_value > 0.0) {
                total_probability += cand.numeric_value;
            }
        }
        if (total_probability == 0.0) {
            return this.exploration_randomly_select(candidates);
        }

        final double selected_probability = this.context.getRandom().nextDouble() * total_probability;
        double current_sum = 0.0;
        Preference last_positive = null;
        for (Preference cand = candidates; cand != null; cand = cand.next_candidate) {
            if (cand.numeric_value > 0.0) {
                current_sum += cand.numeric_value;
                if (selected_probability <= current_sum) {
                    return cand;
                }
                last_positive = cand;
            }
        }
        // Only reachable when the draw is not comparable (e.g. 0 * Infinity = NaN).
        // total_probability != 0 guarantees at least one positive candidate was seen,
        // so return that rather than null, which the caller would dereference.
        return last_positive;
    }

    /**
     * Picks a candidate with probability proportional to
     * {@code exp((value - max) / temperature)}, where {@code max} is the
     * largest candidate value and {@code temperature} is the current value of
     * the {@code "temperature"} parameter. When the INDIFFERENT trace category
     * is enabled, each candidate's value and probability are printed first.
     *
     * @param candidates head of the candidate list; must not be {@code null}
     * @return the selected candidate, never {@code null}
     */
    Preference exploration_boltzmann_select(Preference candidates) {
        final double t = this.exploration_get_parameter_value(TEMPERATURE);

        // Locate the largest value. Subtracting it keeps every exponent <= 0
        // for a positive temperature, so Math.exp cannot overflow.
        Preference best = candidates;
        int count = 1;
        for (Preference c = candidates.next_candidate; c != null; c = c.next_candidate) {
            ++count;
            if (best.numeric_value < c.numeric_value) {
                best = c;
            }
        }
        final double maxq = best.numeric_value;

        // Unnormalised weights, stored positionally to match list order.
        final double[] expvals = new double[count];
        double exptotal = 0.0;
        int index = 0;
        for (Preference c = candidates; c != null; c = c.next_candidate) {
            final double v = Math.exp((c.numeric_value - maxq) / t);
            expvals[index++] = v;
            exptotal += v;
        }

        Trace trace = this.context.getTrace();
        if (trace.isEnabled(Trace.Category.INDIFFERENT)) {
            index = 0;
            for (Preference c = candidates; c != null; c = c.next_candidate) {
                double prob = expvals[index++] / exptotal;
                trace.print("\n Candidate %s:  ", c.value);
                trace.print("Value (Sum) = %f, (Prob) = %f", c.numeric_value, prob);
            }
        }

        final double r = this.context.getRandom().nextDouble() * exptotal;
        double sum = 0.0;
        index = 0;
        for (Preference c = candidates; c != null; c = c.next_candidate) {
            sum += expvals[index++];
            if (sum >= r) {
                return c;
            }
        }
        // Only reachable when the weights are not comparable, e.g. a temperature
        // of 0 makes the top candidate's weight exp(0/0) = NaN. Fall back to the
        // highest-valued candidate instead of returning null.
        return best;
    }

    /**
     * With probability equal to the {@code "epsilon"} parameter picks a
     * candidate uniformly at random; otherwise picks a highest-valued
     * candidate. When the INDIFFERENT trace category is enabled, each
     * candidate's value is printed first.
     */
    private Preference exploration_epsilon_greedy_select(Preference candidates) {
        final double epsilon = this.exploration_get_parameter_value(EPSILON);
        Trace trace = this.context.getTrace();
        if (trace.isEnabled(Trace.Category.INDIFFERENT)) {
            for (Preference cand = candidates; cand != null; cand = cand.next_candidate) {
                trace.print("\n Candidate %s:  Value (Sum) = %f", cand.value, cand.numeric_value);
            }
        }
        if (this.context.getRandom().nextDouble() < epsilon) {
            return this.exploration_randomly_select(candidates);
        }
        return this.exploration_get_highest_q_value_pref(candidates);
    }

    /**
     * Returns a candidate with the largest {@code numeric_value}; ties are
     * broken uniformly at random using the agent's random source.
     *
     * @param candidates head of the candidate list; must not be {@code null}
     */
    private Preference exploration_get_highest_q_value_pref(Preference candidates) {
        Preference top_cand = candidates;
        double top_value = candidates.numeric_value;
        int num_max_cand = 0;
        for (Preference cand = candidates; cand != null; cand = cand.next_candidate) {
            if (cand.numeric_value > top_value) {
                top_value = cand.numeric_value;
                top_cand = cand;
                num_max_cand = 1;
            } else if (cand.numeric_value == top_value) {
                ++num_max_cand;
            }
        }
        // "<= 1" rather than "== 1": if the first value is NaN nothing ever
        // compares equal, the count stays 0 and nextInt(0) would throw.
        if (num_max_cand <= 1) {
            return top_cand;
        }

        // Return the chosen_num-th (0-based) candidate whose value ties the maximum.
        int chosen_num = this.context.getRandom().nextInt(num_max_cand);
        for (Preference cand = candidates; cand != null; cand = cand.next_candidate) {
            if (cand.numeric_value == top_value) {
                if (chosen_num == 0) {
                    return cand;
                }
                --chosen_num;
            }
        }
        return top_cand;
    }

    /**
     * Recomputes {@code cand.numeric_value}, {@code cand.rl_contribution} and
     * {@code cand.total_preferences_for_candidate} from the slot's
     * NUMERIC_INDIFFERENT and BINARY_INDIFFERENT preferences whose value is
     * the same symbol as {@code cand.value}.
     *
     * <p>The referents of all matching preferences are summed. If none match,
     * the value is {@code default_value} and the count is set to 1. In AVG
     * mode the sum is then divided by the count. {@code rl_contribution} is
     * set when a matching NUMERIC_INDIFFERENT preference comes from a
     * production whose {@code rlRuleInfo} is non-null.
     *
     * @param cand          candidate to update; a {@code null} candidate is ignored
     * @param s             slot supplying the preferences
     * @param default_value value used when no preference matches
     */
    public void exploration_compute_value_of_candidate(Preference cand, Slot s, double default_value) {
        if (cand == null) {
            return;
        }
        cand.total_preferences_for_candidate = 0;
        cand.numeric_value = 0.0;
        cand.rl_contribution = false;

        for (Preference pref = s.getPreferencesByType(PreferenceType.NUMERIC_INDIFFERENT); pref != null; pref = pref.next) {
            if (cand.value == pref.value) {
                ++cand.total_preferences_for_candidate;
                cand.numeric_value += Exploration.get_number_from_symbol(pref.referent);
                if (pref.inst.prod.rlRuleInfo != null) {
                    cand.rl_contribution = true;
                }
            }
        }
        for (Preference pref = s.getPreferencesByType(PreferenceType.BINARY_INDIFFERENT); pref != null; pref = pref.next) {
            if (cand.value == pref.value) {
                ++cand.total_preferences_for_candidate;
                cand.numeric_value += Exploration.get_number_from_symbol(pref.referent);
            }
        }

        if (cand.total_preferences_for_candidate == 0) {
            cand.numeric_value = default_value;
            // Count is forced to 1 so the AVG division below never divides by zero.
            cand.total_preferences_for_candidate = 1;
        }
        if (this.numeric_indifferent_mode == NumericIndifferentMode.NUMERIC_INDIFFERENT_MODE_AVG) {
            cand.numeric_value /= (double)cand.total_preferences_for_candidate;
        }
    }

    /**
     * Converts a symbol to a {@code double}.
     *
     * @param s the symbol; must not be {@code null}
     * @return the symbol's value if it is a double or integer symbol, otherwise 0.0
     */
    public static double get_number_from_symbol(SymbolImpl s) {
        DoubleSymbolImpl f = s.asDouble();
        if (f != null) {
            return f.getValue();
        }
        IntegerSymbolImpl i = s.asInteger();
        if (i != null) {
            return i.getValue();
        }
        return 0.0;
    }

    /** Selection policies, each with the textual name used to look it up. */
    public enum Policy {
        USER_SELECT_BOLTZMANN("boltzmann"),
        USER_SELECT_E_GREEDY("epsilon-greedy"),
        USER_SELECT_FIRST("first"),
        USER_SELECT_LAST("last"),
        USER_SELECT_RANDOM("random-uniform"),
        USER_SELECT_SOFTMAX("softmax");

        private final String policyName;

        Policy(String policyName) {
            this.policyName = policyName;
        }

        /** @return the textual name of this policy */
        public String getPolicyName() {
            return this.policyName;
        }

        /**
         * @param policyName textual name to look up (exact, case-sensitive match)
         * @return the matching policy, or {@code null} if none matches
         */
        public static Policy findPolicy(String policyName) {
            for (Policy p : Policy.values()) {
                if (p.policyName.equals(policyName)) {
                    return p;
                }
            }
            return null;
        }
    }

    /** How several indifferent preferences for one candidate are combined. */
    public enum NumericIndifferentMode {
        NUMERIC_INDIFFERENT_MODE_AVG("avg"),
        NUMERIC_INDIFFERENT_MODE_SUM("sum");

        private final String modeName;

        NumericIndifferentMode(String modeName) {
            this.modeName = modeName;
        }

        /** @return the textual name of this mode */
        public String getModeName() {
            return this.modeName;
        }

        /**
         * @param modeName textual name to look up (exact, case-sensitive match)
         * @return the matching mode, or {@code null} if none matches
         */
        public static NumericIndifferentMode findNumericIndifferentMode(String modeName) {
            for (NumericIndifferentMode m : NumericIndifferentMode.values()) {
                if (m.modeName.equals(modeName)) {
                    return m;
                }
            }
            return null;
        }
    }
}

