/*
 * Decompiled with CFR 0.152.
 */
package org.jsoar.kernel.learning.rl;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.ListIterator;
import java.util.Map;
import org.jsoar.kernel.Agent;
import org.jsoar.kernel.Decider;
import org.jsoar.kernel.PredefinedSymbols;
import org.jsoar.kernel.Production;
import org.jsoar.kernel.ProductionType;
import org.jsoar.kernel.learning.Chunker;
import org.jsoar.kernel.learning.rl.RLRuleInfo;
import org.jsoar.kernel.learning.rl.RLTemplateInfo;
import org.jsoar.kernel.learning.rl.ReinforcementLearningInfo;
import org.jsoar.kernel.learning.rl.ReinforcementLearningParams;
import org.jsoar.kernel.lhs.ComplexTest;
import org.jsoar.kernel.lhs.Condition;
import org.jsoar.kernel.lhs.GoalIdTest;
import org.jsoar.kernel.lhs.ImpasseIdTest;
import org.jsoar.kernel.lhs.PositiveCondition;
import org.jsoar.kernel.lhs.Test;
import org.jsoar.kernel.lhs.Tests;
import org.jsoar.kernel.lhs.ThreeFieldCondition;
import org.jsoar.kernel.memory.Instantiation;
import org.jsoar.kernel.memory.Preference;
import org.jsoar.kernel.memory.PreferenceType;
import org.jsoar.kernel.memory.RecognitionMemory;
import org.jsoar.kernel.memory.Slot;
import org.jsoar.kernel.memory.WmeImpl;
import org.jsoar.kernel.rete.ConditionsAndNots;
import org.jsoar.kernel.rete.ProductionAddResult;
import org.jsoar.kernel.rete.Rete;
import org.jsoar.kernel.rete.Token;
import org.jsoar.kernel.rhs.Action;
import org.jsoar.kernel.rhs.MakeAction;
import org.jsoar.kernel.rhs.ReordererException;
import org.jsoar.kernel.rhs.RhsSymbolValue;
import org.jsoar.kernel.symbols.IdentifierImpl;
import org.jsoar.kernel.symbols.StringSymbolImpl;
import org.jsoar.kernel.symbols.SymbolFactoryImpl;
import org.jsoar.kernel.symbols.SymbolImpl;
import org.jsoar.kernel.symbols.Symbols;
import org.jsoar.kernel.tracing.Printer;
import org.jsoar.kernel.tracing.Trace;
import org.jsoar.util.ByRef;
import org.jsoar.util.DefaultSourceLocation;
import org.jsoar.util.SourceLocation;
import org.jsoar.util.adaptables.Adaptable;
import org.jsoar.util.adaptables.Adaptables;
import org.jsoar.util.markers.DefaultMarker;
import org.jsoar.util.properties.PropertyChangeEvent;
import org.jsoar.util.properties.PropertyListener;
import org.jsoar.util.properties.PropertyManager;

public class ReinforcementLearning {
    // Property manager obtained from the agent in the constructor; the LEARNING listener is registered on it.
    private final PropertyManager properties;
    // RL parameter set, exposed through getParams().
    private final ReinforcementLearningParams params;
    // Source location attached to every production generated from a template.
    private static final SourceLocation NEW_PRODUCTION_SOURCE = DefaultSourceLocation.newBuilder().file("*RL*").build();
    // Next numeric suffix handed out by rl_next_template_id(); reset to 1 by rl_initialize_template_tracking().
    private int rl_template_count;
    // The constructor's context cast to Agent.
    private final Agent my_agent;
    // The adaptable context; the collaborators below are looked up from it in initialize().
    private final Adaptable myContext;
    // The following fields have no initializer and are only assigned in initialize().
    private SymbolFactoryImpl syms;
    private Decider decider;
    private Chunker chunker;
    private RecognitionMemory recMemory;
    private Rete rete;
    private PredefinedSymbols preSyms;
    private Trace trace;
    private Printer printer;

    public ReinforcementLearning(Adaptable context) {
        this.myContext = context;
        this.properties = ((Agent)this.myContext).getProperties();
        this.params = new ReinforcementLearningParams(this.properties, this.syms);
        this.my_agent = (Agent)this.myContext;
    }

    /**
     * @return the RL parameter object created in the constructor
     */
    public ReinforcementLearningParams getParams() {
        return params;
    }

    /**
     * Resolves the kernel collaborators from the context, registers a listener
     * that clears the per-goal RL data whenever learning is switched off, and
     * resets template-id tracking.
     */
    public void initialize() {
        final Class<? extends ReinforcementLearning> owner = this.getClass();
        final Adaptable ctx = this.myContext;

        this.syms = Adaptables.require(owner, ctx, SymbolFactoryImpl.class);
        this.decider = Adaptables.require(owner, ctx, Decider.class);
        this.chunker = Adaptables.require(owner, ctx, Chunker.class);
        this.recMemory = Adaptables.require(owner, ctx, RecognitionMemory.class);
        this.rete = Adaptables.require(owner, ctx, Rete.class);
        this.preSyms = Adaptables.require(owner, ctx, PredefinedSymbols.class);
        this.trace = Adaptables.require(owner, ctx, Trace.class);
        this.printer = Adaptables.require(owner, ctx, Printer.class);

        this.properties.addListener(ReinforcementLearningParams.LEARNING, new PropertyListener<ReinforcementLearningParams.Learning>(){

            @Override
            public void propertyChanged(PropertyChangeEvent<ReinforcementLearningParams.Learning> event) {
                // Only the transition to "off" matters; every other value is ignored.
                if (event.getNewValue() != ReinforcementLearningParams.Learning.off) {
                    return;
                }
                ReinforcementLearning.this.rl_reset_data();
            }
        });

        this.rl_initialize_template_tracking();
    }

    /**
     * @return {@code true} when the learning parameter is currently {@code on}
     */
    public boolean rl_enabled() {
        return ReinforcementLearningParams.Learning.on == this.params.learning.get();
    }

    /**
     * Walks the goal stack from the top goal downwards and clears the RL
     * bookkeeping (traces, previous rules, Q value, reward and age counters)
     * stored on each goal.
     */
    private void rl_reset_data() {
        for (IdentifierImpl g = this.decider.top_goal; g != null; g = g.goalInfo.lower_goal) {
            final ReinforcementLearningInfo info = g.goalInfo.rl_info;
            info.eligibility_traces.clear();
            info.prev_op_rl_rules.clear();
            info.previous_q = 0.0;
            info.reward = 0.0;
            info.gap_age = 0L;
            info.hrl_age = 0L;
        }
    }

    /**
     * Drops every reference to {@code prod} held in the per-state RL data:
     * its eligibility trace is removed and each occurrence in the
     * previous-operator rule list is replaced by {@code null} (the slot is
     * kept, so the list size does not change).
     *
     * @param prod the production being removed
     */
    private void rl_remove_refs_for_prod(Production prod) {
        for (IdentifierImpl s = this.decider.top_state; s != null; s = s.goalInfo.lower_goal) {
            final ReinforcementLearningInfo info = s.goalInfo.rl_info;
            info.eligibility_traces.remove(prod);

            final ListIterator rules = info.prev_op_rl_rules.listIterator();
            while (rules.hasNext()) {
                final Production candidate = (Production)rules.next();
                if (candidate == prod) {
                    rules.set(null);
                }
            }
        }
    }

    /**
     * Checks whether a production has the shape required of an RL template:
     * exactly one action, and that action is a make-action that is either a
     * numeric-indifferent preference or a binary-indifferent preference whose
     * referent is a variable.
     *
     * @param prod the production to inspect
     * @return {@code true} if the production qualifies as a template
     */
    static boolean rl_valid_template(Production prod) {
        boolean has_numeric = false;
        boolean has_variable_binary = false;
        int action_count = 0;

        for (Action act = prod.getFirstAction(); act != null; act = act.next) {
            action_count++;
            final MakeAction make = act.asMakeAction();
            if (make == null) {
                continue;
            }
            if (act.preference_type == PreferenceType.NUMERIC_INDIFFERENT) {
                has_numeric = true;
                continue;
            }
            if (act.preference_type != PreferenceType.BINARY_INDIFFERENT) {
                continue;
            }
            final RhsSymbolValue referent = make.referent.asSymbolValue();
            if (referent != null && referent.getSym().asVariable() != null) {
                has_variable_binary = true;
            }
        }

        return action_count == 1 && (has_numeric || has_variable_binary);
    }

    /**
     * Checks whether a production qualifies as an RL rule: it must have
     * exactly one action and that action must be a make-action with a
     * numeric-indifferent preference.
     *
     * @param prod the production to inspect
     * @return {@code true} if the production is a valid RL rule
     */
    private static boolean rl_valid_rule(Production prod) {
        boolean has_numeric = false;
        int action_count = 0;

        for (Action act = prod.getFirstAction(); act != null; act = act.next) {
            action_count++;
            if (act.asMakeAction() != null && act.preference_type == PreferenceType.NUMERIC_INDIFFERENT) {
                has_numeric = true;
            }
        }

        return has_numeric && action_count == 1;
    }

    /**
     * Tells whether every character of {@code s} is a digit according to
     * {@link Character#isDigit(char)}.
     *
     * @param s the string to test
     * @return {@code true} if no non-digit character is present (this is also
     *         the result for the empty string)
     */
    private static boolean is_natural_number(String s) {
        for (int i = 0; i < s.length(); ++i) {
            if (!Character.isDigit(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Extracts the numeric suffix from a production name of the form
     * {@code rl*<anything>*<digits>}.
     *
     * @param prod_name the production name to parse
     * @return the parsed suffix, or {@code -1} when the name is shorter than
     *         six characters, does not start with {@code "rl*"}, ends with
     *         {@code '*'}, has a non-digit suffix, or has a suffix that does
     *         not fit in an {@code int}
     */
    private static int rl_get_template_id(String prod_name) {
        if (prod_name.length() < 6 || !prod_name.startsWith("rl*")) {
            return -1;
        }
        // The "rl*" prefix guarantees a '*' exists, so only the "nothing
        // after the last star" case needs rejecting here.
        int last_star = prod_name.lastIndexOf('*');
        if (last_star == -1 || last_star == prod_name.length() - 1) {
            return -1;
        }
        String id_str = prod_name.substring(last_star + 1);
        if (!is_natural_number(id_str)) {
            return -1;
        }
        try {
            return Integer.parseInt(id_str);
        }
        catch (NumberFormatException tooLarge) {
            // An all-digit suffix can still overflow int; such a name cannot
            // have been produced by the template counter, so treat it like any
            // other non-template name instead of failing the caller.
            return -1;
        }
    }

    /**
     * Resets the template-id counter so that the next generated id is 1.
     */
    public void rl_initialize_template_tracking() {
        rl_template_count = 1;
    }

    /**
     * Advances the template-id counter past the id embedded in
     * {@code rule_name}, if the name carries one that is strictly greater
     * than the current counter.
     *
     * @param rule_name name of a production that was just added
     */
    private void rl_update_template_tracking(String rule_name) {
        final int embedded_id = ReinforcementLearning.rl_get_template_id(rule_name);
        if (embedded_id == -1) {
            return;
        }
        if (embedded_id > this.rl_template_count) {
            this.rl_template_count = embedded_id + 1;
        }
    }

    /**
     * @return the current template id; the counter is incremented afterwards
     */
    private int rl_next_template_id() {
        final int id = this.rl_template_count;
        this.rl_template_count = id + 1;
        return id;
    }

    /**
     * Gives back the most recently issued template id by decrementing the counter.
     */
    private void rl_revert_template_id() {
        this.rl_template_count -= 1;
    }

    /**
     * Records {@code p_sym -> i_sym} in {@code constants} when the production
     * symbol is a variable and the instantiated symbol is either not an
     * identifier or an identifier with a non-zero {@code smem_lti}.
     *
     * @param p_sym symbol from the template's condition
     * @param i_sym symbol from the matching instantiated condition
     * @param constants map that receives the binding
     */
    private static void rl_get_symbol_constant(SymbolImpl p_sym, SymbolImpl i_sym, Map<SymbolImpl, SymbolImpl> constants) {
        if (p_sym.asVariable() == null) {
            return;
        }
        final IdentifierImpl as_id = i_sym.asIdentifier();
        if (as_id != null && as_id.smem_lti == 0L) {
            // Identifier with smem_lti == 0: not recorded as a constant.
            return;
        }
        constants.put(p_sym, i_sym);
    }

    /**
     * Collects the constant binding from a pair of tests. Only non-blank
     * equality tests on the production side contribute; every other kind of
     * test is ignored.
     *
     * @param p_test test from the template's condition
     * @param i_test corresponding test from the instantiated condition
     * @param constants map that receives the binding
     */
    private static void rl_get_test_constant(Test p_test, Test i_test, Map<SymbolImpl, SymbolImpl> constants) {
        if (!Tests.isBlank(p_test) && p_test.asEqualityTest() != null) {
            ReinforcementLearning.rl_get_symbol_constant(p_test.asEqualityTest().getReferent(), i_test.asEqualityTest().getReferent(), constants);
        }
    }

    /**
     * Walks the template's condition list and the instantiated condition list
     * in lock-step and gathers the variable-to-constant bindings from the id,
     * attribute and value tests. Conjunctive negations are descended into
     * recursively; other condition kinds contribute nothing.
     *
     * @param p_conds head of the template's condition list
     * @param i_conds head of the instantiated condition list (advanced in
     *        parallel with {@code p_conds})
     * @param constants map that receives the bindings
     */
    private void rl_get_template_constants(Condition p_conds, Condition i_conds, Map<SymbolImpl, SymbolImpl> constants) {
        for (Condition p = p_conds, i = i_conds; p != null; p = p.next, i = i.next) {
            final ThreeFieldCondition p_three = p.asThreeFieldCondition();
            if (p_three != null) {
                final ThreeFieldCondition i_three = i.asThreeFieldCondition();
                ReinforcementLearning.rl_get_test_constant(p_three.id_test, i_three.id_test, constants);
                ReinforcementLearning.rl_get_test_constant(p_three.attr_test, i_three.attr_test, constants);
                ReinforcementLearning.rl_get_test_constant(p_three.value_test, i_three.value_test, constants);
                continue;
            }
            if (p.asConjunctiveNegationCondition() != null) {
                this.rl_get_template_constants(p.asConjunctiveNegationCondition().top, i.asConjunctiveNegationCondition().top, constants);
            }
        }
    }

    /**
     * Builds a concrete RL rule from one instantiation of a template
     * production and adds it to the agent's productions.
     *
     * <p>A new rule is only built when the set of variable-to-constant
     * bindings of this instantiation has not been recorded for the template
     * before. The new rule is named {@code rl*<template name>*<id>}, carries a
     * single numeric-indifferent action, and its initial {@code rl_efr} is the
     * numeric value of the instantiated referent (0.0 if the referent is
     * neither an integer nor a double).
     *
     * @param my_template_instance the template instantiation to specialise
     * @param tok token passed through to RHS value instantiation
     * @param w WME passed through to RHS value instantiation
     * @return the name symbol of the new production, or {@code null} when the
     *         bindings were already recorded or the production was reported
     *         as a duplicate
     */
    public SymbolImpl rl_build_template_instantiation(Instantiation my_template_instance, Token tok, WmeImpl w) {
        StringSymbolImpl return_val = null;
        RLTemplateInfo rlInfo = my_template_instance.prod.rlTemplateInfo;
        // The template's own conditions are fetched from the rete once and cached on the template info.
        if (rlInfo.rl_template_conds == null) {
            ConditionsAndNots cans = this.rete.p_node_to_conditions_and_nots(my_template_instance.prod.getReteNode(), null, null, false);
            rlInfo.rl_template_conds = cans.top;
        }
        if (rlInfo.rl_template_instantiations == null) {
            rlInfo.rl_template_instantiations = new HashSet<Map<SymbolImpl, SymbolImpl>>();
        }
        HashMap<SymbolImpl, SymbolImpl> constant_map = new HashMap<SymbolImpl, SymbolImpl>();
        this.rl_get_template_constants(rlInfo.rl_template_conds, my_template_instance.top_of_instantiated_conditions, constant_map);
        // add() returns false when an equal binding map is already present, in which case nothing is built.
        if (rlInfo.rl_template_instantiations.add(constant_map)) {
            int new_id;
            Production my_template = my_template_instance.prod;
            Action my_action = my_template.getFirstAction();
            String new_name = "";
            // Keep drawing ids until the resulting name is not an existing string symbol.
            do {
                new_id = this.rl_next_template_id();
            } while (this.syms.findString(new_name = "rl*" + my_template.getName() + "*" + new_id) != null);
            StringSymbolImpl new_name_symbol = this.syms.createString(new_name);
            ByRef<Object> cond_top = ByRef.create(null);
            ByRef<Object> cond_bottom = ByRef.create(null);
            // Work on a copy of the instantiated conditions; the copy is then variablized via the chunker.
            Condition.copy_condition_list(my_template_instance.top_of_instantiated_conditions, cond_top, cond_bottom);
            this.rl_add_goal_or_impasse_tests_to_conds((Condition)cond_top.value);
            this.syms.getVariableGenerator().reset((Condition)cond_top.value, null);
            this.chunker.variablization_tc = DefaultMarker.create();
            this.chunker.variablize_condition_list((Condition)cond_top.value);
            this.chunker.variablize_nots_and_insert_into_conditions(my_template_instance.nots, (Condition)cond_top.value);
            // Instantiate the template action's four RHS values; the id's level is reused for the other three.
            IdentifierImpl id = this.recMemory.instantiate_rhs_value(my_action.asMakeAction().id, -1, 's', tok, w).asIdentifier();
            SymbolImpl attr = this.recMemory.instantiate_rhs_value(my_action.asMakeAction().attr, id.level, 'a', tok, w);
            char first_letter = attr.getFirstLetter();
            SymbolImpl value = this.recMemory.instantiate_rhs_value(my_action.asMakeAction().value, id.level, first_letter, tok, w);
            SymbolImpl referent = this.recMemory.instantiate_rhs_value(my_action.asMakeAction().referent, id.level, first_letter, tok, w);
            MakeAction new_action = this.rl_make_simple_action(id, attr, value, referent);
            // The generated rule is always numeric-indifferent, whatever the template's preference type was.
            new_action.preference_type = PreferenceType.NUMERIC_INDIFFERENT;
            Production new_production = Production.newBuilder().type(ProductionType.USER).location(NEW_PRODUCTION_SOURCE).name(((Object)new_name_symbol).toString()).conditions((Condition)cond_top.value, (Condition)cond_bottom.value).actions(new_action).build();
            new_production.rlRuleInfo = new RLRuleInfo();
            double init_value = 0.0;
            if (referent.asInteger() != null) {
                init_value = referent.asInteger().getValue();
            } else if (referent.asDouble() != null) {
                init_value = referent.asDouble().getValue();
            }
            new_production.rlRuleInfo.rl_ecr = 0.0;
            new_production.rlRuleInfo.rl_efr = init_value;
            try {
                // A duplicate gives its id back to the counter and yields a null result.
                if (this.my_agent.getProductions().addProduction(new_production, false) == ProductionAddResult.DUPLICATE_PRODUCTION) {
                    this.rl_revert_template_id();
                    new_name_symbol = null;
                }
            }
            catch (ReordererException e) {
                // NOTE(review): after a reorderer failure only the stack trace is printed and the
                // new name symbol is still returned below - confirm callers expect this.
                e.printStackTrace();
            }
            return_val = new_name_symbol;
        }
        return return_val;
    }

    /**
     * Creates a make-action whose id, attribute, value and referent are the
     * variablized forms of the given symbols. The four symbols are
     * variablized in that order.
     *
     * @param id_sym identifier symbol of the action
     * @param attr_sym attribute symbol of the action
     * @param val_sym value symbol of the action
     * @param ref_sym referent symbol of the action
     * @return the new action; its preference type is left for the caller to set
     */
    private MakeAction rl_make_simple_action(IdentifierImpl id_sym, SymbolImpl attr_sym, SymbolImpl val_sym, SymbolImpl ref_sym) {
        final Chunker variablizer = this.chunker;
        final MakeAction action = new MakeAction();
        // Order is kept: variablization may depend on which symbol is seen first.
        action.id = variablizer.variablize_symbol(id_sym).toRhsValue();
        action.attr = variablizer.variablize_symbol(attr_sym).toRhsValue();
        action.value = variablizer.variablize_symbol(val_sym).toRhsValue();
        action.referent = variablizer.variablize_symbol(ref_sym).toRhsValue();
        return action;
    }

    /**
     * Adds a goal test to the id test of the first positive condition that
     * mentions each goal identifier. A fresh marker stored in the
     * identifier's {@code tc_number} ensures each identifier is handled once.
     *
     * @param all_conds head of the condition list to modify in place
     */
    private void rl_add_goal_or_impasse_tests_to_conds(Condition all_conds) {
        final DefaultMarker tc = DefaultMarker.create();
        for (Condition c = all_conds; c != null; c = c.next) {
            final PositiveCondition positive = c.asPositiveCondition();
            if (positive == null) {
                continue;
            }
            final IdentifierImpl id = positive.id_test.asEqualityTest().getReferent().asIdentifier();
            if (!id.isGoal() || id.tc_number == tc) {
                continue;
            }
            // isGoal() was just checked, so the impasse branch is not taken here.
            ComplexTest ct = id.isGoal() ? GoalIdTest.INSTANCE : ImpasseIdTest.INSTANCE;
            positive.id_test = Tests.add_new_test_to_test(positive.id_test, ct);
            id.tc_number = tc;
        }
    }

    /**
     * Adds the reward currently present on a goal's reward link to the
     * goal's accumulated reward. Nothing happens when the goal has no
     * previous-operator RL rules.
     *
     * <p>Every numeric {@code value} WME found under a {@code reward} WME is
     * summed; the sum is scaled by {@code discount_rate ^ age}, where the age
     * is {@code hrl_age} plus {@code gap_age} when temporal discounting is on.
     * Afterwards {@code hrl_age} is incremented for any goal other than the
     * bottom goal when HRL discounting is on.
     *
     * @param goal the goal whose reward link is read
     */
    public void rl_tabulate_reward_value_for_goal(IdentifierImpl goal) {
        final ReinforcementLearningInfo info = goal.goalInfo.rl_info;
        if (info.prev_op_rl_rules.isEmpty()) {
            return;
        }

        final Slot reward_slot = Slot.find_slot(goal.goalInfo.reward_header, this.preSyms.rl_sym_reward);
        double total = 0.0;
        final double discount_rate = this.params.discount_rate.get();

        if (reward_slot != null) {
            for (WmeImpl reward_wme = reward_slot.getWmes(); reward_wme != null; reward_wme = reward_wme.next) {
                if (reward_wme.value.asIdentifier() == null) {
                    continue;
                }
                final Slot value_slot = Slot.find_slot(reward_wme.value.asIdentifier(), this.preSyms.rl_sym_value);
                if (value_slot == null) {
                    continue;
                }
                for (WmeImpl value_wme = value_slot.getWmes(); value_wme != null; value_wme = value_wme.next) {
                    if (value_wme.value.asDouble() != null) {
                        total += value_wme.value.asDouble().getValue();
                    } else if (value_wme.value.asInteger() != null) {
                        total += (double)value_wme.value.asInteger().getValue();
                    }
                }
            }

            long age = info.hrl_age;
            if (this.params.temporal_discount.get() == ReinforcementLearningParams.TemporalDiscount.on) {
                age += info.gap_age;
            }
            info.reward += total * Math.pow(discount_rate, age);
        }

        if (goal != this.decider.bottom_goal && this.params.hrl_discount.get() == ReinforcementLearningParams.HrlDiscount.on) {
            info.hrl_age++;
        }
    }

    /**
     * Tabulates the reward for every goal on the stack, from the top goal
     * down to the lowest goal.
     */
    public void rl_tabulate_reward_values() {
        for (IdentifierImpl g = this.decider.top_goal; g != null; g = g.goalInfo.lower_goal) {
            this.rl_tabulate_reward_value_for_goal(g);
        }
    }

    /**
     * Records, for the given goal, which RL rules contributed
     * numeric-indifferent preferences for the selected operator, together
     * with the operator's numeric value.
     *
     * <p>If at least one RL rule fired for the operator, the previous rule
     * list is replaced by those rules. Otherwise, with temporal extension
     * off the list is cleared, and with temporal extension on the gap age is
     * incremented as long as earlier rules are still recorded.
     *
     * @param goal the goal in which the operator was selected
     * @param cand the preference of the selected operator
     */
    public void rl_store_data(IdentifierImpl goal, Preference cand) {
        final ReinforcementLearningInfo info = goal.goalInfo.rl_info;
        final SymbolImpl selected = cand.value;
        info.previous_q = cand.numeric_value;
        final boolean gaps_enabled = this.params.temporal_extension.get() == ReinforcementLearningParams.TemporalExtension.on;

        boolean any_fired = false;
        for (Preference p = goal.goalInfo.operator_slot.getPreferencesByType(PreferenceType.NUMERIC_INDIFFERENT); p != null; p = p.next) {
            if (selected != p.value || p.inst.prod.rlRuleInfo == null) {
                continue;
            }
            // The first matching rule of this decision discards the old list.
            if (!any_fired && !info.prev_op_rl_rules.isEmpty()) {
                info.prev_op_rl_rules.clear();
            }
            info.prev_op_rl_rules.push(p.inst.prod);
            any_fired = true;
        }

        if (any_fired) {
            info.previous_q = cand.numeric_value;
            return;
        }

        if (this.trace.isEnabled(Trace.Category.RL) && gaps_enabled && info.gap_age == 0L && !info.prev_op_rl_rules.isEmpty()) {
            this.trace.startNewLine().print(Trace.Category.RL, "gap started (%s)", goal);
        }

        if (gaps_enabled) {
            if (!info.prev_op_rl_rules.isEmpty()) {
                info.gap_age++;
            }
        } else {
            if (!info.prev_op_rl_rules.isEmpty()) {
                info.prev_op_rl_rules.clear();
            }
            info.previous_q = cand.numeric_value;
        }
    }

    /**
     * Convenience overload of the four-argument update that always updates
     * the {@code rl_efr} component as well.
     *
     * @param op_value value of the newly selected operator
     * @param op_rl forwarded unchanged to the four-argument overload
     * @param goal the goal whose RL data is updated
     */
    public void rl_perform_update(double op_value, boolean op_rl, IdentifierImpl goal) {
        final boolean update_efr = true;
        this.rl_perform_update(op_value, op_rl, goal, update_efr);
    }

    /**
     * Updates the stored values of the RL rules recorded for {@code goal}
     * and then resets the goal's reward and age counters.
     *
     * <p>Nothing at all happens when temporal extension is on and
     * {@code op_rl} is {@code false}. Otherwise, if previous-operator rules
     * are recorded:
     * <ol>
     *   <li>existing eligibility traces are decayed by
     *       {@code et_decay_rate * discount} and dropped below
     *       {@code et_tolerance} (or all cleared when the decay rate is 0);</li>
     *   <li>each recorded rule's trace is increased by
     *       {@code 1 / prev_op_rl_rules.size()};</li>
     *   <li>every traced rule's {@code rl_ecr}/{@code rl_efr} is moved towards
     *       the accumulated reward and the discounted {@code op_value}, using
     *       a learning rate chosen by the configured decay mode;</li>
     *   <li>the new combined value is written to the rule's action referent
     *       and to the referent of every preference generated by the rule's
     *       instantiations.</li>
     * </ol>
     *
     * @param op_value value of the newly selected operator
     * @param op_rl when temporal extension is on, the update only runs if
     *        this is {@code true}
     * @param goal the goal whose RL data is updated
     * @param update_efr when {@code false}, {@code rl_efr} is left unchanged
     */
    public void rl_perform_update(double op_value, boolean op_rl, IdentifierImpl goal, boolean update_efr) {
        final boolean using_gaps = this.params.temporal_extension.get() == ReinforcementLearningParams.TemporalExtension.on;
        if (using_gaps && !op_rl) {
            return;
        }
        ReinforcementLearningInfo data = goal.goalInfo.rl_info;
        if (!data.prev_op_rl_rules.isEmpty()) {
            double alpha = this.params.learning_rate.get();
            double lambda = this.params.et_decay_rate.get();
            double gamma = this.params.discount_rate.get();
            double tolerance = this.params.et_tolerance.get();
            double theta = this.params.meta_learning_rate.get();
            long effective_age = data.hrl_age + 1L;
            if (this.params.temporal_discount.get() == ReinforcementLearningParams.TemporalDiscount.on) {
                effective_age += data.gap_age;
            }
            double discount = Math.pow(gamma, effective_age);
            if (data.gap_age != 0L && using_gaps && this.trace.isEnabled(Trace.Category.RL)) {
                this.trace.startNewLine().print("gap ended (%s)", goal);
            }

            // Decay (or discard) the traces left over from earlier updates.
            if (lambda == 0.0) {
                if (!data.eligibility_traces.isEmpty()) {
                    data.eligibility_traces.clear();
                }
            } else {
                Iterator<Map.Entry<Production, Double>> it = data.eligibility_traces.entrySet().iterator();
                while (it.hasNext()) {
                    Map.Entry<Production, Double> e = it.next();
                    e.setValue(e.getValue() * lambda * discount);
                    if (e.getValue() < tolerance) {
                        it.remove();
                    }
                }
            }

            // Sum the old values of the recorded rules and bump their traces.
            // Null entries (left behind by rl_remove_refs_for_prod) are skipped
            // but still count towards the list size used for the increment.
            double sum_old_ecr = 0.0;
            double sum_old_efr = 0.0;
            double trace_increment = 1.0 / (double)data.prev_op_rl_rules.size();
            for (Production p : data.prev_op_rl_rules) {
                if (p == null) {
                    continue;
                }
                sum_old_ecr += p.rlRuleInfo.rl_ecr;
                sum_old_efr += p.rlRuleInfo.rl_efr;
                Double old = data.eligibility_traces.get(p);
                if (old != null) {
                    data.eligibility_traces.put(p, old + trace_increment);
                } else {
                    data.eligibility_traces.put(p, trace_increment);
                }
            }

            // delta_t is only consumed by the delta-bar-delta decay mode below.
            double delta_t = data.reward + discount * op_value - (sum_old_ecr + sum_old_efr);
            for (Map.Entry<Production, Double> iter : data.eligibility_traces.entrySet()) {
                double adjusted_alpha;
                Production prod = iter.getKey();
                assert (prod.rlRuleInfo != null);
                double old_ecr = prod.rlRuleInfo.rl_ecr;
                double old_efr = prod.rlRuleInfo.rl_efr;
                switch ((ReinforcementLearningParams.DecayMode)((Object)this.params.decay_mode.get())) {
                    case exponential_decay: {
                        adjusted_alpha = 1.0 / (prod.rlRuleInfo.rl_update_count + 1.0);
                        break;
                    }
                    case logarithmic_decay: {
                        adjusted_alpha = 1.0 / (Math.log(prod.rlRuleInfo.rl_update_count + 1.0) + 1.0);
                        break;
                    }
                    case delta_bar_delta_decay: {
                        prod.rlRuleInfo.rl_delta_bar_delta_beta += theta * delta_t * 1.0 * prod.rlRuleInfo.rl_delta_bar_delta_h;
                        adjusted_alpha = Math.exp(prod.rlRuleInfo.rl_delta_bar_delta_beta);
                        double decay_term = 1.0 - adjusted_alpha * 1.0 * 1.0;
                        if (decay_term < 0.0) {
                            decay_term = 0.0;
                        }
                        prod.rlRuleInfo.rl_delta_bar_delta_h = prod.rlRuleInfo.rl_delta_bar_delta_h * decay_term + adjusted_alpha * delta_t * 1.0;
                        break;
                    }
                    default: {
                        adjusted_alpha = alpha;
                    }
                }
                double delta_ecr = adjusted_alpha * iter.getValue() * (data.reward - sum_old_ecr);
                double delta_efr = update_efr ? adjusted_alpha * iter.getValue() * (discount * op_value - sum_old_efr) : 0.0;
                double new_ecr = old_ecr + delta_ecr;
                double new_efr = old_efr + delta_efr;
                double new_combined = new_ecr + new_efr;
                if (this.trace.isEnabled(Trace.Category.RL)) {
                    String ss = "RL update " + prod.getName() + " " + old_ecr + " " + old_efr + " " + (old_ecr + old_efr) + " -> " + new_ecr + " " + new_efr + " " + new_combined + "\n";
                    // Pass the line as an argument rather than as the format
                    // string, so a '%' in a production name is printed verbatim.
                    this.trace.startNewLine().print("%s", ss);
                    String log_path = this.params.update_log_path.get();
                    if (!log_path.isEmpty()) {
                        ReinforcementLearning.rl_append_update_log(log_path, ss);
                    }
                }
                prod.getFirstAction().asMakeAction().referent = this.syms.createDouble(new_combined).toRhsValue();
                prod.rlRuleInfo.rl_update_count += 1.0;
                prod.rlRuleInfo.rl_ecr = new_ecr;
                prod.rlRuleInfo.rl_efr = new_efr;
                if (this.params.meta.get() == ReinforcementLearningParams.Meta.on) {
                    // Locale.ROOT keeps '.' as the decimal separator, which is
                    // what Double.parseDouble in getDocParam expects when the
                    // documentation string is read back.
                    StringBuilder builder = new StringBuilder();
                    builder.append(String.format(java.util.Locale.ROOT, "%s=%f;", "rl-updates", prod.rlRuleInfo.rl_update_count));
                    builder.append(String.format(java.util.Locale.ROOT, "%s=%f;", "delta-bar-delta-h", prod.rlRuleInfo.rl_delta_bar_delta_h));
                    prod.setDocumentation(builder.toString());
                }
                // Propagate the new value to preferences that already exist.
                Instantiation inst = prod.instantiations;
                while (inst != null) {
                    Preference pref = inst.preferences_generated;
                    while (pref != null) {
                        pref.referent = this.syms.createDouble(new_combined);
                        pref = pref.inst_next;
                    }
                    inst = inst.nextInProdList;
                }
            }
        }
        data.gap_age = 0L;
        data.hrl_age = 0L;
        data.reward = 0.0;
    }

    /**
     * Appends one entry to the RL update log. The file is opened in append
     * mode so earlier entries are kept, and it is always closed, even when
     * opening or writing fails. I/O failures are reported on standard error
     * and do not interrupt learning.
     *
     * @param log_path path of the log file
     * @param entry text to append; a platform line separator is added after it
     */
    private static void rl_append_update_log(String log_path, String entry) {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(log_path), true))) {
            writer.write(String.format("%s%n", entry));
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Discards all eligibility traces stored on the given goal.
     *
     * @param goal the goal whose traces are cleared
     */
    public static void rl_watkins_clear(IdentifierImpl goal) {
        final ReinforcementLearningInfo info = goal.goalInfo.rl_info;
        info.eligibility_traces.clear();
    }

    /**
     * Hook invoked when a production is added.
     *
     * <p>Attaches RL rule info (seeded from the action's referent) to valid
     * RL rules that are neither justifications nor templates, advances the
     * template-id counter past any id embedded in the production name,
     * excises templates that are not valid, and finally restores RL
     * metadata from the production's documentation string if present.
     *
     * @param p the production that was added
     */
    public void addProduction(Production p) {
        p.rlRuleInfo = null;

        final boolean eligible_type = p.getType() != ProductionType.JUSTIFICATION && p.getType() != ProductionType.TEMPLATE;
        if (eligible_type && ReinforcementLearning.rl_valid_rule(p)) {
            final RLRuleInfo info = new RLRuleInfo();
            p.rlRuleInfo = info;
            info.rl_efr = Symbols.asDouble(p.getFirstAction().asMakeAction().referent.asSymbolValue().getSym());
        }

        this.rl_update_template_tracking(p.getName());

        final boolean invalid_template = p.getType() == ProductionType.TEMPLATE && !ReinforcementLearning.rl_valid_template(p);
        if (invalid_template) {
            this.printer.print("Invalid Soar-RL template (%s)\n\n", p.getName());
            this.my_agent.getProductions().exciseProduction(p, false);
            return;
        }

        if (p.rlRuleInfo != null && p.getDocumentation() != null) {
            this.rl_rule_meta(p);
        }
    }

    /**
     * Restores RL metadata ({@code rl-updates} and {@code delta-bar-delta-h})
     * from a production's documentation string. Does nothing when the
     * production has no documentation or the {@code meta} parameter is off.
     *
     * <p>Each value is applied independently, and only if it was actually
     * found and parsed; a missing entry leaves the corresponding field of
     * {@code prod.rlRuleInfo} untouched.
     *
     * @param prod a production whose {@code rlRuleInfo} is non-null
     */
    private void rl_rule_meta(Production prod) {
        String doc = prod.getDocumentation();
        if (doc == null || this.params.meta.get() != ReinforcementLearningParams.Meta.on) {
            return;
        }
        // NaN never compares equal to anything (itself included), so the
        // "not found" sentinel has to be detected with Double.isNaN.
        double rlUpdateDocVal = this.getDocParam("rl-updates", doc);
        if (!Double.isNaN(rlUpdateDocVal)) {
            prod.rlRuleInfo.rl_update_count = rlUpdateDocVal;
        }
        double rlDeltaBarDeltaHVal = this.getDocParam("delta-bar-delta-h", doc);
        if (!Double.isNaN(rlDeltaBarDeltaHVal)) {
            prod.rlRuleInfo.rl_delta_bar_delta_h = rlDeltaBarDeltaHVal;
        }
    }

    /**
     * Reads a numeric {@code name=value;} entry from a documentation string.
     *
     * @param name the parameter name to look for
     * @param doc the documentation string to search
     * @return the parsed value, or {@link Double#NaN} when the entry is
     *         missing, is not terminated by {@code ';'}, or its value is not
     *         a parsable number
     */
    private double getDocParam(String name, String doc) {
        String search_term = name + "=";
        int begin_index = doc.indexOf(search_term);
        if (begin_index < 0) {
            return Double.NaN;
        }
        begin_index += search_term.length();
        int end_index = doc.indexOf(";", begin_index);
        if (end_index < 0) {
            return Double.NaN;
        }
        try {
            return Double.parseDouble(doc.substring(begin_index, end_index));
        }
        catch (NumberFormatException ignored) {
            // Documentation is free-form text; an unparsable value is treated
            // the same as a missing one rather than aborting the caller.
            return Double.NaN;
        }
    }

    /**
     * Hook invoked when a production is excised. Productions that carry RL
     * rule info have their references removed from the per-state RL data;
     * all other productions are ignored.
     *
     * @param prod the production being excised
     */
    public void exciseProduction(Production prod) {
        if (prod.rlRuleInfo == null) {
            return;
        }
        this.rl_remove_refs_for_prod(prod);
    }
}

