package com.github.chen0040.rl.learning.actorcritic;

import com.github.chen0040.rl.actionselection.AbstractActionSelectionStrategy;
import com.github.chen0040.rl.actionselection.ActionSelectionStrategy;
import com.github.chen0040.rl.actionselection.ActionSelectionStrategyFactory;
import com.github.chen0040.rl.actionselection.GibbsSoftMaxActionSelectionStrategy;
import com.github.chen0040.rl.models.QModel;
import java.io.Serializable;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;

/* loaded from: input_file:com/github/chen0040/rl/learning/actorcritic/ActorCriticLearner.class */
/**
 * Learner that keeps a {@link QModel} named {@code P} and an
 * {@link ActionSelectionStrategy} used to pick actions from that model.
 *
 * <p>Instances created through the no-argument constructor have both fields
 * {@code null} until {@link #copy(ActorCriticLearner)}, {@link #setP(QModel)} and
 * {@link #setActionSelection(String)} populate them. {@link #clone()},
 * {@link #copy(ActorCriticLearner)}, {@link #equals(Object)} and {@link #hashCode()}
 * tolerate such unpopulated instances; the action-selection and update methods do not.
 *
 * <p>The class declares no explicit {@code serialVersionUID}.
 */
public class ActorCriticLearner implements Cloneable, Serializable {
    /** Model read by {@link #selectAction(int, Set)} and written by {@code update}. */
    protected QModel P;

    /** Strategy that chooses an action given a state index and {@link #P}. */
    protected ActionSelectionStrategy actionSelectionStrategy;

    /**
     * Creates an empty learner whose model and strategy are both {@code null}.
     * Intended to be populated afterwards (e.g. by {@link #copy(ActorCriticLearner)}).
     */
    public ActorCriticLearner() {
    }

    /**
     * Creates a learner with alpha {@code 1.0}, gamma {@code 0.7} and {@code 0.01} as the
     * third {@link QModel} constructor argument.
     *
     * @param i  first size argument forwarded to the {@link QModel} constructor
     *           (presumably the number of states; confirm against QModel)
     * @param i2 second size argument forwarded to the {@link QModel} constructor
     *           (presumably the number of actions; confirm against QModel)
     */
    public ActorCriticLearner(int i, int i2) {
        this(i, i2, 1.0, 0.7, 0.01);
    }

    /**
     * Creates a learner backed by a new {@link QModel} and a
     * {@link GibbsSoftMaxActionSelectionStrategy}.
     *
     * @param i  first size argument forwarded to the {@link QModel} constructor
     * @param i2 second size argument forwarded to the {@link QModel} constructor
     * @param d  value passed to {@link QModel#setAlpha}
     * @param d2 value passed to {@link QModel#setGamma}
     * @param d3 third argument of the {@link QModel} constructor
     *           (presumably the initial Q value; confirm against QModel)
     */
    public ActorCriticLearner(int i, int i2, double d, double d2, double d3) {
        this.P = new QModel(i, i2, d3);
        this.P.setAlpha(d);
        this.P.setGamma(d2);
        this.actionSelectionStrategy = new GibbsSoftMaxActionSelectionStrategy();
    }

    /**
     * Returns a new {@code ActorCriticLearner} populated via {@link #copy(ActorCriticLearner)}.
     *
     * <p>The result is always an {@code ActorCriticLearner}, even when invoked on a
     * subclass that does not override this method.
     */
    @Override
    public Object clone() {
        ActorCriticLearner duplicate = new ActorCriticLearner();
        duplicate.copy(this);
        return duplicate;
    }

    /**
     * Replaces this learner's model and strategy with clones of the ones held by
     * the given learner. Fields that are {@code null} in the source are copied as
     * {@code null} instead of failing.
     *
     * @param actorCriticLearner learner to copy from; must not be {@code null}
     * @throws ClassCastException if the source strategy is non-null and not an
     *                            {@link AbstractActionSelectionStrategy}
     */
    public void copy(ActorCriticLearner actorCriticLearner) {
        QModel sourceModel = actorCriticLearner.P;
        this.P = sourceModel == null ? null : (QModel) sourceModel.clone();

        ActionSelectionStrategy sourceStrategy = actorCriticLearner.actionSelectionStrategy;
        // clone() is reached through AbstractActionSelectionStrategy, hence the downcast.
        this.actionSelectionStrategy = sourceStrategy == null
                ? null
                : (ActionSelectionStrategy) ((AbstractActionSelectionStrategy) sourceStrategy).clone();
    }

    /**
     * Two learners are equal when their models are equal and their strategies are
     * equal; {@code null} fields are only equal to {@code null} fields.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // instanceof already rejects null, so no separate null check is needed.
        if (!(obj instanceof ActorCriticLearner)) {
            return false;
        }
        ActorCriticLearner other = (ActorCriticLearner) obj;
        return Objects.equals(this.P, other.P)
                && Objects.equals(this.actionSelectionStrategy, other.actionSelectionStrategy);
    }

    /**
     * Hash code derived from the same two fields that {@link #equals(Object)} compares.
     *
     * <p>NOTE(review): this is only consistent with {@code equals} if {@link QModel} and
     * the strategy implementations override {@code hashCode} consistently with their own
     * {@code equals}; confirm in those classes.
     */
    @Override
    public int hashCode() {
        return Objects.hash(this.P, this.actionSelectionStrategy);
    }

    /**
     * Asks the strategy to select an action for index {@code i} using {@link #P}
     * and returns the index of the selected action.
     *
     * @param i   index forwarded to the strategy (presumably the current state id)
     * @param set forwarded unchanged to the strategy; may be {@code null}, which is what
     *            {@link #selectAction(int)} passes (presumably the set of permitted
     *            action ids; confirm against the strategy implementation)
     * @return index of the action chosen by the strategy
     */
    public int selectAction(int i, Set<Integer> set) {
        return this.actionSelectionStrategy.selectAction(i, this.P, set).getIndex();
    }

    /**
     * Same as {@link #selectAction(int, Set)} with a {@code null} set.
     *
     * @param i index forwarded to the strategy
     * @return index of the action chosen by the strategy
     */
    public int selectAction(int i) {
        return selectAction(i, null);
    }

    /**
     * Same as {@link #update(int, int, int, Set, double, Function)} with a {@code null} set.
     *
     * @param i        first index of the model entry to update; also passed to {@code function}
     * @param i2       second index of the model entry to update
     * @param i3       index passed to {@code function} for the additive term
     * @param d        value added to {@code function.apply(i3)} (presumably the reward)
     * @param function maps an index to a value; must not return {@code null}
     */
    public void update(int i, int i2, int i3, double d, Function<Integer, Double> function) {
        update(i, i2, i3, null, d, function);
    }

    /**
     * Updates the model entry {@code (i, i2)} in place:
     * {@code Q(i, i2) += alpha(i, i2) * (d + function(i3) - function(i))}.
     *
     * <p>NOTE(review): the model's gamma is not applied in this expression and
     * {@code set} is never read; both are preserved as-is, confirm they are intended.
     *
     * @param i        first index of the model entry to update; also passed to {@code function}
     * @param i2       second index of the model entry to update
     * @param i3       index passed to {@code function} for the additive term
     * @param set      currently unused
     * @param d        value added to {@code function.apply(i3)} (presumably the reward)
     * @param function maps an index to a value; must not return {@code null}
     */
    public void update(int i, int i2, int i3, Set<Integer> set, double d, Function<Integer, Double> function) {
        double currentQ = this.P.getQ(i, i2);
        double alpha = this.P.getAlpha(i, i2);
        double delta = (d + function.apply(i3)) - function.apply(i);
        this.P.setQ(i, i2, currentQ + (alpha * delta));
    }

    /**
     * Returns the current strategy in the string form produced by
     * {@link ActionSelectionStrategyFactory#serialize}.
     */
    public String getActionSelection() {
        return ActionSelectionStrategyFactory.serialize(this.actionSelectionStrategy);
    }

    /**
     * Replaces the current strategy with the one that
     * {@link ActionSelectionStrategyFactory#deserialize} builds from {@code str}.
     *
     * @param str serialized strategy description
     */
    public void setActionSelection(String str) {
        this.actionSelectionStrategy = ActionSelectionStrategyFactory.deserialize(str);
    }

    /** Returns the model held by this learner (the live instance, not a copy). */
    public QModel getP() {
        return this.P;
    }

    /**
     * Replaces the model held by this learner.
     *
     * @param qModel new model; stored by reference
     */
    public void setP(QModel qModel) {
        this.P = qModel;
    }
}
