public class QLearner extends AbstractLearner implements MDPLearner, Resetable, org.springframework.beans.factory.InitializingBean, Serializable, Prototypeable
An implementation of the Q-learning algorithm. This algorithm is described in Watkins, J. C. H., Dayan, P., 1992. Q-learning. Machine Learning 8, 279-292.
Modifier and Type | Field and Description |
---|---|
protected ActionSelector |
actionSelector |
protected int |
bestAction
The best action for the current state
|
protected int |
currentState
The current state
|
protected double |
discountRate
The discount rate for future payoffs.
|
protected double |
initialQValue |
protected int |
lastActionChosen
The last action that was chosen.
|
protected double |
learningRate
The learning rate.
|
protected int |
numActions
The number of possible actions
|
protected int |
numStates
The number of possible states
|
protected int |
previousState
The previous state
|
protected cern.jet.random.engine.RandomEngine |
prng |
protected double[][] |
q
The matrix representing the estimated payoff of each possible action in
each possible state.
|
monitor
Constructor and Description |
---|
QLearner() |
QLearner(int numStates,
int numActions,
double learningRate,
double discountRate,
cern.jet.random.engine.RandomEngine prng) |
QLearner(cern.jet.random.engine.RandomEngine prng) |
Modifier and Type | Method and Description |
---|---|
int |
act()
Request that the learner perform an action.
|
void |
afterPropertiesSet() |
int |
bestAction(int state) |
void |
dumpState(DataWriter out)
Write out our state data to the specified data writer.
|
ActionSelector |
getActionSelector() |
double |
getDiscountRate() |
double |
getInitialQValue() |
int |
getLastActionChosen() |
double |
getLearningDelta()
Return a value indicative of the amount of learning that occurred during the
last iteration.
|
double |
getLearningRate() |
int |
getNumberOfActions()
Get the number of different possible actions this learner can choose from
when it performs an action.
|
int |
getNumberOfStates() |
int |
getPreviousState() |
cern.jet.random.engine.RandomEngine |
getPrng() |
int |
getState() |
double |
getValueEstimate(int action) |
double[] |
getValueEstimates(int state) |
void |
initialise() |
double |
maxQ(int newState) |
void |
newState(double reward,
int newState)
The call-back after performing an action.
|
Object |
protoClone() |
void |
reset()
Reinitialise our state to the original settings.
|
void |
setActionSelector(ActionSelector actionSelector) |
void |
setDiscountRate(double discountRate) |
void |
setInitialQValue(double initialQValue) |
void |
setLearningRate(double learningRate) |
void |
setNumberOfActions(int numActions) |
void |
setNumberOfStates(int numStates) |
void |
setPrng(cern.jet.random.engine.RandomEngine prng) |
void |
setState(int newState) |
void |
setStatesAndActions(int numStates,
int numActions) |
String |
toString() |
protected void |
updateQ(double reward,
int newState) |
int |
worstAction(int state) |
monitor
protected int numStates
protected int numActions
protected double[][] q
protected double learningRate
protected double discountRate
protected int previousState
protected int currentState
protected int lastActionChosen
protected int bestAction
protected cern.jet.random.engine.RandomEngine prng
protected ActionSelector actionSelector
protected double initialQValue
public QLearner(int numStates, int numActions, double learningRate, double discountRate, cern.jet.random.engine.RandomEngine prng)
public QLearner(cern.jet.random.engine.RandomEngine prng)
public QLearner()
public Object protoClone()
protoClone
in interface Prototypeable
public void initialise()
public void setStatesAndActions(int numStates, int numActions)
public void setState(int newState)
public int getState()
public int act()
DiscreteLearner
act
in interface DiscreteLearner
public void newState(double reward, int newState)
MDPLearner
newState
in interface MDPLearner
reward
- The reward received from taking the most recently-selected
action.
newState
- The new state encountered after taking the most
recently-selected action.
protected void updateQ(double reward, int newState)
public double maxQ(int newState)
public int worstAction(int state)
public int bestAction(int state)
bestAction
in interface MDPLearner
public void reset()
Resetable
public void setDiscountRate(double discountRate)
public double getDiscountRate()
public int getLastActionChosen()
public double getLearningDelta()
Learner
getLearningDelta
in interface Learner
getLearningDelta
in class AbstractLearner
public void dumpState(DataWriter out)
Learner
dumpState
in interface Learner
dumpState
in class AbstractLearner
public int getNumberOfActions()
DiscreteLearner
getNumberOfActions
in interface DiscreteLearner
getNumberOfActions
in interface MDPLearner
public double getLearningRate()
public void setLearningRate(double learningRate)
public int getNumberOfStates()
getNumberOfStates
in interface MDPLearner
public void setNumberOfStates(int numStates)
public void setNumberOfActions(int numActions)
public int getPreviousState()
public cern.jet.random.engine.RandomEngine getPrng()
public void setPrng(cern.jet.random.engine.RandomEngine prng)
public ActionSelector getActionSelector()
public void setActionSelector(ActionSelector actionSelector)
public double getValueEstimate(int action)
public void setInitialQValue(double initialQValue)
public double getInitialQValue()
public double[] getValueEstimates(int state)
getValueEstimates
in interface MDPLearner
state
- The current state of the MDP.
Copyright © 2014. All rights reserved.