[go: up one dir, main page]

Menu

[r74]: / trunk / rlgo / RlState.h  Maximize  Restore  History

Download this file

253 lines (197 with data), 5.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
//----------------------------------------------------------------------------
/** @file RlState.h
Learning state for a single timestep
*/
//----------------------------------------------------------------------------
#ifndef RLSTATE_H
#define RLSTATE_H
#include "RlActiveSet.h"
#include "RlUtils.h"
#include "SgBlackWhite.h"
#include "SgMove.h"
//----------------------------------------------------------------------------
/** State information for an individual time-step.

    A state counts as initialised while its timestep is non-negative
    (see Initialised()). Most accessors assert this precondition with
    SG_ASSERT before returning a value.
*/
class RlState
{
public:
    /** Types of policy, for determining on/off-policy status */
    enum
    {
        POL_NONE,    // No move selected
        POL_BEST,    // Best move selected (sets bestMove and bestEval)
        POL_ON,      // Always considered on-policy
        POL_OFF,     // Always considered off-policy
        POL_TERMINAL // Special value for terminal states
    };

    /** Constructors (not defined in this header) */
    RlState();
    RlState(int timestep, SgBlackWhite colour);

    /** Initialise this state for the given timestep and colour to play */
    void Initialise(int timestep, SgBlackWhite colour);

    /** Uninitialise this state */
    void Uninitialise();

    /** Re-initialise this state, keeping its timestep and colour */
    void Reinitialise();

    /** Resize active sets */
    void Resize(int activesize);

    /** Set this state to be a terminal state with specified reward */
    void SetTerminal(RlFloat score);

    /** Set the evaluation of this state */
    void SetEval(RlFloat value);

    /** Set the policy type (one of the POL_ values) */
    void SetPolicyType(int policytype);

    /** Set the move */
    void SetMove(SgMove move);

    /** Set the active features */
    void SetActive(const RlActiveSet& active);

    /** Check whether this state is on-policy */
    bool OnPolicy() const;

    /** Copy best moves and values from source state */
    void CopyBest(const RlState& sourcestate);

    //------------------------------------------------------------------------
    /** Accessors */

    /** True while the stored timestep is non-negative */
    bool Initialised() const
    {
        return m_timestep >= 0;
    }

    /** Time in this state; asserts Initialised() */
    int TimeStep() const
    {
        SG_ASSERT(Initialised());
        return m_timestep;
    }

    /** Colour to play in this state; asserts Initialised() */
    SgBlackWhite Colour() const
    {
        SG_ASSERT(Initialised());
        return m_colour;
    }

    /** Selected move; asserts Initialised() */
    SgMove Move() const
    {
        SG_ASSERT(Initialised());
        return m_move;
    }

    /** Whether the state has been evaluated yet; asserts Initialised() */
    bool Evaluated() const
    {
        SG_ASSERT(Initialised());
        return m_evaluated;
    }

    /** Whether active features have been set yet; asserts Initialised() */
    bool ActiveSet() const
    {
        SG_ASSERT(Initialised());
        return m_activeSet;
    }

    /** Whether this is a terminal state; asserts Initialised() */
    bool Terminal() const
    {
        SG_ASSERT(Initialised());
        return m_terminal;
    }

    /** Active features in this state; asserts ActiveSet() */
    const RlActiveSet& Active() const
    {
        SG_ASSERT(ActiveSet());
        return m_active;
    }

    /** Reward received; asserts Initialised().
        Returned by plain value, like BestValue(): a top-level const on a
        by-value return adds nothing for the caller. */
    RlFloat Reward() const
    {
        SG_ASSERT(Initialised());
        return m_reward;
    }

    /** Linear (unsquashed) evaluation of this state; asserts Evaluated().
        Returned by plain value, like BestValue(). */
    RlFloat Eval() const
    {
        SG_ASSERT(Evaluated());
        return m_eval;
    }

    /** Best move; asserts Initialised() and that the policy type is
        POL_BEST */
    SgMove BestMove() const
    {
        SG_ASSERT(Initialised());
        SG_ASSERT(m_policyType == POL_BEST);
        return m_bestMove;
    }

    /** Value of best move; asserts Initialised() and that the policy type
        is POL_BEST */
    RlFloat BestValue() const
    {
        SG_ASSERT(Initialised());
        SG_ASSERT(m_policyType == POL_BEST);
        return m_bestEval;
    }

    /** Policy type used in this state. Unlike the other accessors, this
        one performs no assertion. */
    int PolicyType() const { return m_policyType; }

private:
    void ClearBest();

    /** Time in this state */
    int m_timestep;

    /** Colour to play in current state */
    SgBlackWhite m_colour;

    /** Selected move */
    SgMove m_move;

    /** Type of policy used to select move in this state */
    int m_policyType;

    /** Whether state has been evaluated yet */
    bool m_evaluated;

    /** Whether active features have been set yet */
    bool m_activeSet;

    /** Whether this is a terminal state */
    bool m_terminal;

    /** Active features in current state */
    RlActiveSet m_active;

    /** Reward received */
    RlFloat m_reward;

    /** The linear evaluation (unsquashed) of this state */
    RlFloat m_eval;

    /** Best move, if computed */
    SgMove m_bestMove;

    /** Value of best move, if computed */
    RlFloat m_bestEval;

    friend class RlEvaluator; // For setting best moves and values
};
inline void RlState::Initialise(int timestep, SgBlackWhite colour)
{
    // Identity of this state
    m_colour = colour;
    m_timestep = timestep;

    // No move or policy chosen yet
    m_policyType = POL_NONE;
    m_move = SG_NULLMOVE;

    // Status flags all start cleared
    m_terminal = false;
    m_activeSet = false;
    m_evaluated = false;

    // Numeric results start from zero
    m_eval = 0;
    m_reward = 0;

    // For efficiency, the active set and the best move/value are left
    // untouched here rather than being cleared.
}
inline void RlState::Uninitialise()
{
    // Only timestep and colour are reset; no other member is modified.
    m_colour = SG_EMPTY;
    m_timestep = -1;
}
inline void RlState::Reinitialise()
{
Initialise(m_timestep, m_colour);
}
inline void RlState::SetMove(SgMove move)
{
#if !defined(RL_REUSE)
    // Unless RL_REUSE is defined, the stored move must still be
    // SG_NULLMOVE when a move is set.
    SG_ASSERT(m_move == SG_NULLMOVE);
#endif // !RL_REUSE
    this->m_move = move;
}
inline void RlState::SetActive(const RlActiveSet& active)
{
m_active = active;
m_activeSet = true;
}
inline void RlState::SetPolicyType(int type)
{
#ifndef RL_REUSE
SG_ASSERT(m_policyType == POL_NONE);
#endif // RL_REUSE
m_policyType = type;
}
inline void RlState::SetTerminal(RlFloat score)
{
    // A terminal state has its own policy type and takes the score as
    // its reward.
    m_policyType = POL_TERMINAL;
    m_reward = score;
    m_terminal = true;
}
inline void RlState::SetEval(RlFloat value)
{
    // No check for a previous evaluation: the value may be refreshed
    // even when the state has already been evaluated.
    m_evaluated = true;
    m_eval = value;
}
//----------------------------------------------------------------------------
#endif // RLSTATE_H