[go: up one dir, main page]

Menu

[r74]: / trunk / rlgo / RlAgent.h  Maximize  Restore  History

Download this file

278 lines (213 with data), 8.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
//----------------------------------------------------------------------------
/** @file RlAgent.h
An agent represents the top-level API for RLGO
*/
//----------------------------------------------------------------------------
#ifndef RLAGENT_H
#define RLAGENT_H
#include "RlActiveSet.h"
#include "RlFactory.h"
#include "RlHistory.h"
#include "RlSetup.h"
#include "RlTrace.h"
#include "SgBoardColor.h"
#include "SgDebug.h"
#include "SgRect.h"
#include <boost/filesystem/fstream.hpp>
#include <list>
#include <vector>
namespace bfs = boost::filesystem;
class RlAgentLogger;
class RlBinaryFeatures;
class RlWeightSet;
class RlEvaluator;
class RlHistory;
class RlLog;
class RlPolicy;
class RlSimulator;
class RlTrainer;
//----------------------------------------------------------------------------
/** Top-level API for RLGO.
    An agent holds pointers to a policy, an evaluator, a binary feature set,
    a weight set, a history and a trainer/tester pair (see the protected data
    members below) and declares the per-game and per-move entry points that
    the derived agent classes override. */
class RlAgent : public RlAutoObject
{
public:

    /** Construct an agent playing on the given board.
        All component pointers are optional and default to null. */
    RlAgent(GoBoard& board,
            RlPolicy* policy = 0,
            RlEvaluator* evaluator = 0,
            RlBinaryFeatures* featureset = 0,
            RlWeightSet* weightset = 0,
            RlHistory* history = 0,
            RlTrainer* trainer = 0);

    /** Load in the settings for this agent.
        @param settings Stream the settings are read from */
    virtual void LoadSettings(std::istream& settings);

    /** Initialise agent */
    virtual void Initialise();

    /** Begin a new game.
        (Original note: "only ignore for startup" -- meaning unclear from
        this header; TODO confirm against the implementation.) */
    virtual void NewGame();

    /** End game by resignation or by normal termination.
        Updates history and calls trainer.
        @param resign Whether the game ended by resignation
        @param train Presumably whether the trainer is invoked -- TODO confirm
        @return The result of the game */
    virtual RlFloat EndGame(bool resign, bool train = true);

    /** Choose epsilon-greedy move (before move is made) */
    virtual SgMove SelectMove();

    /** Update features after executing a move by either player.
        This does not touch the weights.
        @param move The move that was executed
        @param colour Colour of the player who made the move
        @param updateboard Whether move should be played on board */
    virtual void Execute(SgMove move, SgBlackWhite colour,
                         bool updateboard = true);

    /** Update features after undoing a move by either player.
        This does not touch the weights.
        @param updateboard Whether move should be undone on board */
    virtual void Undo(bool updateboard = true);

    /** Think during agent's time */
    virtual void Think();

    /** Ponder during opponent's time */
    virtual void Ponder();

    /** Load from the given file. What is read is defined by the
        implementation, which is not visible in this header. */
    void Load(const bfs::path& filename);

    /** Save to the given file. What is written is defined by the
        implementation, which is not visible in this header. */
    void Save(const bfs::path& filename);

    /** @name Accessor functions
        The pointer accessors are all const so that they can be used through
        a const agent; they hand back the stored (non-owning view of the)
        pointer unchanged. */
    //@todo: improve encapsulation
    // @{

    /** Board the agent plays on.
        NOTE(review): m_board is not declared in this class; presumably it
        is inherited from RlAutoObject -- confirm. */
    GoBoard& Board() { return m_board; }

    RlBinaryFeatures* GetFeatureSet() const { return m_featureSet; }

    RlWeightSet* GetWeightSet() const { return m_weightSet; }

    RlPolicy* GetPolicy() const { return m_policy; }

    RlEvaluator* GetEvaluator() const { return m_evaluator; }

    RlHistory* GetHistory() const { return m_history; }

    RlTrainer* GetTrainer() const { return m_trainer; }

    RlTrainer* GetTester() const { return m_tester; }

    int GetTimeStep() const { return m_timestep; }

    /** State stored in the history for the current timestep.
        Dereferences m_history, so the history must be non-null. */
    RlState& GetState() { return m_history->GetState(m_timestep); }

    RlAgentLogger* GetLog() const { return m_log; }

    /** Replace the policy pointer used by this agent */
    void SetPolicy(RlPolicy* policy) { m_policy = policy; }

    // @}

protected:

    /** Set played move in state */
    virtual void SetMove(SgMove move);

    /** Set active features in state */
    void SetActive();

    /** Get the probability of winning the game after selecting move */
    RlFloat GetProbability(SgMove move, SgBlackWhite colour) const;

    /** Check value after move is below the resignation threshold.
        @param pwin Presumably the winning probability to test -- TODO
        confirm */
    bool CheckResign(RlFloat pwin) const;

    /** Score final position.
        @param resign Whether the game ended by resignation */
    RlFloat Score(bool resign) const;

    /** Check whether agent is in training or testing phase.
        @return true while training: either m_trainingGames is negative
        (always train) or fewer than m_trainingGames games have been
        counted in m_numGames */
    bool Training() const
    {
        return m_trainingGames < 0 || m_numGames < m_trainingGames;
    }

protected:

    RlPolicy* m_policy;

    RlEvaluator* m_evaluator;

    RlBinaryFeatures* m_featureSet;

    RlWeightSet* m_weightSet;

    RlHistory* m_history;

    RlTrainer* m_trainer;

    RlTrainer* m_tester;

    RlAgentLogger* m_log;

    /** Only update weights for this number of games
        (set to -1 to always update weights) */
    int m_trainingGames;

    /** Threshold at which to resign game */
    RlFloat m_resignThreshold;

    /** Whether to prune discovered features each game */
    bool m_prune;

    /** Current update timestep */
    int m_timestep;

    /** Current game number */
    int m_numGames;

    friend class RlAgentLogger;
};
//----------------------------------------------------------------------------
/** Agent class for dealing with real experience.
    Overrides the per-game and per-move entry points of RlAgent and adds a
    simulator pointer together with the weight-reset configuration. */
class RlRealAgent : public RlAgent
{
public:

    DECLARE_OBJECT(RlRealAgent);

    /** Construct a real-experience agent on the given board.
        All component pointers, including the simulator, are optional and
        default to null. */
    RlRealAgent(GoBoard& board,
                RlPolicy* policy = 0,
                RlEvaluator* evaluator = 0,
                RlBinaryFeatures* featureset = 0,
                RlWeightSet* weightset = 0,
                RlHistory* history = 0,
                RlTrainer* trainer = 0,
                RlSimulator* simulator = 0);

    /** Read this agent's settings from the given stream */
    virtual void LoadSettings(std::istream& settings);

    /** Initialise agent */
    virtual void Initialise();

    /** Begin a new game */
    virtual void NewGame();

    /** Finish the game, either by resignation or by normal termination.
        Updates history and calls trainer.
        @return The result of the game */
    virtual RlFloat EndGame(bool resign, bool train = true);

    /** Choose move according to current policy. */
    virtual SgMove SelectMove();

    /** Update features after a move has been executed by either player.
        The weights are not touched.
        @param move The move that was executed
        @param colour Colour of the player who made the move
        @param updateboard Whether move should be played on board */
    virtual void Execute(SgMove move, SgBlackWhite colour,
                         bool updateboard = true);

    /** Update features after a move by either player has been undone.
        The weights are not touched.
        @param updateboard Whether move should be undone on board */
    virtual void Undo(bool updateboard = true);

    /** Think during agent's time */
    virtual void Think();

    /** Ponder during opponent's time */
    virtual void Ponder();

    /** Accessor functions */
    //@todo: improve encapsulation
    RlSimulator* GetSimulator() { return m_simulator; }

protected:

    /** Load and/or zero weights */
    void InitWeights();

    /** Simulator pointer returned by GetSimulator() */
    RlSimulator* m_simulator;

    /** Weights to load on new game */
    std::string m_weightFile;

    /** Weight-reset modes.
        Presumably the values stored in m_resetWeights -- TODO confirm
        against the implementation. */
    enum
    {
        RL_NEVER_RESET = 0,
        RL_RESET_ON_INIT = 1,
        RL_RESET_ON_NEWGAME = 2
    };

    /** Whether to reset weights (to zero or weightfile) on init or
        new game */
    int m_resetWeights;

    /** Min value to reset weights */
    RlFloat m_minWeight;

    /** Max value to reset weights */
    RlFloat m_maxWeight;

    // NOTE(review): RlAgent befriends RlAgentLogger, while no RlAgentLog
    // class is declared in this header -- confirm which name is intended.
    friend class RlAgentLog;
};
//----------------------------------------------------------------------------
/** Agent class for dealing with simulated experience.
    Overrides only the end-of-game, execute and undo entry points of
    RlAgent. */
class RlSimAgent : public RlAgent
{
public:

    DECLARE_OBJECT(RlSimAgent);

    /** Construct a simulated-experience agent on the given board.
        All component pointers are optional and default to null. */
    RlSimAgent(GoBoard& board,
               RlPolicy* policy = 0,
               RlEvaluator* evaluator = 0,
               RlBinaryFeatures* featureset = 0,
               RlWeightSet* weightset = 0,
               RlHistory* history = 0,
               RlTrainer* trainer = 0);

    /** Finish the game, either by resignation or by normal termination.
        Updates history and calls trainer.
        @return The result of the game */
    virtual RlFloat EndGame(bool resign, bool train = true);

    /** Update features after a move has been executed by either player.
        The weights are not touched.
        @param move The move that was executed
        @param colour Colour of the player who made the move
        @param updateboard Whether move should be played on board */
    virtual void Execute(SgMove move, SgBlackWhite colour,
                         bool updateboard = true);

    /** Update features after a move by either player has been undone.
        The weights are not touched.
        @param updateboard Whether move should be undone on board */
    virtual void Undo(bool updateboard = true);
};
//----------------------------------------------------------------------------
#endif // RLAGENT_H