// RLGO Code
//
// Brought to you by: sliv
// [r74]: /trunk/rlgo/RlState.h
//
// 253 lines (197 with data), 5.6 kB

//----------------------------------------------------------------------------
/** @file RlState.h
    Learning state for a single timestep
*/
//----------------------------------------------------------------------------

#ifndef RLSTATE_H
#define RLSTATE_H

#include "RlActiveSet.h"
#include "RlUtils.h"
#include "SgBlackWhite.h"
#include "SgMove.h"

//----------------------------------------------------------------------------
/** Simple class containing state information for an individual time-step.

    A state counts as initialised while its time-step is non-negative.
    Most accessors assert (via SG_ASSERT) that the state is initialised,
    or that the requested quantity has actually been set, before
    returning it.
*/
class RlState
{
public:

    /** Types of policy, for determining on/off-policy status */
    enum
    {
        POL_NONE, // No move selected
        POL_BEST, // Best move selected (sets bestMove and bestEval)
        POL_ON,   // Always considered on-policy
        POL_OFF,  // Always considered off-policy
        POL_TERMINAL // Special value for terminal states
    };

    /** Default constructor (defined out of line) */
    RlState();

    /** Construct a state for the given time-step and colour to play
        (defined out of line) */
    RlState(int timestep, SgBlackWhite colour);

    /** Initialise this state
        @param timestep Time-step this state represents
        @param colour Colour to play in this state */
    void Initialise(int timestep, SgBlackWhite colour);

    /** Uninitialise this state */
    void Uninitialise();

    /** Re-initialise this state, keeping its time-step and colour */
    void Reinitialise();

    /** Resize active sets */
    void Resize(int activesize);

    /** Set this state to be a terminal state with specified reward */
    void SetTerminal(RlFloat score);

    /** Set the evaluation of this state */
    void SetEval(RlFloat value);

    /** Set the policy type (one of the POL_ values) */
    void SetPolicyType(int policytype);

    /** Set the move */
    void SetMove(SgMove move);

    /** Set the active features */
    void SetActive(const RlActiveSet& active);

    /** Check whether this state is on-policy */
    bool OnPolicy() const;

    /** Copy best moves and values from source state */
    void CopyBest(const RlState& sourcestate);

    //------------------------------------------------------------------------
    /** Accessors */

    /** Whether this state currently holds a valid (non-negative)
        time-step */
    bool Initialised() const
    {
        return m_timestep >= 0;
    }

    /** Time-step of this state; asserts that the state is initialised */
    int TimeStep() const
    {
        SG_ASSERT(Initialised());
        return m_timestep;
    }

    /** Colour to play; asserts that the state is initialised */
    SgBlackWhite Colour() const
    {
        SG_ASSERT(Initialised());
        return m_colour;
    }

    /** Selected move; asserts that the state is initialised */
    SgMove Move() const
    {
        SG_ASSERT(Initialised());
        return m_move;
    }

    /** Whether an evaluation has been set; asserts that the state is
        initialised */
    bool Evaluated() const
    {
        SG_ASSERT(Initialised());
        return m_evaluated;
    }

    /** Whether active features have been set; asserts that the state is
        initialised */
    bool ActiveSet() const
    {
        SG_ASSERT(Initialised());
        return m_activeSet;
    }

    /** Whether this is a terminal state; asserts that the state is
        initialised */
    bool Terminal() const
    {
        SG_ASSERT(Initialised());
        return m_terminal;
    }

    /** Active features; asserts that they have been set */
    const RlActiveSet& Active() const
    {
        SG_ASSERT(ActiveSet());
        return m_active;
    }

    /** Reward received; asserts that the state is initialised.
        Returned by plain value, consistent with BestValue() */
    RlFloat Reward() const
    {
        SG_ASSERT(Initialised());
        return m_reward;
    }

    /** Evaluation of this state; asserts that it has been evaluated.
        Returned by plain value, consistent with BestValue() */
    RlFloat Eval() const
    {
        SG_ASSERT(Evaluated());
        return m_eval;
    }

    /** Best move; asserts that the state is initialised and that the
        policy type is POL_BEST */
    SgMove BestMove() const
    {
        SG_ASSERT(Initialised());
        SG_ASSERT(m_policyType == POL_BEST);
        return m_bestMove;
    }

    /** Value of the best move; asserts that the state is initialised and
        that the policy type is POL_BEST */
    RlFloat BestValue() const
    {
        SG_ASSERT(Initialised());
        SG_ASSERT(m_policyType == POL_BEST);
        return m_bestEval;
    }

    /** Policy type; no initialisation check is performed here */
    int PolicyType() const { return m_policyType; }

private:

    /** Clear best move information (defined out of line) */
    void ClearBest();

    /** Time in this state; a negative value means uninitialised */
    int m_timestep;

    /** Colour to play in current state */
    SgBlackWhite m_colour;

    /** Selected move */
    SgMove m_move;

    /** Type of policy used to select move in this state */
    int m_policyType;

    /** Whether state has been evaluated yet */
    bool m_evaluated;

    /** Whether active features have been set yet */
    bool m_activeSet;

    /** Whether this is a terminal state */
    bool m_terminal;

    /** Active features in current state */
    RlActiveSet m_active;

    /** Reward received */
    RlFloat m_reward;

    /** The linear evaluation (unsquashed) of this state */
    RlFloat m_eval;

    /** Best move, if computed */
    SgMove m_bestMove;

    /** Value of best move, if computed */
    RlFloat m_bestEval;

friend class RlEvaluator; // For setting best moves and values
};

/** Reset this state for the given time-step and colour to play.
    The selected move, policy type, status flags, reward and evaluation
    all return to their default values. */
inline void RlState::Initialise(int timestep, SgBlackWhite colour)
{
    // Identity of the state
    m_colour = colour;
    m_timestep = timestep;

    // No move or policy has been chosen yet
    m_policyType = POL_NONE;
    m_move = SG_NULLMOVE;

    // Nothing has been evaluated, activated or terminated yet
    m_terminal = false;
    m_activeSet = false;
    m_evaluated = false;

    // Numeric values start from zero
    m_eval = 0;
    m_reward = 0;

    // Deliberately left untouched, for efficiency:
    //  - the active set
    //  - the best moves and values
}

/** Mark this state as uninitialised by giving it a negative time-step
    and an empty colour. Other members are left as they are. */
inline void RlState::Uninitialise()
{
    m_colour = SG_EMPTY;
    m_timestep = -1; // Negative time-step means "not initialised"
}

inline void RlState::Reinitialise()
{
    Initialise(m_timestep, m_colour);
}

/** Record the move selected in this state.
    Unless RL_REUSE is defined, asserts that no move has been recorded
    yet (i.e. the current move is still SG_NULLMOVE). */
inline void RlState::SetMove(SgMove move)
{
#if !defined(RL_REUSE)
    // Without reuse, a move may only be set once per initialisation
    SG_ASSERT(m_move == SG_NULLMOVE);
#endif // !RL_REUSE

    m_move = move;
}

/** Store a copy of the given active features and flag them as set. */
inline void RlState::SetActive(const RlActiveSet& active)
{
    // Copy first, so the flag is only raised once the data is in place
    m_active = active;

    m_activeSet = true;
}

inline void RlState::SetPolicyType(int type)
{
#ifndef RL_REUSE
    SG_ASSERT(m_policyType == POL_NONE);
#endif // RL_REUSE
    m_policyType = type;
}

/** Turn this state into a terminal state: the score becomes the reward
    and the policy type becomes POL_TERMINAL. */
inline void RlState::SetTerminal(RlFloat score)
{
    m_policyType = POL_TERMINAL;
    m_reward = score;
    m_terminal = true;
}

/** Store the evaluation of this state and flag it as evaluated.
    No check is made for an existing evaluation, so the value may be
    refreshed even if the state was already evaluated. */
inline void RlState::SetEval(RlFloat value)
{
    m_evaluated = true;
    m_eval = value;
}

//----------------------------------------------------------------------------

#endif // RLSTATE_H