[go: up one dir, main page]

File: Targets.h

package info (click to toggle)
timbl 6.10-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,088 kB
  • sloc: cpp: 17,211; ansic: 425; sh: 70; makefile: 63
file content (204 lines) | stat: -rw-r--r-- 7,601 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
/*
  Copyright (c) 1998 - 2024
  ILK   - Tilburg University
  CLST  - Radboud University
  CLiPS - University of Antwerp

  This file is part of timbl

  timbl is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 3 of the License, or
  (at your option) any later version.

  timbl is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, see <http://www.gnu.org/licenses/>.

  For questions and suggestions, see:
      https://github.com/LanguageMachines/timbl/issues
  or send mail to:
      lamasoftware (at ) science.ru.nl

*/
#ifndef TIMBL_TARGETS_H
#define TIMBL_TARGETS_H

#include <vector>
#include <map>
#include <unordered_map>
#include "unicode/unistr.h"
#include "timbl/MsgClass.h"
#include "ticcutils/Unicode.h"

namespace Hash {
  class UnicodeHash;
}

namespace Timbl {

  class ValueClass {
  public:
    ValueClass( const icu::UnicodeString& n, size_t i ):
      _name( n ), _index( i ), _frequency( 1 ) {};
    ValueClass( const ValueClass& ) = delete; // forbid copies
    ValueClass& operator=( const ValueClass& ) = delete; // forbid copies
    virtual ~ValueClass() {};
    void ValFreq( size_t f ){ _frequency = f; };
    void IncValFreq( int f ){ _frequency += f; };
    size_t ValFreq( ) const { return _frequency; };
    void incr_val_freq(){ ++_frequency; };
    void decr_val_freq(){ --_frequency; };
    size_t Index() const { return _index; };
    const icu::UnicodeString& name() const { return _name; };
    const std::string name_string() const { return TiCC::UnicodeToUTF8(_name);};
    // temporary for backward compatability
    const icu::UnicodeString& name_u() const { return _name; }; // HACK
    const std::string Name() const { return TiCC::UnicodeToUTF8(_name); }; // HACK
    // REMOVE ^^^^
    friend std::ostream& operator<<( std::ostream& os, ValueClass const *vc );
  protected:
    const icu::UnicodeString& _name;
    size_t _index;
    size_t _frequency;
  };

  class TargetValue: public ValueClass {
  public:
    TargetValue( const icu::UnicodeString&, size_t );
  };

  class Targets: public MsgClass {
    friend class MBLClass;
    friend class WClassDistribution;
    friend class ConfusionMatrix;
  public:
    explicit Targets( Hash::UnicodeHash *T ):
      target_hash( T ),
      is_reference(false)
    {};
    ~Targets() override;
    Targets& operator=( const Targets& );
    void init();
    TargetValue *add_value( const icu::UnicodeString&, int freq = 1 );
    TargetValue *add_value( size_t, int freq = 1 );
    TargetValue *Lookup( const icu::UnicodeString& ) const;
    TargetValue *ReverseLookup( size_t ) const;
    bool decrement_value( TargetValue * );
    bool increment_value( TargetValue * );
    TargetValue *MajorityClass() const;
    size_t EffectiveValues() const;
    size_t TotalValues() const;
    size_t num_of_values() const { return values_array.size(); };
    Hash::UnicodeHash *hash() const { return target_hash; };
  private:
    Hash::UnicodeHash *target_hash;
    std::vector<TargetValue *> values_array;
    std::unordered_map< size_t, TargetValue *> reverse_values;
    bool is_reference;
  };

  class Vfield{
    friend class ClassDistribution;
    friend class WClassDistribution;
    friend std::ostream& operator<<( std::ostream&, const Vfield& );
    friend std::ostream& operator<<( std::ostream&, const Vfield * );
  public:
    Vfield( const TargetValue *val, int freq, double w ):
      value(val), frequency(freq), weight(w) {};
    Vfield( const Vfield& in ):
      value(in.value), frequency(in.frequency), weight(in.weight) {};
    Vfield& operator=( const Vfield& ) = delete; // forbid copies
    ~Vfield(){};
    std::ostream& put( std::ostream& ) const;
    const TargetValue *Value() const { return value; };
    void Value( const TargetValue *t ){  value = t; };
    size_t Freq() const { return frequency; };
    void IncFreq( int inc=1 ) {  frequency += inc; };
    void AddFreq( int f ) {  frequency += f; weight += f; };
    void DecFreq() {  frequency -= 1; };
    double Weight() const { return weight; };
    void SetWeight( double w ){ weight = w; };
    size_t Index();
  protected:
    const TargetValue *value;
    size_t frequency;
    double weight;
  private:
  };

  class WClassDistribution;

  class ClassDistribution{
    friend std::ostream& operator<<( std::ostream&, const ClassDistribution& );
    friend std::ostream& operator<<( std::ostream&, const ClassDistribution * );
    friend class WClassDistribution;
  public:
    using VDlist = std::map<size_t, Vfield *>;
    using dist_iterator = VDlist::const_iterator;
    ClassDistribution( ): total_items(0) {};
    ClassDistribution( const ClassDistribution& );
    virtual ~ClassDistribution(){ clear(); };
    size_t totalSize() const{ return total_items; };
    size_t size() const{ return distribution.size(); };
    bool empty() const{ return distribution.empty(); };
    void clear();
    dist_iterator begin() const { return distribution.begin(); };
    dist_iterator end() const { return distribution.end(); };
    virtual const TargetValue* BestTarget( bool&, bool = false ) const;
    void Merge( const ClassDistribution& );
    virtual void SetFreq( const TargetValue *, int, double=1.0 );
    virtual bool IncFreq( const TargetValue *, size_t, double=1.0 );
    void DecFreq( const TargetValue * );
    static ClassDistribution *read_distribution( std::istream&,
						 Targets&,
						 bool );
    static ClassDistribution *read_distribution_hashed( std::istream&,
							Targets&,
							bool );
    const std::string DistToString() const;
    const std::string DistToStringW( int ) const;
    double Confidence( const TargetValue * ) const;
    virtual const std::string SaveHashed() const;
    virtual const std::string Save() const;
    bool ZeroDist() const { return total_items == 0; };
    double Entropy() const;
    ClassDistribution *to_VD_Copy( ) const;
    virtual WClassDistribution *to_WVD_Copy() const;
  protected:
    virtual void DistToString( std::string&, double=0 ) const;
    virtual void DistToStringWW( std::string&, int ) const;
    const TargetValue* BestTargetN( bool &, bool = false ) const;
    const TargetValue* BestTargetW( bool &, bool = false ) const;
    virtual ClassDistribution *clone( ) const {
      return new ClassDistribution(); };
    size_t total_items;
    VDlist distribution;
  };

  class WClassDistribution: public ClassDistribution {
  public:
    WClassDistribution(): ClassDistribution() {};
    const TargetValue* BestTarget( bool &, bool = false ) const override;
    void SetFreq( const TargetValue *, int, double ) override;
    bool IncFreq( const TargetValue *, size_t, double ) override;
    WClassDistribution *to_WVD_Copy( ) const override;
    const std::string SaveHashed() const override;
    const std::string Save() const override;
    void Normalize();
    void Normalize_1( double, const Targets& );
    void Normalize_2();
    void MergeW( const ClassDistribution&, double );
  private:
    void DistToString( std::string&, double=0 ) const override;
    void DistToStringWW( std::string&, int ) const override;
    WClassDistribution *clone() const override {
      return new WClassDistribution; };
  };

}
#endif // TINBL_TARGETS_H