[go: up one dir, main page]

Menu

[r49]: / trunk / compress.e  Maximize  Restore  History

Download this file

247 lines (204 with data), 5.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
-- (c) Copyright 2006 Rapid Deployment Software - See License.txt
--
-- compression / decompression
export constant IL_MAGIC = 79, -- indicates an IL file
IL_VERSION = -- 10 (2.5 alpha) IL version number
-- 11 -- (2.5 beta)
12 -- 3.0.0 Open Source, no encryption
export constant IL_START = "YTREWQ\n"
-- Compressed / Decompress Euphoria objects
-- tags for various values:
export constant
I2B = 249, -- 2-byte signed integer follows
I3B = 250, -- 3-byte signed integer follows
I4B = 251, -- 4-byte signed integer follows
F4B = 252, -- 4-byte f.p. number follows
F8B = 253, -- 8-byte f.p. number follows
S1B = 254, -- sequence, 1-byte length follows, then elements
S4B = 255 -- sequence, 4-byte length follows, then elements
-- ranges for various sizes:
export constant
MIN1B = -2, -- minimum integer value stored in one byte
MAX1B = 246, -- maximum integer value (if no cache)
MIN2B = -power(2, 15),
MAX2B = power(2, 15)-1,
MIN3B = -power(2, 23),
MAX3B = power(2, 23)-1,
MIN4B = -power(2, 31)
constant COMP_CACHE_SIZE = 64 -- power of 2: number of large integers to cache
constant CACHE0 = 255-7-COMP_CACHE_SIZE -- just before cache
export integer max1b -- maximum integer value to store in one byte
max1b = CACHE0 + MIN1B
export function compress(object x)
-- Return the compressed representation of a Euphoria object
-- as a sequence of bytes (in memory).
-- The compression cache is not used. Decompression occurs in be_execute.c
sequence x4, s
if integer(x) then
if x >= MIN1B and x <= MAX1B then
return {x - MIN1B}
elsif x >= MIN2B and x <= MAX2B then
x -= MIN2B
return {I2B, and_bits(x, #FF), floor(x / #100)}
elsif x >= MIN3B and x <= MAX3B then
x -= MIN3B
return {I3B, and_bits(x, #FF), and_bits(floor(x / #100), #FF), floor(x / #10000)}
else
return I4B & int_to_bytes(x-MIN4B)
end if
elsif atom(x) then
-- floating point
x4 = atom_to_float32(x)
if x = float32_to_atom(x4) then
-- can represent as 4-byte float
return F4B & x4
else
return F8B & atom_to_float64(x)
end if
else
-- sequence
if length(x) <= 255 then
s = {S1B, length(x)}
else
s = S4B & int_to_bytes(length(x))
end if
for i = 1 to length(x) do
s &= compress(x[i])
end for
return s
end if
end function
sequence comp_cache -- recent large (over one byte) values
export procedure init_compress()
-- do this before a series of calls to fcompress() or fdecompress()
comp_cache = repeat({}, COMP_CACHE_SIZE)
end procedure
export procedure fcompress(integer f, object x)
-- Write the compressed representation of a Euphoria object
-- to disk as a sequence of bytes. A compression cache is used.
sequence x4, s
integer p
if integer(x) then
if x >= MIN1B and x <= max1b then
puts(f, x - MIN1B)
else
p = 1 + and_bits(x, COMP_CACHE_SIZE-1)
if equal(comp_cache[p], x) then
-- a cache hit
puts(f, CACHE0 + p)
else
-- cache miss
comp_cache[p] = x
if x >= MIN2B and x <= MAX2B then
x -= MIN2B
puts(f, {I2B, and_bits(x, #FF), floor(x / #100)})
elsif x >= MIN3B and x <= MAX3B then
x -= MIN3B
puts(f, {I3B, and_bits(x, #FF), and_bits(floor(x / #100), #FF), floor(x / #10000)})
else
puts(f, I4B & int_to_bytes(x-MIN4B))
end if
end if
end if
elsif atom(x) then
-- floating point
x4 = atom_to_float32(x)
if x = float32_to_atom(x4) then
-- can represent as 4-byte float
puts(f, F4B & x4)
else
puts(f, F8B & atom_to_float64(x))
end if
else
-- sequence
if length(x) <= 255 then
s = {S1B, length(x)}
else
s = S4B & int_to_bytes(length(x))
end if
puts(f, s)
for i = 1 to length(x) do
fcompress(f, x[i])
end for
end if
end procedure
atom mem0, mem1, mem2, mem3
mem0 = allocate(4)
mem1 = mem0 + 1
mem2 = mem0 + 2
mem3 = mem0 + 3
export integer current_db
function get4()
-- read 4-byte value at current position in database file
poke(mem0, getc(current_db))
poke(mem1, getc(current_db))
poke(mem2, getc(current_db))
poke(mem3, getc(current_db))
return peek4u(mem0)
end function
export function fdecompress(integer c)
-- read a compressed Euphoria object from disk.
-- A compression cache is used.
-- if c is set, then c is not in byte range.
sequence s
integer len
integer ival
if c = 0 then
c = getc(current_db)
if c <= CACHE0 then
return c + MIN1B
elsif c <= CACHE0 + COMP_CACHE_SIZE then
-- a value from cache
return comp_cache[c-CACHE0]
end if
end if
if c = I2B then
ival = getc(current_db) +
#100 * getc(current_db) +
MIN2B
comp_cache[1 + and_bits(ival, COMP_CACHE_SIZE-1)] = ival
return ival
elsif c = I3B then
ival = getc(current_db) +
#100 * getc(current_db) +
#10000 * getc(current_db) +
MIN3B
comp_cache[1 + and_bits(ival, COMP_CACHE_SIZE-1)] = ival
return ival
elsif c = I4B then
ival = get4() + MIN4B
comp_cache[1 + and_bits(ival, COMP_CACHE_SIZE-1)] = ival
return ival
elsif c = F4B then
return float32_to_atom({getc(current_db), getc(current_db),
getc(current_db), getc(current_db)})
elsif c = F8B then
return float64_to_atom({getc(current_db), getc(current_db),
getc(current_db), getc(current_db),
getc(current_db), getc(current_db),
getc(current_db), getc(current_db)})
else
-- sequence
if c = S1B then
len = getc(current_db)
else
len = get4()
end if
s = repeat(0, len)
for i = 1 to len do
-- inline small integer case for greater speed on strings
c = getc(current_db)
if c < I2B then
if c <= CACHE0 then
s[i] = c + MIN1B
elsif c <= CACHE0 + COMP_CACHE_SIZE then
-- a value from cache
s[i] = comp_cache[c - CACHE0]
end if
else
s[i] = fdecompress(c)
end if
end for
return s
end if
end function