[go: up one dir, main page]

Menu

[r52]: / engine / asm / dark_64.asm  Maximize  Restore  History

Download this file

270 lines (199 with data), 4.5 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
;
; AMD64-ABI Assembler versions of the darkness routines
;
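; These routines read their arguments from RDI, RSI, RDX and RCX (the
; System V AMD64 calling convention), so they will need modification to
; run on Windows, which uses a different 64-bit calling convention.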
;
; Assemble everything below as 64-bit code, placed in the code section.
[BITS 64]
[SECTION .text]
; Declare public symbols
; Every routine is exported under two names, with and without a leading
; underscore; both labels mark the same entry point further down.
; NOTE(review): presumably the underscore variants exist for toolchains
; that prefix C symbols with '_' - confirm against the C callers.
; 32bpp, per-pixel darkness map
GLOBAL _darken_asm_32
GLOBAL darken_asm_32
; 32bpp, single darkness level
GLOBAL _darken_asm_32s
GLOBAL darken_asm_32s
; 16bpp, per-pixel darkness map + lookup table
GLOBAL _darken_asm_16
GLOBAL darken_asm_16
; 16bpp, single darkness level + lookup table
GLOBAL _darken_asm_16s
GLOBAL darken_asm_16s
; 16bpp copy that skips zero source pixels
GLOBAL _darken_asm_blit16
GLOBAL darken_asm_blit16
; 32bpp copy that skips zero source pixels
GLOBAL _darken_asm_blit32
GLOBAL darken_asm_blit32
;-----------------------------------------------------------------------
; darken_asm_32 / _darken_asm_32
; Darken a run of 32-bit pixels with a per-pixel 8-bit darkness map:
; every byte of each dest pixel has the matching source level subtracted
; from it, saturating at zero.
;
; ABI:   System V AMD64
; In:    rdi = dest (32-bit pixels, modified in place)
;        rsi = src  (address of one darkness byte per pixel)
;        rdx = len  (pixel count; the full 64-bit register is used)
; Out:   nothing
; Clobb: rax, rdx, rsi, rdi, xmm0, xmm1, flags
;
; SSE2 registers are used rather than MMX so that the x87 state is left
; untouched and no EMMS is needed before returning to the caller.
;-----------------------------------------------------------------------
_darken_asm_32:
darken_asm_32:
        test    rdx, rdx                ; nothing to do for an empty run
        jz      .done
.pixel:
        movzx   eax, byte [rsi]         ; eax = darkness level L
        imul    eax, eax, 0x01010101    ; replicate it: eax = 0xLLLLLLLL
        movd    xmm1, eax
        movd    xmm0, dword [rdi]       ; xmm0 = dest pixel
        psubusb xmm0, xmm1              ; per-byte saturating subtract
        movd    dword [rdi], xmm0       ; write output
        add     rdi, 4                  ; next dest pixel
        inc     rsi                     ; next source byte
        dec     rdx
        jnz     .pixel
.done:
        ret
;;
;; Single colour (not using a darkmap)
;;
;-----------------------------------------------------------------------
; darken_asm_32s / _darken_asm_32s
; Darken a run of 32-bit pixels by one fixed darkness level.  The level
; is subtracted (saturating at zero) from the three low bytes of each
; pixel; the top byte has nothing subtracted from it.
;
; ABI:   System V AMD64
; In:    rdi = dest (32-bit pixels, modified in place)
;        rsi = src  (the darkness level itself, NOT an address; only the
;                    low byte is used)
;        rdx = len  (pixel count; the full 64-bit register is used)
; Out:   nothing
; Clobb: rax, rcx, rdx, rdi, xmm0, xmm1, flags
;
; SSE2 registers are used rather than MMX so that the x87 state is left
; untouched and no EMMS is needed before returning to the caller.
;-----------------------------------------------------------------------
_darken_asm_32s:
darken_asm_32s:
        test    rdx, rdx                ; nothing to do for an empty run
        jz      .done

        ; Pre-compute the replicated level once: xmm1 = 0x00LLLLLL
        movzx   eax, sil
        imul    eax, eax, 0x00010101
        movd    xmm1, eax
.pixel:
        mov     ecx, [rdi]              ; ecx = original dest pixel
        movd    xmm0, ecx
        psubusb xmm0, xmm1              ; per-byte saturating subtract
        movd    eax, xmm0

        ; We use colour separation in the roof projector, and 0 is
        ; transparent.  A pixel that was not transparent before must not
        ; become 0, so force it non-zero; a pixel that was already 0
        ; stays 0.
        test    eax, eax
        jnz     .store
        test    ecx, ecx
        jz      .store
        or      eax, 0x01000000
.store:
        mov     [rdi], eax              ; write output
        add     rdi, 4                  ; next dest pixel
        dec     rdx
        jnz     .pixel
.done:
        ret
;;
;; 16bpp darkness code
;;
;-----------------------------------------------------------------------
; darken_asm_16 / _darken_asm_16
; Darken a run of 16-bit pixels with a per-pixel 8-bit darkness map and
; a lookup table of 16-bit entries.  For each pixel:
;     level = src byte >> 3                      (top five bits)
;     dest  = LUT[level * 65536 + dest]
;
; ABI:   System V AMD64
; In:    rdi = dest (16-bit pixels, modified in place)
;        rsi = src  (address of one darkness byte per pixel)
;        rdx = len  (pixel count; the full 64-bit register is used)
;        rcx = LUT  (address of the 16-bit lookup table)
; Out:   nothing
; Clobb: rax, rdx, rsi, rdi, r8, flags
;-----------------------------------------------------------------------
_darken_asm_16:
darken_asm_16:
        test    rdx, rdx                ; nothing to do for an empty run
        jz      .done
.pixel:
        movzx   eax, byte [rsi]         ; eax = darkness level (8-bit)
        ; Masking to 0xf8 and shifting left by 13 (rather than 16)
        ; converts the level to 5 bits: effectively (level >> 3) << 16.
        and     eax, 0xf8
        shl     eax, 13
        movzx   r8d, word [rdi]         ; r8d = dest pixel
        add     eax, r8d                ; eax = level * 65536 + dest
        movzx   eax, word [rcx + rax*2] ; look up the 16-bit entry
        mov     [rdi], ax               ; write output
        add     rdi, 2                  ; next dest pixel
        inc     rsi                     ; next source byte
        dec     rdx
        jnz     .pixel
.done:
        ret
; Single colour (not using a darkmap)
;-----------------------------------------------------------------------
; darken_asm_16s / _darken_asm_16s
; Darken a run of 16-bit pixels by one fixed darkness level, using a
; lookup table of 16-bit entries.  For each pixel:
;     dest = LUT[(level >> 3) * 65536 + dest]
;
; ABI:   System V AMD64
; In:    rdi = dest (16-bit pixels, modified in place)
;        rsi = src  (the darkness level itself, NOT an address; only
;                    bits 3..7 are used)
;        rdx = len  (pixel count; the full 64-bit register is used)
;        rcx = LUT  (address of the 16-bit lookup table)
; Out:   nothing
; Clobb: rax, rcx, rdx, rsi, rdi, flags
;-----------------------------------------------------------------------
_darken_asm_16s:
darken_asm_16s:
        test    rdx, rdx                ; nothing to do for an empty run
        jz      .done

        ; Pre-compute the table row for this level.  Masking to 0xf8 and
        ; shifting left by 13 (rather than 16) converts the level to
        ; 5 bits: effectively (level >> 3) << 16.
        and     esi, 0xf8
        shl     esi, 13
        lea     rcx, [rcx + rsi*2]      ; rcx = &LUT[level5 * 65536]
.pixel:
        movzx   eax, word [rdi]         ; eax = dest pixel
        mov     ax, [rcx + rax*2]       ; look up the darkened pixel
        mov     [rdi], ax               ; write output
        add     rdi, 2                  ; next dest pixel
        dec     rdx
        jnz     .pixel
.done:
        ret
; Bitmap combining
; 16bpp
;-----------------------------------------------------------------------
; darken_asm_blit16 / _darken_asm_blit16
; Copy a run of 16-bit pixels from src to dest, skipping every source
; pixel whose value is 0 (the dest pixel is left as it was).
;
; ABI:   System V AMD64
; In:    rdi = dest (16-bit pixels, modified in place)
;        rsi = src  (address of the 16-bit source pixels)
;        rdx = len  (pixel count; the full 64-bit register is used)
; Out:   nothing
; Clobb: rax, rdx, rsi, rdi, flags
;-----------------------------------------------------------------------
_darken_asm_blit16:
darken_asm_blit16:
        test    rdx, rdx                ; nothing to do for an empty run
        jz      .done
.pixel:
        movzx   eax, word [rsi]         ; eax = source pixel
        test    eax, eax
        jz      .skip                   ; 0 = leave dest untouched
        mov     [rdi], ax
.skip:
        add     rsi, 2                  ; next source pixel
        add     rdi, 2                  ; next dest pixel
        dec     rdx
        jnz     .pixel
.done:
        ret
; 32bpp
;-----------------------------------------------------------------------
; darken_asm_blit32 / _darken_asm_blit32
; Copy a run of 32-bit pixels from src to dest, skipping every source
; pixel whose value is 0 (the dest pixel is left as it was).
;
; ABI:   System V AMD64
; In:    rdi = dest (32-bit pixels, modified in place)
;        rsi = src  (address of the 32-bit source pixels)
;        rdx = len  (pixel count; the full 64-bit register is used)
; Out:   nothing
; Clobb: rax, rdx, rsi, rdi, flags
;-----------------------------------------------------------------------
_darken_asm_blit32:
darken_asm_blit32:
        test    rdx, rdx                ; nothing to do for an empty run
        jz      .done
.pixel:
        mov     eax, [rsi]              ; eax = source pixel
        test    eax, eax
        jz      .skip                   ; 0 = leave dest untouched
        mov     [rdi], eax
.skip:
        add     rsi, 4                  ; next source pixel
        add     rdi, 4                  ; next dest pixel
        dec     rdx
        jnz     .pixel
.done:
        ret