; IT-HE RPG Engine Code
;
; Status: Beta
; Brought to you by: jpmorris
; Source: engine/asm/dark_64.asm, revision r70
; 270 lines (199 with data), 4.5 kB

;
;	AMD64-ABI Assembler versions of the darkness routines
;
;	This will need modification to run in Windows because
;	they rolled their own ABI
;

; Assemble everything below as 64-bit code in the code section
[BITS 64]
[SECTION .text]

; Declare public symbols
;
; Every routine is exported under two names: with and without a leading
; underscore.  Both names label the same entry point.
; NOTE(review): presumably this lets the same object link under C symbol
; naming schemes that prepend an underscore as well as those that do not;
; confirm against the build before removing either spelling.

GLOBAL _darken_asm_32
GLOBAL darken_asm_32
GLOBAL _darken_asm_32s
GLOBAL darken_asm_32s
GLOBAL _darken_asm_16
GLOBAL darken_asm_16
GLOBAL _darken_asm_16s
GLOBAL darken_asm_16s
GLOBAL _darken_asm_blit16
GLOBAL darken_asm_blit16
GLOBAL _darken_asm_blit32
GLOBAL darken_asm_blit32


;-----------------------------------------------------------------------
; darken_asm_32 - darken a run of 32-bit pixels using a darkmap
;
; ABI:   System V AMD64
; In:    RDI = dest, 32-bit pixels, modified in place
;        RSI = src, one darkness byte per destination pixel
;        RDX = len, number of pixels (0 is allowed and does nothing)
; Out:   nothing
; Clobb: RAX, RDX, RSI, RDI, XMM0, XMM1, flags
;
; For every pixel the darkness byte is copied into all four byte lanes
; and subtracted from the destination with unsigned saturation, so each
; lane bottoms out at 0 instead of wrapping.
;
; SSE2 registers are used rather than MMX: MMX aliases the x87 register
; stack and would require an EMMS before returning to C code.
;
; NOTE(review): all 64 bits of RDX are used as the count, so the caller
; must pass a 64-bit (or properly extended) length - confirm against the
; C prototype.
;-----------------------------------------------------------------------
_darken_asm_32:
 darken_asm_32:

	test rdx,rdx			; nothing to do for an empty run
	jz .done

.next:
	movzx eax,byte [rsi]		; darkness level for this pixel
	imul eax,eax,0x01010101		; replicate the level into all 4 bytes
	movd xmm1,eax

	movd xmm0,dword [rdi]		; get the dest pixel
	psubusb xmm0,xmm1		; per-byte saturating subtract
	movd dword [rdi],xmm0		; write output

	; Next pixel

	add rdi,4			; 4 bytes per dest pixel
	inc rsi				; 1 byte per darkmap entry
	dec rdx
	jnz .next

.done:
	ret

;;
;; Single colour (not using a darkmap)
;;

;-----------------------------------------------------------------------
; darken_asm_32s - darken a run of 32-bit pixels by one fixed level
;
; ABI:   System V AMD64
; In:    RDI = dest, 32-bit pixels, modified in place
;        RSI = darkness level (a value, not an address); only the low
;              byte is used
;        RDX = len, number of pixels (0 is allowed and does nothing)
; Out:   nothing
; Clobb: RAX, RCX, RDX, RDI, XMM0, XMM1, flags
;
; The level is copied into the low three byte lanes only; the top byte
; of every pixel is left as it was.  The subtraction saturates at 0 per
; byte.
;
; SSE2 registers are used rather than MMX: MMX aliases the x87 register
; stack and would require an EMMS before returning to C code.
;
; NOTE(review): all 64 bits of RDX are used as the count, so the caller
; must pass a 64-bit (or properly extended) length - confirm against the
; C prototype.
;-----------------------------------------------------------------------
_darken_asm_32s:
 darken_asm_32s:

	test rdx,rdx			; nothing to do for an empty run
	jz .done

	; Pre-compute the subtrahend once: 0x00LLLLLL

	movzx eax,sil			; low byte of the colour argument
	imul eax,eax,0x00010101		; replicate into the low 3 bytes
	movd xmm1,eax			; loop-invariant

.next:
	mov ecx,[rdi]			; get the dest pixel (kept for the test below)

	movd xmm0,ecx
	psubusb xmm0,xmm1		; per-byte saturating subtract
	movd eax,xmm0

	; We use colour separation in the roof projector, and 0 is transparent
	; So we need to make it non-zero unless it is supposed to be transparent

	test eax,eax			; did the result collapse to zero?
	jnz .store
	test ecx,ecx			; it was zero to begin with: leave it alone
	jz .store
	or eax,0x01000000		; force a non-zero value
.store:
	mov [rdi],eax			; write output

	; Next pixel

	add rdi,4
	dec rdx
	jnz .next

.done:
	ret

;;
;;	16bpp darkness code
;;

;-----------------------------------------------------------------------
; darken_asm_16 - darken a run of 16-bit pixels using a darkmap and LUT
;
; ABI:   System V AMD64
; In:    RDI = dest, 16-bit pixels, modified in place
;        RSI = src, one darkness byte per destination pixel
;        RDX = len, number of pixels (0 is allowed and does nothing)
;        RCX = LUT address, an array of 16-bit entries
; Out:   nothing
; Clobb: RAX, RDX, RSI, RDI, R8, flags
;
; Each output pixel is LUT[(level >> 3) * 65536 + pixel], i.e. the top
; five bits of the darkness byte select a 65536-entry row and the
; original pixel value selects the entry within it.
;
; Only caller-saved registers are used, so nothing is pushed.
;
; NOTE(review): all 64 bits of RDX are used as the count, so the caller
; must pass a 64-bit (or properly extended) length - confirm against the
; C prototype.
;-----------------------------------------------------------------------
_darken_asm_16:
darken_asm_16:

	test rdx,rdx			; nothing to do for an empty run
	jz .done

.next:
	movzx eax,byte [rsi]		; get the source level
	movzx r8d,word [rdi]		; get the dest pixel

	; Consult lookuptable to get correct lighting value
	; Masking to 0xf8 and shifting by 13 is (level >> 3) << 16

	and eax,0xf8
	shl eax,13			; row offset in entries
	add eax,r8d			; + pixel = entry index
	movzx eax,word [rcx+rax*2]	; 2 bytes per LUT entry

	mov [rdi],ax			; write output

	add rdi,2			; 2 bytes per dest pixel
	inc rsi				; 1 byte per darkmap entry
	dec rdx
	jnz .next

.done:
	ret

;   Single colour (not using a darkmap)

;-----------------------------------------------------------------------
; darken_asm_16s - darken a run of 16-bit pixels by one fixed level
;
; ABI:   System V AMD64
; In:    RDI = dest, 16-bit pixels, modified in place
;        RSI = darkness level (a value, not an address); only bits 3-7
;              are used
;        RDX = len, number of pixels (0 is allowed and does nothing)
;        RCX = LUT address, an array of 16-bit entries
; Out:   nothing
; Clobb: RAX, RCX, RDX, RSI, RDI, flags
;
; Each output pixel is LUT[(level >> 3) * 65536 + pixel].  Because the
; level is the same for the whole run, the address of its 65536-entry
; row is worked out once before the loop.
;
; Only caller-saved registers are used, so nothing is pushed.
;
; NOTE(review): all 64 bits of RDX are used as the count, so the caller
; must pass a 64-bit (or properly extended) length - confirm against the
; C prototype.
;-----------------------------------------------------------------------
_darken_asm_16s:
darken_asm_16s:

	test rdx,rdx			; nothing to do for an empty run
	jz .done

	; Trim the source colour and pre-compute the LUT row
	; Masking to 0xf8 and shifting by 13 is (level >> 3) << 16

	and esi,0xf8
	shl esi,13			; row offset in entries
	lea rcx,[rcx+rsi*2]		; RCX = first entry of the chosen row

.next:
	movzx eax,word [rdi]		; get the dest pixel
	movzx eax,word [rcx+rax*2]	; look it up; 2 bytes per entry
	mov [rdi],ax			; write output

	add rdi,2			; 2 bytes per dest pixel
	dec rdx
	jnz .next

.done:
	ret

; Bitmap combining

; 16bpp

;-----------------------------------------------------------------------
; darken_asm_blit16 - copy 16-bit pixels, skipping zero source pixels
;
; ABI:   System V AMD64
; In:    RDI = dest, 16-bit pixels
;        RSI = src, address of 16-bit pixels to copy from
;        RDX = len, number of pixels (0 is allowed and does nothing)
; Out:   nothing
; Clobb: RAX, RDX, RSI, RDI, flags
;
; A source pixel of 0 leaves the corresponding destination pixel as it
; was; any other value overwrites it.
;
; The source is read with an explicit load and pointer bump, so the
; routine does not depend on the direction flag.
;
; NOTE(review): all 64 bits of RDX are used as the count, so the caller
; must pass a 64-bit (or properly extended) length - confirm against the
; C prototype.
;-----------------------------------------------------------------------
_darken_asm_blit16:
 darken_asm_blit16:

	test rdx,rdx			; nothing to do for an empty run
	jz .done

.next:
	movzx eax,word [rsi]		; get the source pixel
	add rsi,2
	test eax,eax			; zero source pixel: keep the dest
	jz .skip
	mov [rdi],ax
.skip:
	add rdi,2
	dec rdx
	jnz .next

.done:
	ret

; 32bpp

;-----------------------------------------------------------------------
; darken_asm_blit32 - copy 32-bit pixels, skipping zero source pixels
;
; ABI:   System V AMD64
; In:    RDI = dest, 32-bit pixels
;        RSI = src, address of 32-bit pixels to copy from
;        RDX = len, number of pixels (0 is allowed and does nothing)
; Out:   nothing
; Clobb: RAX, RDX, RSI, RDI, flags
;
; A source pixel of 0 leaves the corresponding destination pixel as it
; was; any other value overwrites it.
;
; The source is read with an explicit load and pointer bump, so the
; routine does not depend on the direction flag.
;
; NOTE(review): all 64 bits of RDX are used as the count, so the caller
; must pass a 64-bit (or properly extended) length - confirm against the
; C prototype.
;-----------------------------------------------------------------------
_darken_asm_blit32:
 darken_asm_blit32:

	test rdx,rdx			; nothing to do for an empty run
	jz .done

.next:
	mov eax,[rsi]			; get the source pixel
	add rsi,4
	test eax,eax			; zero source pixel: keep the dest
	jz .skip
	mov [rdi],eax
.skip:
	add rdi,4
	dec rdx
	jnz .next

.done:
	ret