#define ASM
#include "loud.h"

// MAKEMOD(mod,buflen): builds the constant that this file loads into i3 and
// i5 before using the "(I2)*" / "(i4)*" addressing forms.
//   bit 13      : always set (0x2000)
//   bits 6..12  : low 7 bits of `mod`
//   bits 0..5   : low 6 bits of `buflen`-1
// NOTE(review): this looks like the modulo (circular buffer) post-modify
// encoding, with `mod` as the step and `buflen` as the buffer length --
// confirm against the processor manual.
#macro MAKEMOD mod,buflen
	(0x2000|(((mod)&127)<<6)|(((buflen)-1)&63))
#endm


// Values for the mr0 register. In this file MR_NONE is loaded by _Filter and
// MR_INT by _Generic; MR_SAT is defined but not referenced here.
// NOTE(review): presumably the integer-mode and saturation-mode bits of mr0
// -- confirm the bit meanings against the processor manual.
#define MR_INT 0x200
#define MR_SAT 0x400
#define MR_NONE 0

	// _stereoFilt: 22 zero-initialised locations placed at the fixed
	// address 0x1380 in section data_x and exported. Nothing else in this
	// file refers to the symbol.
	// NOTE(review): presumably the filter state passed to _Filter/_Filter2
	// by the caller -- confirm against the C side (loud.h).
	.sect data_x,my_x
	.org 0x1380
	.export _stereoFilt
_stereoFilt:
	.zero 22

	// _consts: 5 reserved (not initialised here) locations at the fixed
	// address 0x780 in section data_y, exported. _Filter and _Filter2 load
	// this address into i4 and read it with "ldy (i4)*": five reads per
	// sample in _Filter, four in _Filter2. Nothing in this file writes the
	// table, so it has to be filled elsewhere before either routine runs.
	.sect data_y,constants
	.org 0x780
	.export _consts
_consts:
	.bss 5

	.import _filt

/*
	Filter()

	Filters n samples in place: every sample is read from (i0), run
	through the filter, clamped to 16 bits and written back to the same
	location, after which i0 is post-modified.

	Inputs:
	  i0: s_int16 *in (16-bit)
	  i1: s_int16 step
	  i2: pointer to the filter state ("fir" in the comments below);
	      it is copied to i7 on entry
	  a0: s_int16 n

	Outputs:
	  -

	Filter state as it is accessed through i7 (X memory):
	  fir+3        findex, loaded into i2 on entry and stored back on exit
	  fir+4, fir+5 lp[0], rewritten from b1, b2 for every sample
	  fir+6, fir+7 lp[1], rewritten from A1, A2 for every sample

	Coefficients are read from _consts (Y memory), five per sample.

	All registers used are saved on the I6 stack and restored before
	returning. The restore sequence at label Restore is also the exit
	path of _Filter2.

	NOTE(review): the leading "ldx (I6)+1,NULL" is commented out and the
	last save does not post-increment I6, while Restore pops ten slots,
	so I6 is one lower on return than on entry. Presumably the caller
	accounts for this -- confirm.
	NOTE(review): there is no guard for n == 0; a0 becomes -1 before the
	loop instruction. Confirm callers always pass n >= 1.
*/
	.sect code,Filter
	.export _Filter
_Filter:
	// Save every register this routine touches (X and Y halves of each
	// stack slot are written in the same instruction).
	//ldx (I6)+1,NULL
	stx MR0,(I6)+1 ; sty I7,(I6)
	stx LR0,(I6)+1 ; sty I5,(I6)	// return address
	stx I0,(I6)+1 ; sty I1,(I6)
	stx I2,(I6)+1 ; sty I3,(I6)
	stx I4,(I6)+1 ; sty lc,(I6)
	stx A0,(I6)+1 ; sty A1,(I6)
	stx B0,(I6)+1 ; sty B1,(I6)
	stx C0,(I6)+1 ; sty C1,(I6)
	stx D0,(I6)+1 ; sty D1,(I6)
	stx LE,(I6) ; sty LS,(I6)

	// i7 = filter state pointer passed in i2
	mv i2,i7

	//ldc MR_INT,mr0
	ldc MR_NONE,mr0

	ldx (i7)+3,null	// fir -> findex
	ldx (i7)+2,i2	// findex -> lp[0]+1

	// i4/i5: coefficient table, addressed with MAKEMOD(1,5)
	ldc _consts,i4
	ldc MAKEMOD(1,5),i5

	// a0 = n-1 for the loop instruction; C1 = word at fir+5, i7 -> fir+4
	add a0,ones,a0	; ldx (I7)-1,C1
	loop a0,LoopEnd-1
	// Follows the loop instruction; i3 is the modifier used by "(I2)*"
	ldc MAKEMOD(1,3),i3

	// ---- per-sample body: i7 is at fir+4 at the top of every pass ----
	// Clear d, fetch the input sample and halve it.
	and d,null,d	; ldx (i0),c0
	asr c0,c0	// Added 1.35 to make work with higher volume source
	// Clear B, store the halved sample through i2, fetch first coefficient
	and B,NULL,B	; stx C0,(I2)*	; ldy (i4)*,a0

	// Multiply-accumulate over the sample and the values read back
	// through i2, one coefficient from _consts per instruction.
	mulss C0,A0	; ldx (I2)*,A0	; ldy (i4)*,a1
	mac A1,A0,B	; ldx (I2)*,A1	; ldy (i4)*,a0
	mac A1,A0,B	; ldx (i2)*,A1 /*dummy*/ ; ldy (i4)*,c0
	// C1 = word at fir+4, i7 -> fir+6
	macsu C0,C1,B	; ldx (i7)+2,C1

	// It might be possible to only use the lower 16 bits of lp[0],
	// which would save ~4 cycles
//     lp[0] = (f + 8064*lp[0]) >> 13;

	// C1/C2 = lp[1] (fir+6, fir+7); i7 returns to fir+4
	macsu C0,C1,D	; ldx (I7)+1,C1
	add D0,P,D	; ldx (I7)-3,C2	// Could d0 have too large value?
	add D,B,A	; ldy (i4)*,c0
	//lsl a,a
	lsl a,a
	lsl a,b
	// Store the new lp[0] (b1 -> fir+4, b2 -> fir+5); C is halved and
	// added to B to form the new lp[1] in A.
	stx b1,(I7)+1	; asr c,c
	stx b2,(I7)	; add b,C,A

//     lp[1] = lp[0] + lp[1]/2;

	// Copy A to b (this sets the conditions tested below), then store the
	// new lp[1] (A1 -> fir+6, A2 -> fir+7). The second store follows the
	// jump and always executes, which brings i7 back to fir+4.
	add A,null,b		; ldx (I7)+1,C1
	stx A1,(I7)+1
	// Clamp: when the "ec" condition holds b1 is written unchanged;
	// otherwise b1 becomes 32767 when the "nc" condition holds and -32768
	// when it does not.
	// NOTE(review): presumably ec = no overflow in the extension bits and
	// nc = result not negative -- confirm against the processor manual.
	jec L15
	stx A2,(I7)-3

	jnc L15
	ldc 32767,b1
	ldc -32768,b1
L15:
	// Write the output sample over the input and advance i0
	stx b1,(i0)*

LoopEnd:
	// i7: fir+4 -> fir+3, then save the updated i2 back into findex
	ldx (i7)-1,null
	stx i2,(i7)	// findex
	// Shared epilogue: pops everything pushed by the prologue in reverse
	// order. _Filter2 jumps here as well. The final load follows jr and
	// still executes before control returns.
Restore:
	ldx (I6)-1,LE ; ldy (I6),LS
	ldx (I6)-1,D0 ; ldy (I6),D1
	ldx (I6)-1,C0 ; ldy (I6),C1
	ldx (I6)-1,B0 ; ldy (I6),B1
	ldx (I6)-1,A0 ; ldy (I6),A1
	ldx (I6)-1,I4 ; ldy (I6),lc
	ldx (I6)-1,I2 ; ldy (I6),I3
	ldx (I6)-1,I0 ; ldy (I6),I1
	ldx (I6)-1,LR0 ; ldy (I6),I5
	jr
	ldx (I6)-1,MR0 ; ldy (I6),I7



	// Filter2()
	//
	// Second in-place filter routine. Register usage on entry matches
	// _Filter as far as the code shows:
	//   i0: sample pointer (read with "ldx (i0)", written with "stx ..,(i0)*")
	//   i2: filter state pointer ("fir"); copied to i7 and also used
	//       directly, without post-modification, to keep the previous
	//       input sample at X:(i2)
	//   a0: n (decremented by one and given to the loop instruction)
	//   i1: not referenced by name; presumably the step applied by
	//       "(i0)*" as in _Filter -- confirm
	//
	// State accessed through i7: fir+4 and fir+5 are loaded into C1/C2
	// before the loop and written back from c1/c2 after it.
	// Coefficients: four reads per sample from _consts via i4/i5.
	//
	// One extra stack slot (X and Y) above the saved registers is filled
	// with zero and used as a zero source inside the loop; it is popped in
	// the instruction following the final jump. The routine leaves through
	// the Restore label in _Filter, so the save order here must match that
	// epilogue.
	//
	// NOTE(review): as in _Filter, I6 ends up one lower than on entry
	// (ten increments, eleven decrements) and n == 0 is not guarded --
	// confirm both against the caller.
	.sect code,Filter2
	.export _Filter2
_Filter2:
	//ldx (I6)+1,NULL
	stx MR0,(I6)+1 ; sty I7,(I6)
	stx LR0,(I6)+1 ; sty I5,(I6)	// return address
	stx I0,(I6)+1 ; sty I1,(I6)
	stx I2,(I6)+1 ; sty I3,(I6)
	stx I4,(I6)+1 ; sty lc,(I6)
	stx A0,(I6)+1 ; sty A1,(I6)
	stx B0,(I6)+1 ; sty B1,(I6)
	stx C0,(I6)+1 ; sty C1,(I6)
	stx D0,(I6)+1 ; sty D1,(I6)
	stx LE,(I6)+1 ; sty LS,(I6)

	// B = 0; i7 = state pointer, then advanced by 4
	and B,NULL,B	; mv i2,i7
	ldy (i7)+4,null // fir -> lp[0]

	ldc _consts,i4
	ldc MAKEMOD(1,4),i5

	// a0 = n-1; write zero (b0) to both halves of the scratch slot at (i6)
	add a0,ones,a0	; stx b0,(i6)	; sty b0,(i6)
	// C1 = word at fir+4, i7 -> fir+5
	ldx (i7)+1,C1
	loop a0,LoopEnd2-1
	// Follows the loop instruction: C2 = word at fir+5, i7 -> fir+4
	ldx (i7)-1,C2	; ldy (i6),mr0	// MR0 = 0

	// ---- per-sample body ----
	// D0 = input sample, a0 = first coefficient
	add c1,null,c0	; ldx (i0),D0	; ldy (i4)*,a0

	// A1 = previous input sample kept at X:(i2); the current sample
	// replaces it in the next instruction.
	mulss D0,A0	; ldx (I2),A1	; ldy (i4)*,a0
	mac A0,A1,B	; stx D0,(I2)	; ldy (i4)*,a0
	add b,p,b	; mv c2,c1
	mv b2,b1 ; add b1,null,b0

	// d1/d0 are reloaded with zero from the scratch slot
	mulss A0,B1	; ldx (i6),d1 ; ldy (i6),d0
	macsu A0,B0,D
	add D0,P,D	; ldy (i4)*,A0
	// c1/c0 are reloaded with zero from the scratch slot
	add C,B,B	; ldx (i6),c1 ; ldy (i6),c0

	// a1/a2 receive B0/B1: this is the value that gets clamped and stored
	mulss A0,B1	; mv B0,a1
	macsu a0,B0,c	; mv B1,a2
	add c0,p,c
	sub d,c,c
	// Set the conditions for the clamp test; the nop separates the add
	// from the conditional jump.
	add a,null,a
	nop
	// Clamp as in _Filter: a1 is stored unchanged when "ec" holds. The
	// instruction after the jump always executes (shifts c, reloads
	// b1/b0 with zero from the scratch slot).
	jec L152
	lsl c,c	; ldx (i6),b1 ; ldy (i6),b0

	add a,null,a
	nop
	// a1 = 32767 when "nc" holds, otherwise -32768
	jnc L152
	ldc 32767,a1
	ldc -32768,a1
L152:
	// Write the output sample over the input and advance i0
	lsl c,c ; stx a1,(i0)*
LoopEnd2:
	// Store c1/c2 back to fir+4/fir+5, drop the scratch slot, and leave
	// through the shared epilogue in _Filter.
	stx c1,(I7)+1
	j Restore
	stx c2,(I7)-1	; ldy (i6)-1,null
	
// Disabled: line-for-line copy of the register-restore epilogue found at
// label Restore in _Filter. _Filter2 jumps to Restore instead of carrying its
// own copy, so this text is excluded from the build by the #if 0.
#if 0
	ldx (I6)-1,LE ; ldy (I6),LS
	ldx (I6)-1,D0 ; ldy (I6),D1
	ldx (I6)-1,C0 ; ldy (I6),C1
	ldx (I6)-1,B0 ; ldy (I6),B1
	ldx (I6)-1,A0 ; ldy (I6),A1
	ldx (I6)-1,I4 ; ldy (I6),lc
	ldx (I6)-1,I2 ; ldy (I6),I3
	ldx (I6)-1,I0 ; ldy (I6),I1
	ldx (I6)-1,LR0 ; ldy (I6),I5
	jr
	ldx (I6)-1,MR0 ; ldy (I6),I7
#endif


.end
	/********* EVERYTHING FROM NOW ON IS PURELY DECORATIONAL!!! **********/

/*
	Generic()

	Skeleton routine: saves the working registers on the I6 stack, loads
	MR_INT into mr0, restores everything and returns. It appears after the
	.end directive and under the "purely decorational" banner, so it
	serves as a template rather than as part of the built code.

	Inputs:
	  i0: data (16-bit)
	  a0: chan
	  (neither input is read by the body as written)

	Outputs:
	  -

	NOTE(review): I5 is saved twice (next to LR0 and next to I4) and
	restored twice, and LC has a slot of its own; _Filter and _Filter2
	instead store lc in the Y half next to I4. Harmless here, but worth
	aligning if this template is ever copied into live code.
*/
	.sect code,Generic
	.export _Generic
_Generic:
	// Save registers; the last store leaves I6 on the top slot
	stx MR0,(I6)+1 ; sty I7,(I6)
	stx LR0,(I6)+1 ; sty I5,(I6)	// return address
	stx I0,(I6)+1 ; sty I1,(I6)
	stx I2,(I6)+1 ; sty I3,(I6)
	stx I4,(I6)+1 ; sty I5,(I6)
	stx A0,(I6)+1 ; sty A1,(I6)
	stx B0,(I6)+1 ; sty B1,(I6)
	stx C0,(I6)+1 ; sty C1,(I6)
	stx D0,(I6)+1 ; sty D1,(I6)
	stx LE,(I6)+1 ; sty LS,(I6)
	stx LC,(I6)

	// Placeholder body: only the mode register is set
	ldc MR_INT,mr0

	// Restore in reverse order; the load after jr still executes before
	// control returns.
	ldx (I6)-1,LC
	ldx (I6)-1,LE ; ldy (I6),LS
	ldx (I6)-1,D0 ; ldy (I6),D1
	ldx (I6)-1,C0 ; ldy (I6),C1
	ldx (I6)-1,B0 ; ldy (I6),B1
	ldx (I6)-1,A0 ; ldy (I6),A1
	ldx (I6)-1,I4 ; ldy (I6),I5
	ldx (I6)-1,I2 ; ldy (I6),I3
	ldx (I6)-1,I0 ; ldy (I6),I1
	ldx (I6)-1,LR0 ; ldy (I6),I5
	jr
	ldx (I6)-1,MR0 ; ldy (I6),I7
