;---------------------------------------------------------------------
; This software module is adapted from the book
; "ARM System Developers Guide" (Elsevier Inc., 2004)
; so that it builds in the IAR environment for the STR91x.
; The module is presented "as is", with no warranty.
;---------------------------------------------------------------------

            ; Place the code in relocatable segment CODE (type CODE),
            ; aligned to 2^2 = 4 bytes.  NOROOT allows the linker to
            ; discard this segment part when nothing refers to it.
            RSEG CODE:CODE:NOROOT(2)
            CODE32 		                ; 32-bit ARM instruction set
            PUBLIC  fir_16by16_arm9e    ; export the entry point

;---------------------------------------------------------------------

; Register allocation for fir_16by16_arm9e.
;
; R0-R3 carry the four arguments on entry.  Two of them are reused
; inside the tap loop: h_10 shares R0 with y, and h_32 shares R3 with N.
; R12 and R14 serve as the fifth and sixth accumulators.
;
; NOTE(review): keep the remarks on the alias lines below in C comment
; form; an assembler-style comment there would presumably end up as
; part of the alias text.

#define y       R0    /* array for output samples y[] (shares R0 with h_10) */
#define x       R1    /* array of input samples x[] (32-bit aligned) */
#define h       R2    /* descriptor pointer on entry, then array of coefficients h[] (32-bit aligned) */
#define N       R3    /* number of outputs (a multiple of 6); shares R3 with h_32 */
#define T       R4    /* number of coefficients (a multiple of 6) */
#define h_10    R0    /* coefficient pairs: two packed 16-bit coefficients per register */
#define h_32    R3
#define x_10    R5    /* sample pairs: two packed 16-bit samples per register */
#define x_32    R6
#define x_54    R7
#define y_0     R8    /* output accumulators, one per output of the current group of six */
#define y_1     R9
#define y_2     R10
#define y_3     R11
#define y_4     R12
#define y_5     R14

;---------------------------------------------------------------------

        ;-----------------------------------------------------------------
        ; void fir_16by16_arm9e
        ;  (int *y,
        ;   short *x,
        ;   struct { short *h; unsigned int T; } *h,
        ;   unsigned int N)
        ;
        ; Block FIR filter, 16-bit samples by 16-bit coefficients, built
        ; on the SMLAxy multiply-accumulate instructions.
        ; For n = 0 .. N-1:
        ;
        ;     y[n] = h[0]*x[n] + h[1]*x[n+1] + ... + h[T-1]*x[n+T-1]
        ;
        ; Six outputs are produced per outer pass and six coefficients
        ; are consumed per inner pass (36 MACs per inner pass).
        ;
        ; In:    R0 = y   output array; N words are written
        ;        R1 = x   input samples, 32-bit aligned; N+T halfwords
        ;                 are read
        ;        R2 = h   pointer to { coefficient pointer, T };
        ;                 the coefficient array is 32-bit aligned
        ;        R3 = N   number of outputs
        ; Out:   no register result
        ; Uses:  R0-R3, R12 and the flags are overwritten; R4-R11 are
        ;        saved and restored; at most 48 bytes of stack.
        ; Notes: N and T must be multiples of 6.  Other non-zero values
        ;        are not supported: work is done in groups of six and the
        ;        pointer rewind after each group relies on T.
        ;        If N or T is zero the routine returns at once and does
        ;        not write y[].
        ;        Sums are accumulated in 32 bits without saturation.
        ;        NOTE(review): the pairing of halfwords below assumes
        ;        little-endian data (bottom half = lower index); confirm
        ;        against the target configuration.
        ;-----------------------------------------------------------------

fir_16by16_arm9e
        STMDB   sp!, {r4-r11, lr}       ; save callee-saved registers and return address
        CMP     N, #0                   ; no outputs requested?
        BEQ     fir_exit_arm9e          ; the loops below test at the bottom and would
                                        ; still store six words, so leave now
        LDMIA   h, {h, T}               ; unpack descriptor: h = coefficient array, T = count
        CMP     T, #0                   ; no coefficients?
        BEQ     fir_exit_arm9e          ; a pass would still read six coefficients and the
                                        ; rewind by T below would not undo it
next_sample_arm9e
        ; h_10 and h_32 reuse the registers of y and N, and the tap loop
        ; counts T down, so all three are parked on the stack meanwhile.
        STMDB   sp!, {y, N, T}
        LDMIA   x!, {x_10, x_32, x_54}  ; preload six samples s0..s5
        MOV     y_0, #0                 ; zero accumulators
        MOV     y_1, #0
        MOV     y_2, #0
        MOV     y_3, #0
        MOV     y_4, #0
        MOV     y_5, #0
next_tap_arm9e
        ; perform next block of 6x6=36 taps
        ; Notation: c0..c5 are the six coefficients of this pass and
        ; s0..s11 the samples starting at the current position;
        ; accumulator j receives ck * s(j+k) for k = 0..5.
        LDMIA   h!, {h_10, h_32}        ; load four coefficients c0..c3
        SUBS    T, T, #6                ; count six taps; nothing before the BGT
                                        ; below changes these flags
        ; k = 0: c0 = bottom half of h_10
        SMLABB  y_0, x_10, h_10, y_0
        SMLATB  y_1, x_10, h_10, y_1
        SMLABB  y_2, x_32, h_10, y_2
        SMLATB  y_3, x_32, h_10, y_3
        SMLABB  y_4, x_54, h_10, y_4
        SMLATB  y_5, x_54, h_10, y_5
        ; k = 1: c1 = top half of h_10
        SMLATT  y_0, x_10, h_10, y_0
        LDR     x_10, [x], #4           ; s0,s1 are finished: load next two samples s6,s7
        SMLABT  y_1, x_32, h_10, y_1
        SMLATT  y_2, x_32, h_10, y_2
        SMLABT  y_3, x_54, h_10, y_3
        SMLATT  y_4, x_54, h_10, y_4
        SMLABT  y_5, x_10, h_10, y_5
        LDR     h_10, [h], #4           ; c0,c1 are finished: load coefficients c4,c5
        ; k = 2: c2 = bottom half of h_32
        SMLABB  y_0, x_32, h_32, y_0
        SMLATB  y_1, x_32, h_32, y_1
        SMLABB  y_2, x_54, h_32, y_2
        SMLATB  y_3, x_54, h_32, y_3
        SMLABB  y_4, x_10, h_32, y_4
        SMLATB  y_5, x_10, h_32, y_5
        ; k = 3: c3 = top half of h_32
        SMLATT  y_0, x_32, h_32, y_0
        LDR     x_32, [x], #4           ; s2,s3 are finished: load samples s8,s9
        SMLABT  y_1, x_54, h_32, y_1
        SMLATT  y_2, x_54, h_32, y_2
        SMLABT  y_3, x_10, h_32, y_3
        SMLATT  y_4, x_10, h_32, y_4
        SMLABT  y_5, x_32, h_32, y_5
        ; k = 4: c4 = bottom half of the reloaded h_10
        SMLABB  y_0, x_54, h_10, y_0
        SMLATB  y_1, x_54, h_10, y_1
        SMLABB  y_2, x_10, h_10, y_2
        SMLATB  y_3, x_10, h_10, y_3
        SMLABB  y_4, x_32, h_10, y_4
        SMLATB  y_5, x_32, h_10, y_5
        ; k = 5: c5 = top half of the reloaded h_10
        SMLATT  y_0, x_54, h_10, y_0
        LDR     x_54, [x], #4           ; s4,s5 are finished: load samples s10,s11
        SMLABT  y_1, x_10, h_10, y_1
        SMLATT  y_2, x_10, h_10, y_2
        SMLABT  y_3, x_32, h_10, y_3
        SMLATT  y_4, x_32, h_10, y_4
        SMLABT  y_5, x_54, h_10, y_5
        ; x_10, x_32, x_54 now hold s6..s11, ready for the next six taps
        BGT     next_tap_arm9e
        LDMIA   sp!, {y, N, T}          ; recover output pointer, output count, tap count
        STMIA   y!, {y_0, y_1, y_2, y_3, y_4, y_5} ; store six outputs
        SUB     h, h, T, LSL#1          ; restore coefficient pointer (back T halfwords)
        SUB     x, x, T, LSL#1          ; back T samples: net advance is the six preloaded
        SUBS    N, N, #6                ; six outputs done
        BGT     next_sample_arm9e
fir_exit_arm9e
        LDMIA   sp!, {r4-r11, pc}       ; restore registers and return

        END                             ; end of this assembler source module
