"ldr r10, [r9], #4 \n\t" /* load two samples */
#ifdef HAVE_AS3514
/* AS3514 path: r10 is stored twice to the same location, [r12, #0x40],
with a rotate applied before each store.
The AS3514 reads 3 bytes at a time, it seems, ignoring the lowest.
This code seems to work well, but we may have to mask off the extra
bits - at the expense of a few extra cycles in the FIQ.
NOTE(review): the first rotate is by 2 bits, not by a halfword, so on its
own it does not place a 16-bit sample in the top bits - confirm the
intended rotate amount against the data format the codec expects. */
"mov r10, r10, ror #2\n\t" /* rotate r10 right by 2 bits (see NOTE above) */
"str r10, [r12, #0x40]\n\t" /* first write; low bits presumably ignored by the hardware - TODO confirm */
"mov r10, r10, ror #16\n\t" /* rotate by 16: swap the halfwords so the other one is in the top bits */
"str r10, [r12, #0x40]\n\t" /* then write it, to the same address */
#else
/* Generic path: each 16-bit half of r10 is moved in turn into the top
half of the word and stored to [r12, #0x40]. */
"mov r10, r10, ror #16\n\t" /* swap halfwords: the low halfword (left sample, per the original note) moves to the top bits */
"str r10, [r12, #0x40]\n\t" /* write top sample; lower half presumably ignored - TODO confirm */
"mov r10, r10, lsl #16\n\t" /* shift the remaining halfword up to the top bits, zeroing the low 16 */
"str r10, [r12, #0x40]\n\t" /* then write it */
#endif
"subs r8, r8, #4 \n\t" /* check if we have more samples */