/* Header preamble, ARMCC intrinsic aliases, and the first GCC wrapper (__SADD8).
 *
 * - Under __ICCARM__ a `#pragma system_include` is issued.
 * - __CORE_CMSIMD_H is the include guard.
 * - Under __CC_ARM each upper-case intrinsic name (__SADD8, __QADD8, ...) is
 *   aliased to the lower-case name of the same spelling (__sadd8, __qadd8, ...).
 *   __PKHBT, __PKHTB and __SMMLA are plain C expressions instead:
 *     __PKHBT keeps the low 16 bits of ARG1 and the high 16 bits of (ARG2 << ARG3);
 *     __PKHTB keeps the high 16 bits of ARG1 and the low 16 bits of (ARG2 >> ARG3),
 *             both operands being cast to uint32_t first;
 *     __SMMLA forms the 64-bit product ARG1 * ARG2, adds ARG3 shifted left by 32
 *             as int64_t, shifts the sum right by 32 and casts it to int32_t.
 * - Under __GNUC__ the intrinsics are always_inline functions wrapping inline
 *   assembly; __SADD8 emits `sadd8 %0, %1, %2` (volatile asm) with `result` as
 *   the output register and op1/op2 as input registers.
 *   Presumably four parallel signed 8-bit additions - confirm in the ARM ARM.
 *
 * NOTE(review): the visible __CC_ARM list has no alias for __SASX, __QASX, __UASX,
 * __SSAX, __QSAX, __USAX or __SEL, although the __GNUC__ branch defines functions
 * with those names - confirm whether they are missing or just not visible here.
 * NOTE(review): the braces of __SADD8, the declaration of `result` and the return
 * statement are not visible in this excerpt; confirm against the complete header. */
38 #if defined ( __ICCARM__ ) 39 #pragma system_include 42 #ifndef __CORE_CMSIMD_H 43 #define __CORE_CMSIMD_H 61 #if defined ( __CC_ARM ) 63 #define __SADD8 __sadd8 64 #define __QADD8 __qadd8 65 #define __SHADD8 __shadd8 66 #define __UADD8 __uadd8 67 #define __UQADD8 __uqadd8 68 #define __UHADD8 __uhadd8 69 #define __SSUB8 __ssub8 70 #define __QSUB8 __qsub8 71 #define __SHSUB8 __shsub8 72 #define __USUB8 __usub8 73 #define __UQSUB8 __uqsub8 74 #define __UHSUB8 __uhsub8 75 #define __SADD16 __sadd16 76 #define __QADD16 __qadd16 77 #define __SHADD16 __shadd16 78 #define __UADD16 __uadd16 79 #define __UQADD16 __uqadd16 80 #define __UHADD16 __uhadd16 81 #define __SSUB16 __ssub16 82 #define __QSUB16 __qsub16 83 #define __SHSUB16 __shsub16 84 #define __USUB16 __usub16 85 #define __UQSUB16 __uqsub16 86 #define __UHSUB16 __uhsub16 89 #define __SHASX __shasx 91 #define __UQASX __uqasx 92 #define __UHASX __uhasx 95 #define __SHSAX __shsax 97 #define __UQSAX __uqsax 98 #define __UHSAX __uhsax 99 #define __USAD8 __usad8 100 #define __USADA8 __usada8 101 #define __SSAT16 __ssat16 102 #define __USAT16 __usat16 103 #define __UXTB16 __uxtb16 104 #define __UXTAB16 __uxtab16 105 #define __SXTB16 __sxtb16 106 #define __SXTAB16 __sxtab16 107 #define __SMUAD __smuad 108 #define __SMUADX __smuadx 109 #define __SMLAD __smlad 110 #define __SMLADX __smladx 111 #define __SMLALD __smlald 112 #define __SMLALDX __smlaldx 113 #define __SMUSD __smusd 114 #define __SMUSDX __smusdx 115 #define __SMLSD __smlsd 116 #define __SMLSDX __smlsdx 117 #define __SMLSLD __smlsld 118 #define __SMLSLDX __smlsldx 120 #define __QADD __qadd 121 #define __QSUB __qsub 123 #define __PKHBT(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \ 124 ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL) ) 126 #define __PKHTB(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \ 127 ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL) ) 129 #define __SMMLA(ARG1,ARG2,ARG3) ( (int32_t)((((int64_t)(ARG1) * (ARG2)) + \ 130 
((int64_t)(ARG3) << 32) ) >> 32)) 133 #elif defined ( __GNUC__ ) 135 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD8(uint32_t op1, uint32_t op2)
139 __ASM
volatile (
"sadd8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __QADD8 (GCC inline-assembly variant): emits one `qadd8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel signed saturating 8-bit additions - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
143 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD8(uint32_t op1, uint32_t op2)
147 __ASM
volatile (
"qadd8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SHADD8 (GCC inline-assembly variant): emits one `shadd8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel signed 8-bit additions with halved results - confirm
 * in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
151 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD8(uint32_t op1, uint32_t op2)
155 __ASM
volatile (
"shadd8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UADD8 (GCC inline-assembly variant): emits one `uadd8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel unsigned 8-bit additions - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
159 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD8(uint32_t op1, uint32_t op2)
163 __ASM
volatile (
"uadd8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UQADD8 (GCC inline-assembly variant): emits one `uqadd8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel unsigned saturating 8-bit additions - confirm in the
 * ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
167 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD8(uint32_t op1, uint32_t op2)
171 __ASM
volatile (
"uqadd8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UHADD8 (GCC inline-assembly variant): emits one `uhadd8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel unsigned 8-bit additions with halved results - confirm
 * in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
175 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD8(uint32_t op1, uint32_t op2)
179 __ASM
volatile (
"uhadd8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SSUB8 (GCC inline-assembly variant): emits one `ssub8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel signed 8-bit subtractions - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
184 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB8(uint32_t op1, uint32_t op2)
188 __ASM
volatile (
"ssub8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __QSUB8 (GCC inline-assembly variant): emits one `qsub8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel signed saturating 8-bit subtractions - confirm in the
 * ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
192 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2)
196 __ASM
volatile (
"qsub8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SHSUB8 (GCC inline-assembly variant): emits one `shsub8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel signed 8-bit subtractions with halved results -
 * confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
200 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB8(uint32_t op1, uint32_t op2)
204 __ASM
volatile (
"shsub8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __USUB8 (GCC inline-assembly variant): emits one `usub8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel unsigned 8-bit subtractions - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
208 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB8(uint32_t op1, uint32_t op2)
212 __ASM
volatile (
"usub8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UQSUB8 (GCC inline-assembly variant): emits one `uqsub8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel unsigned saturating 8-bit subtractions - confirm in
 * the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
216 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB8(uint32_t op1, uint32_t op2)
220 __ASM
volatile (
"uqsub8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UHSUB8 (GCC inline-assembly variant): emits one `uhsub8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably four parallel unsigned 8-bit subtractions with halved results -
 * confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
224 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB8(uint32_t op1, uint32_t op2)
228 __ASM
volatile (
"uhsub8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SADD16 (GCC inline-assembly variant): emits one `sadd16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel signed 16-bit additions - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
233 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD16(uint32_t op1, uint32_t op2)
237 __ASM
volatile (
"sadd16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __QADD16 (GCC inline-assembly variant): emits one `qadd16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel signed saturating 16-bit additions - confirm in the
 * ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
241 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD16(uint32_t op1, uint32_t op2)
245 __ASM
volatile (
"qadd16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SHADD16 (GCC inline-assembly variant): emits one `shadd16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel signed 16-bit additions with halved results - confirm
 * in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
249 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2)
253 __ASM
volatile (
"shadd16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UADD16 (GCC inline-assembly variant): emits one `uadd16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel unsigned 16-bit additions - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
257 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD16(uint32_t op1, uint32_t op2)
261 __ASM
volatile (
"uadd16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UQADD16 (GCC inline-assembly variant): emits one `uqadd16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel unsigned saturating 16-bit additions - confirm in the
 * ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
265 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD16(uint32_t op1, uint32_t op2)
269 __ASM
volatile (
"uqadd16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UHADD16 (GCC inline-assembly variant): emits one `uhadd16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel unsigned 16-bit additions with halved results - confirm
 * in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
273 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD16(uint32_t op1, uint32_t op2)
277 __ASM
volatile (
"uhadd16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SSUB16 (GCC inline-assembly variant): emits one `ssub16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel signed 16-bit subtractions - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
281 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB16(uint32_t op1, uint32_t op2)
285 __ASM
volatile (
"ssub16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __QSUB16 (GCC inline-assembly variant): emits one `qsub16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel signed saturating 16-bit subtractions - confirm in the
 * ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
289 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2)
293 __ASM
volatile (
"qsub16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SHSUB16 (GCC inline-assembly variant): emits one `shsub16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel signed 16-bit subtractions with halved results -
 * confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
297 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2)
301 __ASM
volatile (
"shsub16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __USUB16 (GCC inline-assembly variant): emits one `usub16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel unsigned 16-bit subtractions - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
305 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB16(uint32_t op1, uint32_t op2)
309 __ASM
volatile (
"usub16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UQSUB16 (GCC inline-assembly variant): emits one `uqsub16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel unsigned saturating 16-bit subtractions - confirm in
 * the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
313 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB16(uint32_t op1, uint32_t op2)
317 __ASM
volatile (
"uqsub16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UHSUB16 (GCC inline-assembly variant): emits one `uhsub16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably two parallel unsigned 16-bit subtractions with halved results -
 * confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
321 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB16(uint32_t op1, uint32_t op2)
325 __ASM
volatile (
"uhsub16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SASX (GCC inline-assembly variant): emits one `sasx %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably a signed halfword add/subtract with exchange - confirm the exact
 * lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
329 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SASX(uint32_t op1, uint32_t op2)
333 __ASM
volatile (
"sasx %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __QASX (GCC inline-assembly variant): emits one `qasx %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably a signed saturating halfword add/subtract with exchange - confirm
 * the exact lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
337 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QASX(uint32_t op1, uint32_t op2)
341 __ASM
volatile (
"qasx %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SHASX (GCC inline-assembly variant): emits one `shasx %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably a signed halving halfword add/subtract with exchange - confirm the
 * exact lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
345 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHASX(uint32_t op1, uint32_t op2)
349 __ASM
volatile (
"shasx %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UASX (GCC inline-assembly variant): emits one `uasx %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably an unsigned halfword add/subtract with exchange - confirm the exact
 * lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
353 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UASX(uint32_t op1, uint32_t op2)
357 __ASM
volatile (
"uasx %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UQASX (GCC inline-assembly variant): emits one `uqasx %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably an unsigned saturating halfword add/subtract with exchange -
 * confirm the exact lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
361 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQASX(uint32_t op1, uint32_t op2)
365 __ASM
volatile (
"uqasx %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UHASX (GCC inline-assembly variant): emits one `uhasx %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably an unsigned halving halfword add/subtract with exchange - confirm
 * the exact lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
369 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHASX(uint32_t op1, uint32_t op2)
373 __ASM
volatile (
"uhasx %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SSAX (GCC inline-assembly variant): emits one `ssax %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably a signed halfword subtract/add with exchange - confirm the exact
 * lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
377 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSAX(uint32_t op1, uint32_t op2)
381 __ASM
volatile (
"ssax %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __QSAX (GCC inline-assembly variant): emits one `qsax %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably a signed saturating halfword subtract/add with exchange - confirm
 * the exact lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
385 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSAX(uint32_t op1, uint32_t op2)
389 __ASM
volatile (
"qsax %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SHSAX (GCC inline-assembly variant): emits one `shsax %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably a signed halving halfword subtract/add with exchange - confirm the
 * exact lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
393 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2)
397 __ASM
volatile (
"shsax %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __USAX (GCC inline-assembly variant): emits one `usax %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably an unsigned halfword subtract/add with exchange - confirm the exact
 * lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
401 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAX(uint32_t op1, uint32_t op2)
405 __ASM
volatile (
"usax %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UQSAX (GCC inline-assembly variant): emits one `uqsax %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably an unsigned saturating halfword subtract/add with exchange -
 * confirm the exact lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
409 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSAX(uint32_t op1, uint32_t op2)
413 __ASM
volatile (
"uqsax %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __UHSAX (GCC inline-assembly variant): emits one `uhsax %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably an unsigned halving halfword subtract/add with exchange - confirm
 * the exact lane behaviour in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
417 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSAX(uint32_t op1, uint32_t op2)
421 __ASM
volatile (
"uhsax %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __USAD8 (GCC inline-assembly variant): emits one `usad8 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably an unsigned sum of absolute differences over the four byte lanes -
 * confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
425 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAD8(uint32_t op1, uint32_t op2)
429 __ASM
volatile (
"usad8 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __USADA8 (GCC inline-assembly variant): emits one `usada8 %0, %1, %2, %3`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1, %2 and %3 are input registers holding op1, op2 and op3.
 * Presumably an unsigned sum of absolute byte differences of op1/op2 accumulated
 * onto op3 - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
433 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USADA8(uint32_t op1, uint32_t op2, uint32_t op3)
437 __ASM
volatile (
"usada8 %0, %1, %2, %3" :
"=r" (result) :
"r" (op1),
"r" (op2),
"r" (op3) );
/* __SSAT16 / __USAT16 macros and the __UXTB16 wrapper (GCC variant).
 *
 * __SSAT16(ARG1,ARG2) and __USAT16(ARG1,ARG2) copy ARG1 into a local uint32_t
 * __ARG1 and emit `ssat16 %0, %1, %2` / `usat16 %0, %1, %2` with __RES as the
 * output register, ARG2 as an "I" (immediate) operand and __ARG1 as an input
 * register. Because ARG2 uses the "I" constraint it has to be a compile-time
 * constant. These two asm statements are not marked volatile.
 * NOTE(review): the opening and closing parts of both macro bodies (presumably a
 * statement expression yielding __RES) are not visible here - confirm.
 *
 * __UXTB16 emits one `uxtb16 %0, %1` instruction as volatile asm, with `result`
 * as the output register and op1 as the input register.
 * Presumably zero-extends two bytes of op1 into two halfwords - confirm in the
 * ARM ARM.
 * NOTE(review): the braces of __UXTB16, the declaration of `result` and the
 * return statement are not visible in this excerpt. */
441 #define __SSAT16(ARG1,ARG2) \ 443 uint32_t __RES, __ARG1 = (ARG1); \ 444 __ASM ("ssat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \ 448 #define __USAT16(ARG1,ARG2) \ 450 uint32_t __RES, __ARG1 = (ARG1); \ 451 __ASM ("usat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \ 455 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTB16(uint32_t op1)
459 __ASM
volatile (
"uxtb16 %0, %1" :
"=r" (result) :
"r" (op1));
/* __UXTAB16 (GCC inline-assembly variant): emits one `uxtab16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably zero-extends two bytes of op2 to halfwords and adds them to the
 * halfwords of op1 - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
463 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTAB16(uint32_t op1, uint32_t op2)
467 __ASM
volatile (
"uxtab16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SXTB16 (GCC inline-assembly variant): emits one `sxtb16 %0, %1`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 is an input register holding op1.
 * Presumably sign-extends two bytes of op1 into two halfwords - confirm in the
 * ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
471 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTB16(uint32_t op1)
475 __ASM
volatile (
"sxtb16 %0, %1" :
"=r" (result) :
"r" (op1));
/* __SXTAB16 (GCC inline-assembly variant): emits one `sxtab16 %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably sign-extends two bytes of op2 to halfwords and adds them to the
 * halfwords of op1 - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
479 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTAB16(uint32_t op1, uint32_t op2)
483 __ASM
volatile (
"sxtab16 %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SMUAD (GCC inline-assembly variant): emits one `smuad %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably a dual signed 16x16 multiply whose two products are added - confirm
 * in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
487 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUAD (uint32_t op1, uint32_t op2)
491 __ASM
volatile (
"smuad %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SMUADX (GCC inline-assembly variant): emits one `smuadx %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably the halfword-exchanged form of the dual signed multiply-add -
 * confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
495 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUADX (uint32_t op1, uint32_t op2)
499 __ASM
volatile (
"smuadx %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SMLAD (GCC inline-assembly variant): emits one `smlad %0, %1, %2, %3`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1, %2 and %3 are input registers holding op1, op2 and op3.
 * Presumably a dual signed 16x16 multiply whose products are added to the
 * accumulator op3 - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
503 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLAD (uint32_t op1, uint32_t op2, uint32_t op3)
507 __ASM
volatile (
"smlad %0, %1, %2, %3" :
"=r" (result) :
"r" (op1),
"r" (op2),
"r" (op3) );
/* __SMLADX (GCC inline-assembly variant): emits one `smladx %0, %1, %2, %3`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1, %2 and %3 are input registers holding op1, op2 and op3.
 * Presumably the halfword-exchanged form of the dual signed multiply-accumulate
 * - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
511 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLADX (uint32_t op1, uint32_t op2, uint32_t op3)
515 __ASM
volatile (
"smladx %0, %1, %2, %3" :
"=r" (result) :
"r" (op1),
"r" (op2),
"r" (op3) );
/* __SMLALD (GCC inline-assembly variant): takes op1, op2 and a 64-bit `acc` and
 * returns uint64_t. It emits `smlald %0, %1, %2, %3` as volatile asm.
 * %0 and %1 are output registers that are also inputs: the "0" and "1" matching
 * constraints tie them to the same llr.w32[] elements. %2 and %3 are input
 * registers holding op1 and op2.
 * Two asm statements are present. The first follows `#ifndef __ARMEB__` (marked
 * "Little endian") and binds %0 to llr.w32[0] and %1 to llr.w32[1]; the second
 * swaps the two indices.
 * Presumably a dual signed 16x16 multiply added to a 64-bit accumulator - confirm
 * in the ARM ARM.
 * NOTE(review): the declaration of `llr`, how it is loaded from `acc`, the
 * #else/#endif separating the two asm statements, the braces and the return
 * statement are not visible in this excerpt; confirm against the complete header. */
519 __attribute__( ( always_inline ) ) __STATIC_INLINE uint64_t __SMLALD (uint32_t op1, uint32_t op2, uint64_t acc)
527 #ifndef __ARMEB__ // Little endian 528 __ASM
volatile (
"smlald %0, %1, %2, %3" :
"=r" (llr.w32[0]),
"=r" (llr.w32[1]):
"r" (op1),
"r" (op2) ,
"0" (llr.w32[0]),
"1" (llr.w32[1]) );
530 __ASM
volatile (
"smlald %0, %1, %2, %3" :
"=r" (llr.w32[1]),
"=r" (llr.w32[0]):
"r" (op1),
"r" (op2) ,
"0" (llr.w32[1]),
"1" (llr.w32[0]) );
/* __SMLALDX (GCC inline-assembly variant): takes op1, op2 and a 64-bit `acc` and
 * returns uint64_t. It emits `smlaldx %0, %1, %2, %3` as volatile asm.
 * %0 and %1 are output registers that are also inputs: the "0" and "1" matching
 * constraints tie them to the same llr.w32[] elements. %2 and %3 are input
 * registers holding op1 and op2.
 * Two asm statements are present. The first follows `#ifndef __ARMEB__` (marked
 * "Little endian") and binds %0 to llr.w32[0] and %1 to llr.w32[1]; the second
 * swaps the two indices.
 * Presumably the halfword-exchanged dual signed multiply added to a 64-bit
 * accumulator - confirm in the ARM ARM.
 * NOTE(review): the declaration of `llr`, how it is loaded from `acc`, the
 * #else/#endif separating the two asm statements, the braces and the return
 * statement are not visible in this excerpt; confirm against the complete header. */
536 __attribute__( ( always_inline ) ) __STATIC_INLINE uint64_t __SMLALDX (uint32_t op1, uint32_t op2, uint64_t acc)
544 #ifndef __ARMEB__ // Little endian 545 __ASM
volatile (
"smlaldx %0, %1, %2, %3" :
"=r" (llr.w32[0]),
"=r" (llr.w32[1]):
"r" (op1),
"r" (op2) ,
"0" (llr.w32[0]),
"1" (llr.w32[1]) );
547 __ASM
volatile (
"smlaldx %0, %1, %2, %3" :
"=r" (llr.w32[1]),
"=r" (llr.w32[0]):
"r" (op1),
"r" (op2) ,
"0" (llr.w32[1]),
"1" (llr.w32[0]) );
/* __SMUSD (GCC inline-assembly variant): emits one `smusd %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably a dual signed 16x16 multiply whose two products are subtracted -
 * confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
553 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSD (uint32_t op1, uint32_t op2)
557 __ASM
volatile (
"smusd %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SMUSDX (GCC inline-assembly variant): emits one `smusdx %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably the halfword-exchanged form of the dual signed multiply-subtract -
 * confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
561 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSDX (uint32_t op1, uint32_t op2)
565 __ASM
volatile (
"smusdx %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __SMLSD (GCC inline-assembly variant): emits one `smlsd %0, %1, %2, %3`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1, %2 and %3 are input registers holding op1, op2 and op3.
 * Presumably a dual signed 16x16 multiply whose product difference is added to
 * the accumulator op3 - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
569 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSD (uint32_t op1, uint32_t op2, uint32_t op3)
573 __ASM
volatile (
"smlsd %0, %1, %2, %3" :
"=r" (result) :
"r" (op1),
"r" (op2),
"r" (op3) );
/* __SMLSDX (GCC inline-assembly variant): emits one `smlsdx %0, %1, %2, %3`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1, %2 and %3 are input registers holding op1, op2 and op3.
 * Presumably the halfword-exchanged form of the dual signed multiply-subtract
 * with accumulate - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
577 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSDX (uint32_t op1, uint32_t op2, uint32_t op3)
581 __ASM
volatile (
"smlsdx %0, %1, %2, %3" :
"=r" (result) :
"r" (op1),
"r" (op2),
"r" (op3) );
/* __SMLSLD (GCC inline-assembly variant): takes op1, op2 and a 64-bit `acc` and
 * returns uint64_t. It emits `smlsld %0, %1, %2, %3` as volatile asm.
 * %0 and %1 are output registers that are also inputs: the "0" and "1" matching
 * constraints tie them to the same llr.w32[] elements. %2 and %3 are input
 * registers holding op1 and op2.
 * Two asm statements are present. The first follows `#ifndef __ARMEB__` (marked
 * "Little endian") and binds %0 to llr.w32[0] and %1 to llr.w32[1]; the second
 * swaps the two indices.
 * Presumably a dual signed 16x16 multiply whose product difference is added to a
 * 64-bit accumulator - confirm in the ARM ARM.
 * NOTE(review): the declaration of `llr`, how it is loaded from `acc`, the
 * #else/#endif separating the two asm statements, the braces and the return
 * statement are not visible in this excerpt; confirm against the complete header. */
585 __attribute__( ( always_inline ) ) __STATIC_INLINE uint64_t __SMLSLD (uint32_t op1, uint32_t op2, uint64_t acc)
593 #ifndef __ARMEB__ // Little endian 594 __ASM
volatile (
"smlsld %0, %1, %2, %3" :
"=r" (llr.w32[0]),
"=r" (llr.w32[1]):
"r" (op1),
"r" (op2) ,
"0" (llr.w32[0]),
"1" (llr.w32[1]) );
596 __ASM
volatile (
"smlsld %0, %1, %2, %3" :
"=r" (llr.w32[1]),
"=r" (llr.w32[0]):
"r" (op1),
"r" (op2) ,
"0" (llr.w32[1]),
"1" (llr.w32[0]) );
/* __SMLSLDX (GCC inline-assembly variant): takes op1, op2 and a 64-bit `acc` and
 * returns uint64_t. It emits `smlsldx %0, %1, %2, %3` as volatile asm.
 * %0 and %1 are output registers that are also inputs: the "0" and "1" matching
 * constraints tie them to the same llr.w32[] elements. %2 and %3 are input
 * registers holding op1 and op2.
 * Two asm statements are present. The first follows `#ifndef __ARMEB__` (marked
 * "Little endian") and binds %0 to llr.w32[0] and %1 to llr.w32[1]; the second
 * swaps the two indices.
 * Presumably the halfword-exchanged dual signed multiply-subtract added to a
 * 64-bit accumulator - confirm in the ARM ARM.
 * NOTE(review): the declaration of `llr`, how it is loaded from `acc`, the
 * #else/#endif separating the two asm statements, the braces and the return
 * statement are not visible in this excerpt; confirm against the complete header. */
602 __attribute__( ( always_inline ) ) __STATIC_INLINE uint64_t __SMLSLDX (uint32_t op1, uint32_t op2, uint64_t acc)
610 #ifndef __ARMEB__ // Little endian 611 __ASM
volatile (
"smlsldx %0, %1, %2, %3" :
"=r" (llr.w32[0]),
"=r" (llr.w32[1]):
"r" (op1),
"r" (op2) ,
"0" (llr.w32[0]),
"1" (llr.w32[1]) );
613 __ASM
volatile (
"smlsldx %0, %1, %2, %3" :
"=r" (llr.w32[1]),
"=r" (llr.w32[0]):
"r" (op1),
"r" (op2) ,
"0" (llr.w32[1]),
"1" (llr.w32[0]) );
/* __SEL (GCC inline-assembly variant): emits one `sel %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably selects each result byte from op1 or op2 according to processor
 * status flags set by an earlier instruction - confirm in the ARM ARM. No such
 * flag dependency is expressed in the asm operands shown here.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
619 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SEL (uint32_t op1, uint32_t op2)
623 __ASM
volatile (
"sel %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __QADD (GCC inline-assembly variant): emits one `qadd %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably a 32-bit signed saturating addition, although the parameters and
 * return type are declared uint32_t - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
627 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD(uint32_t op1, uint32_t op2)
631 __ASM
volatile (
"qadd %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __QSUB (GCC inline-assembly variant): emits one `qsub %0, %1, %2`
 * instruction as volatile asm. %0 is an output register bound to `result`;
 * %1 and %2 are input registers holding op1 and op2.
 * Presumably a 32-bit signed saturating subtraction, although the parameters and
 * return type are declared uint32_t - confirm in the ARM ARM.
 * NOTE(review): the braces, the declaration of `result` and the return statement
 * are not visible in this excerpt; confirm against the complete header. */
635 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB(uint32_t op1, uint32_t op2)
639 __ASM
volatile (
"qsub %0, %1, %2" :
"=r" (result) :
"r" (op1),
"r" (op2) );
/* __PKHBT / __PKHTB macros and the __SMMLA wrapper (GCC variant).
 *
 * __PKHBT(ARG1,ARG2,ARG3) copies ARG1 and ARG2 into local uint32_t temporaries
 * and emits `pkhbt %0, %1, %2, lsl %3` with __RES as the output register and
 * ARG3 as an "I" (immediate) operand, so ARG3 has to be a compile-time constant.
 *
 * __PKHTB(ARG1,ARG2,ARG3) copies its arguments the same way and contains two
 * alternative asm statements: `pkhtb %0, %1, %2` with no shift operand, and
 * `pkhtb %0, %1, %2, asr %3` with ARG3 as an "I" immediate.
 * NOTE(review): the condition that chooses between the two pkhtb forms is not
 * visible here (presumably a test for ARG3 being zero) - confirm. The opening
 * and closing parts of both macro bodies are not visible either.
 * None of the macro asm statements is marked volatile.
 *
 * __SMMLA takes three int32_t operands, returns uint32_t, and emits one
 * `smmla %0, %1, %2, %3` instruction as volatile asm with `result` as the output
 * register and op1, op2, op3 as input registers.
 * Presumably a signed most-significant-word multiply-accumulate - confirm in the
 * ARM ARM.
 * NOTE(review): the braces of __SMMLA, the declaration of `result` and the return
 * statement are not visible in this excerpt. */
643 #define __PKHBT(ARG1,ARG2,ARG3) \ 645 uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \ 646 __ASM ("pkhbt %0, %1, %2, lsl %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \ 650 #define __PKHTB(ARG1,ARG2,ARG3) \ 652 uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \ 654 __ASM ("pkhtb %0, %1, %2" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2) ); \ 656 __ASM ("pkhtb %0, %1, %2, asr %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \ 660 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3)
664 __ASM
volatile (
"smmla %0, %1, %2, %3" :
"=r" (result):
"r" (op1),
"r" (op2),
"r" (op3) );
669 #elif defined ( __ICCARM__ ) 671 #include <cmsis_iar.h> 674 #elif defined ( __TMS470__ ) 676 #include <cmsis_ccs.h> 679 #elif defined ( __TASKING__ ) 684 #elif defined ( __CSMC__ ) 686 #include <cmsis_csm.h> struct __attribute__((packed))