
#ifndef _ARM_ACLE_H_
#define _ARM_ACLE_H_

#ifndef _SYSTEM_BUILD
  #pragma system_include
#endif

#ifndef __ARM_ACLE
  #error "ACLE intrinsics not supported"
#endif

#ifndef __ICCARM__
  #error "This version of arm_acle.h can only be used together with iccarm."
#endif  

#ifdef __aarch64__
#include <intrinsics.h>
#else
/* AARCH32: some ACLE intrinsics are incompatible with intrinsics.h */
#include <iccarm_builtin.h>
#include <iar_intrinsics_common.h>
#endif

#ifndef __ICCARM__
#error "This file can only be used with iccarm."
#endif

#pragma system_include

#ifdef __cplusplus
extern "C" {
#endif

#pragma language=save
#pragma language=extended


// 6. Feature test macros
//
// Those supported are defined by the compiler
//

// 6. Attributes and pragmas
// __attribute__((pcs("aapcs")))
// __attribute__((pcs("aapcs-vfp")))
// __attribute__((target("arm")))
// __attribute__((target("thumb")))
// __attribute__((weak))
// __attribute__((aligned(N)))
//

//
// alias, common, nocommon, noinline, packed, section, 
// visibility, weak
//

// 7. Synchronization, barrier, and hint intrinsics

// 7.3. Memory barriers

// ACLE memory-barrier names, provided as plain aliases for the IAR builtins.
// These are object-like macros: whatever argument list follows the name at
// the call site is handed to the builtin unchanged.
#define __dmb   __iar_builtin_DMBx
#define __dsb   __iar_builtin_DSBx
#define __isb   __iar_builtin_ISBx

// 7.4 Hints

// ACLE hint names, provided as plain aliases for the IAR builtins.
#define __wfi     __iar_builtin_WFI
#define __wfe     __iar_builtin_WFE
#define __sev     __iar_builtin_SEV
#define __sevl    __iar_builtin_SEVL
#define __yield   __iar_builtin_YIELD
#if __ARM_32BIT_STATE
  // __dbg is only provided when __ARM_32BIT_STATE is non-zero.
  // The expansion must use the macro parameter itself; the spelling is
  // case-sensitive, so a lowercase name here would not be substituted and
  // would instead refer to whatever `x` exists at the call site.
  #define __dbg(X)  __iar_builtin_DBG(X)
#endif  

// 7.5 Swap
// __swp deprecated

// 7.6 Memory prefetch intrinsics
// 7.6.1 Data prefetch

// ACLE data-prefetch names, aliased to the IAR builtins of the same name.
#define __pld  __iar_builtin_PLD
#define __pldx __iar_builtin_PLDx


// 7.6.2 Instruction prefetch

// ACLE instruction-prefetch names, aliased to the IAR builtins of the same name.
#define __pli  __iar_builtin_PLI
#define __plix __iar_builtin_PLIx


// 7.7 NOP

// ACLE __nop, aliased to the IAR no-operation builtin.
#define __nop     __iar_builtin_no_operation

// 8 DATA-PROCESSING INTRINSICS
// 8.1 Programmer's model of global state
// 8.1.1 The Q (saturation) flag

// Q (saturation) flag accessors. Only provided when the compiler sets
// __ARM_FEATURE_QBIT to a non-zero value; each ACLE name is an alias for
// the corresponding IAR builtin.
#if __ARM_FEATURE_QBIT

  #define __saturation_occurred     __iar_builtin_acle_QFlag
  #define __set_saturation_occurred __iar_builtin_set_QFlag
  #define __ignore_saturation       __iar_builtin_ignore_QFlag

#endif

// 8.2 Miscellaneous data-processing intrinsics

// ror()

#pragma inline=forced
unsigned int __ror(unsigned int x, unsigned int y)
{
  // Rotate the 32-bit value x right by y bit positions.
  // Only the low five bits of y are used, so the rotation is modulo 32.
  const unsigned int shift = y & 31u;

  // A zero shift is returned directly: the general formula would need a
  // left shift by 32, which is not a valid shift count for a 32-bit type.
  return (shift != 0u) ? ((x << (32u - shift)) | (x >> shift)) : x;
}

#pragma inline=forced
__UINT64_T_TYPE__ __rorll(__UINT64_T_TYPE__ x, unsigned int y)
{
  // Rotate the 64-bit value x right by y bit positions.
  // Only the low six bits of y are used, so the rotation is modulo 64.
  const unsigned int shift = y & 63u;

  // A zero shift is returned directly: the general formula would need a
  // left shift by 64, which is not a valid shift count for a 64-bit type.
  return (shift != 0u) ? ((x << (64u - shift)) | (x >> shift)) : x;
}

#pragma inline=forced
unsigned long __rorl(unsigned long x, unsigned int y)
{
  // Rotate right for `unsigned long`: forwards to the 64-bit or the 32-bit
  // rotate depending on the data model.
  // Both the lowercase spelling used by this header and the conventional
  // uppercase __LP64__ are accepted, so a 64-bit long is never routed
  // through the 32-bit path (which would drop the upper half of x).
#if defined(__lp64__) || defined(__LP64__)
  return __rorll(x, y);
#else
  return __ror(x, y);
#endif  
}


// __clz()

// ACLE __clz (32-bit count leading zeros), aliased to the IAR builtin.
#define __clz __iar_builtin_CLZ

#pragma inline=forced
unsigned int __clzll(__UINT64_T_TYPE__ x)
{
  // Count leading zeros of a 64-bit value using the 32-bit __clz.
  const __UINT32_T_TYPE__ upper = x >> 32;

  // A set bit in the upper word decides the result on its own.
  if (upper != 0)
    return __clz(upper);

  // Upper word is all zeros: that accounts for 32 leading zeros, the rest
  // come from the lower word.
  const __UINT32_T_TYPE__ lower = x & 0xFFFFFFFF;
  return 32u + __clz(lower);
}

#pragma inline=forced
unsigned long __clzl(unsigned long x)
{  
  // Count leading zeros for `unsigned long`: forwards to the 64-bit or the
  // 32-bit variant depending on the data model.
  // Both the lowercase spelling used by this header and the conventional
  // uppercase __LP64__ are accepted, so a 64-bit long is never routed
  // through the 32-bit path (which would ignore the upper half of x).
#if defined(__lp64__) || defined(__LP64__)
  return __clzll(x);
#else 
  return __clz(x);
#endif  
}



// __cls()

// Count leading sign bits: the number of consecutive bits directly below the
// most significant bit that are equal to it. The sign bit itself is not
// counted, so the result ranges from 0 to width-1, and both all-zeros and
// all-ones inputs give width-1.
//
// Each variant complements a negative input so that its top bit becomes 0.
// The leading-zero count of the result is then at least 1 and counts the
// sign bit plus its copies; subtracting 1 removes the sign bit and cannot
// wrap around.

#pragma inline=forced
unsigned int __cls(__UINT32_T_TYPE__ x)
{
  if (x >> 31)
    x = ~x;
  return __clz(x) - 1;
}

#pragma inline=forced
unsigned long __clsl(unsigned long x)
{
  // The sign-bit position is derived from the type's size because long is
  // 32 or 64 bits wide depending on the data model.
  if (x >> (sizeof(x) * 8 - 1))
    x = ~x;
  return __clzl(x) - 1;
}

#pragma inline=forced
unsigned int __clsll(__UINT64_T_TYPE__ x)
{
  if (x >> 63)
    x = ~x;
  return __clzll(x) - 1;
}

// __rev()

// ACLE __rev (32-bit byte reversal), aliased to the IAR builtin.
#define __rev __iar_builtin_REV

#pragma inline=forced
__UINT64_T_TYPE__ __revll(__UINT64_T_TYPE__ x)
{
  // Reverse the byte order of a 64-bit value using two 32-bit reversals:
  // each word is byte-reversed and the two words trade places.
  const __UINT32_T_TYPE__ new_high = __iar_builtin_REV(x & 0xFFFFFFFF);
  const __UINT64_T_TYPE__ new_low  = __iar_builtin_REV(x >> 32);

  return new_low | ((__UINT64_T_TYPE__)new_high << 32);
}

#pragma inline=forced
unsigned long __revl(unsigned long x)
{
  // Byte reversal for `unsigned long`: forwards to the 64-bit or the 32-bit
  // variant depending on the data model.
  // Both the lowercase spelling used by this header and the conventional
  // uppercase __LP64__ are accepted, so a 64-bit long is never routed
  // through the 32-bit path (which would drop the upper half of x).
#if defined(__lp64__) || defined(__LP64__)
  return __revll(x);
#else  
  return __rev(x);
#endif  
}


// __rev16()

// ACLE __rev16 (byte reversal within each 16-bit halfword), aliased to the
// IAR builtin.
#define __rev16 __iar_builtin_REV16

#pragma inline=forced
__UINT64_T_TYPE__ __rev16ll(__UINT64_T_TYPE__ x)
{
  // Apply the 32-bit __rev16 operation to each word of a 64-bit value.
  // Unlike a full byte reversal, the two words keep their positions.
  const __UINT32_T_TYPE__ high_word = __iar_builtin_REV16(x >> 32);
  const __UINT64_T_TYPE__ low_word  = __iar_builtin_REV16(x & 0xFFFFFFFF);

  return low_word | ((__UINT64_T_TYPE__)high_word << 32);
}

#pragma inline=forced
unsigned long __rev16l(unsigned long x)
{
  // Halfword-wise byte reversal for `unsigned long`: forwards to the 64-bit
  // or the 32-bit variant depending on the data model.
  // Both the lowercase spelling used by this header and the conventional
  // uppercase __LP64__ are accepted, so a 64-bit long is never routed
  // through the 32-bit path (which would drop the upper half of x).
#if defined(__lp64__) || defined(__LP64__)
  return __rev16ll(x);
#else
  return __rev16(x);
#endif  
}


// __revsh()
// ACLE __revsh: forwards X to the IAR REVSH builtin and casts the result to
// the 16-bit signed integer type.
#define __revsh(X) ((__INT16_T_TYPE__)__iar_builtin_REVSH(X))

// __rbit()

// ACLE __rbit (32-bit bit reversal), aliased to the IAR builtin.
#define __rbit __iar_builtin_RBIT

#pragma inline=forced
__UINT64_T_TYPE__ __rbitll(__UINT64_T_TYPE__ x)
{
  // Reverse the bit order of a 64-bit value using two 32-bit reversals:
  // the bit-reversed low word becomes the new high word and vice versa.
  const __UINT64_T_TYPE__ new_high =
      (__UINT64_T_TYPE__)__rbit(x & 0xFFFFFFFF) << 32;

  return new_high | __rbit(x >> 32);
}

#pragma inline=forced
unsigned long __rbitl(unsigned long x)
{
  // Bit reversal for `unsigned long`: forwards to the 64-bit or the 32-bit
  // variant depending on the data model.
  // Both the lowercase spelling used by this header and the conventional
  // uppercase __LP64__ are accepted, so a 64-bit long is never routed
  // through the 32-bit path (which would drop the upper half of x).
#if defined(__lp64__) || defined(__LP64__)
  return __rbitll(x);
#else
  return __rbit(x);
#endif    
}


// 8.3 16-bit multiplications

// 16-bit multiply intrinsics. Only provided when __ARM_FEATURE_DSP is
// defined; each ACLE name is an alias for the IAR builtin of the same
// mnemonic.
#ifdef __ARM_FEATURE_DSP

  #define __smulbb  __iar_builtin_SMULBB
  #define __smulbt  __iar_builtin_SMULBT
  #define __smultb  __iar_builtin_SMULTB
  #define __smultt  __iar_builtin_SMULTT
  #define __smulwb  __iar_builtin_SMULWB
  #define __smulwt  __iar_builtin_SMULWT

#endif /* __ARM_FEATURE_DSP */

// 8.4 Saturating intrinsics
// 8.4.1 Width-specified saturation intrinsics

// Width-specified saturation intrinsics. Only provided when
// __ARM_FEATURE_SAT is defined; each ACLE name is an alias for the IAR
// builtin of the same mnemonic.
#ifdef __ARM_FEATURE_SAT

  #define __ssat  __iar_builtin_SSAT
  #define __usat  __iar_builtin_USAT

#endif /* __ARM_FEATURE_SAT */

// 8.4.2 Saturating addition and subtraction intrinsics

// Saturating add/subtract/double intrinsics. Only provided when
// __ARM_FEATURE_DSP is defined. Note that ACLE __qdbl maps to the IAR
// builtin named QDOUBLE rather than to a same-named builtin.
#ifdef __ARM_FEATURE_DSP

  #define __qadd  __iar_builtin_QADD
  #define __qsub  __iar_builtin_QSUB
  #define __qdbl  __iar_builtin_QDOUBLE

#endif /* __ARM_FEATURE_DSP */


// 8.4.3 Accumulating multiplications

// Accumulating multiply intrinsics. Only provided when __ARM_FEATURE_DSP is
// defined; each ACLE name is an alias for the IAR builtin of the same
// mnemonic.
#ifdef __ARM_FEATURE_DSP

  #define __smlabb  __iar_builtin_SMLABB
  #define __smlabt  __iar_builtin_SMLABT
  #define __smlatb  __iar_builtin_SMLATB
  #define __smlatt  __iar_builtin_SMLATT

  #define __smlawb  __iar_builtin_SMLAWB
  #define __smlawt  __iar_builtin_SMLAWT

#endif /* __ARM_FEATURE_DSP */

// 8.5 32-bit SIMD intrinsics

// 32-bit SIMD intrinsics. Everything in this section is only provided when
// __ARM_FEATURE_SIMD32 is defined. Each ACLE name is an object-like macro
// aliasing an IAR builtin, so call-site arguments are passed through
// unchanged.
#ifdef __ARM_FEATURE_SIMD32

  // The packed vector types are plain typedefs of the 32-bit integer types;
  // the signed and unsigned variants therefore share underlying types
  // (int16x2_t and int8x4_t are the same type, likewise the unsigned pair).
  typedef __INT32_T_TYPE__  int16x2_t;
  typedef __UINT32_T_TYPE__ uint16x2_t;
  typedef __INT32_T_TYPE__  int8x4_t;
  typedef __UINT32_T_TYPE__ uint8x4_t;

// 8.5.4  Parallel 16-bit saturation

  #define __ssat16 __iar_builtin_SSAT16
  #define __usat16 __iar_builtin_USAT16

// 8.5.5  Packing and unpacking

  #define __sxtab16  __iar_builtin_SXTAB16
  #define __sxtb16   __iar_builtin_SXTB16
  #define __uxtab16  __iar_builtin_UXTAB16
  #define __uxtb16   __iar_builtin_UXTB16

// 8.5.6  Parallel selection

  #define __sel __iar_builtin_SEL

// 8.5.7  Parallel 8-bit addition and subtraction

  #define __qadd8   __iar_builtin_QADD8
  #define __qsub8   __iar_builtin_QSUB8
  #define __sadd8   __iar_builtin_SADD8
  #define __shadd8  __iar_builtin_SHADD8
  #define __shsub8  __iar_builtin_SHSUB8
  #define __ssub8   __iar_builtin_SSUB8
  #define __uadd8   __iar_builtin_UADD8
  #define __uhadd8  __iar_builtin_UHADD8
  #define __uhsub8  __iar_builtin_UHSUB8
  #define __uqadd8  __iar_builtin_UQADD8
  #define __uqsub8  __iar_builtin_UQSUB8
  #define __usub8   __iar_builtin_USUB8

// 8.5.8  Sum of 8-bit absolute differences

  #define __usad8   __iar_builtin_USAD8
  #define __usada8  __iar_builtin_USADA8

// 8.5.9  Parallel 16-bit addition and subtraction
//
// The ACLE "asx"/"sax" names map to IAR builtins spelled ADDSUBX/SUBADDX
// (e.g. __qasx -> QADDSUBX, __qsax -> QSUBADDX).

  #define __qadd16    __iar_builtin_QADD16
  #define __qasx      __iar_builtin_QADDSUBX
  #define __qsax      __iar_builtin_QSUBADDX
  #define __qsub16    __iar_builtin_QSUB16
  #define __sadd16    __iar_builtin_SADD16
  #define __sasx      __iar_builtin_SADDSUBX
  #define __shadd16   __iar_builtin_SHADD16
  #define __shasx     __iar_builtin_SHADDSUBX
  #define __shsax     __iar_builtin_SHSUBADDX
  #define __shsub16   __iar_builtin_SHSUB16
  #define __ssax      __iar_builtin_SSUBADDX
  #define __ssub16    __iar_builtin_SSUB16
  #define __uadd16    __iar_builtin_UADD16
  #define __uasx      __iar_builtin_UADDSUBX
  #define __uhadd16   __iar_builtin_UHADD16
  #define __uhasx     __iar_builtin_UHADDSUBX
  #define __uhsax     __iar_builtin_UHSUBADDX
  #define __uhsub16   __iar_builtin_UHSUB16
  #define __uqadd16   __iar_builtin_UQADD16
  #define __uqasx     __iar_builtin_UQADDSUBX
  #define __uqsax     __iar_builtin_UQSUBADDX
  #define __uqsub16   __iar_builtin_UQSUB16
  #define __usax      __iar_builtin_USUBADDX
  #define __usub16    __iar_builtin_USUB16

// 8.5.10  Parallel 16-bit multiplication

  #define __smlad     __iar_builtin_SMLAD
  #define __smladx    __iar_builtin_SMLADX
  #define __smlald    __iar_builtin_SMLALD
  #define __smlaldx   __iar_builtin_SMLALDX
  #define __smlsd     __iar_builtin_SMLSD
  #define __smlsdx    __iar_builtin_SMLSDX
  #define __smlsld    __iar_builtin_SMLSLD
  #define __smlsldx   __iar_builtin_SMLSLDX
  #define __smuad     __iar_builtin_SMUAD
  #define __smuadx    __iar_builtin_SMUADX
  #define __smusd     __iar_builtin_SMUSD
  #define __smusdx    __iar_builtin_SMUSDX

#endif /* __ARM_FEATURE_SIMD32 */

// 8.6  Floating-point data-processing intrinsics
 
   /* declared in <iar_intrinsics_common.h> */

// 8.7  CRC32 intrinsics

   /* declared in <iar_intrinsics_common.h> */

// 9. System register access
// 9.1. Special register intrinsics

   /* declared in <iar_intrinsics_common.h> */

// 9.3. Coprocessor Intrinsics
// 9.3.2. AArch32 Data-processing coprocessor intrinsics

   /* declared in <iar_intrinsics_common.h> */

// 9.3.2.1. AArch32 Memory coprocessor transfer intrinsics

   /* declared in <iar_intrinsics_common.h> */

#pragma language=restore

#ifdef __cplusplus
}
#endif

#endif /* _ARM_ACLE_H_ */
