/*
Stringification helpers. DRFLAC_STRINGIFY turns its argument into a string literal exactly as written.
DRFLAC_XSTRINGIFY goes through one extra macro level so that an argument which is itself a macro is
expanded before it is stringified.
*/
230 #define DRFLAC_STRINGIFY(x) #x
231 #define DRFLAC_XSTRINGIFY(x) DRFLAC_STRINGIFY(x)
/* Library version, split into its numeric components. */
233 #define DRFLAC_VERSION_MAJOR 0
234 #define DRFLAC_VERSION_MINOR 12
235 #define DRFLAC_VERSION_REVISION 31
/* The three components above joined with "." through adjacent string literal concatenation ("0.12.31"). */
236 #define DRFLAC_VERSION_STRING DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
247 #if defined(_MSC_VER)
251 #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
252 #pragma GCC diagnostic push
253 #pragma GCC diagnostic ignored "-Wlong-long"
254 #if defined(__clang__)
255 #pragma GCC diagnostic ignored "-Wc++11-long-long"
260 #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
261 #pragma GCC diagnostic pop
264 #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__)
/* Integer truth values: 1 for true, 0 for false. */
271 #define DRFLAC_TRUE 1
272 #define DRFLAC_FALSE 0
274 #if !defined(DRFLAC_API)
275 #if defined(DRFLAC_DLL)
277 #define DRFLAC_DLL_IMPORT __declspec(dllimport)
278 #define DRFLAC_DLL_EXPORT __declspec(dllexport)
279 #define DRFLAC_DLL_PRIVATE static
281 #if defined(__GNUC__) && __GNUC__ >= 4
282 #define DRFLAC_DLL_IMPORT __attribute__((visibility("default")))
283 #define DRFLAC_DLL_EXPORT __attribute__((visibility("default")))
284 #define DRFLAC_DLL_PRIVATE __attribute__((visibility("hidden")))
286 #define DRFLAC_DLL_IMPORT
287 #define DRFLAC_DLL_EXPORT
288 #define DRFLAC_DLL_PRIVATE static
292 #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
293 #define DRFLAC_API DRFLAC_DLL_EXPORT
295 #define DRFLAC_API DRFLAC_DLL_IMPORT
297 #define DRFLAC_PRIVATE DRFLAC_DLL_PRIVATE
299 #define DRFLAC_API extern
300 #define DRFLAC_PRIVATE static
304 #if defined(_MSC_VER) && _MSC_VER >= 1700
305 #define DRFLAC_DEPRECATED __declspec(deprecated)
306 #elif (defined(__GNUC__) && __GNUC__ >= 4)
307 #define DRFLAC_DEPRECATED __attribute__((deprecated))
308 #elif defined(__has_feature)
309 #if __has_feature(attribute_deprecated)
310 #define DRFLAC_DEPRECATED __attribute__((deprecated))
312 #define DRFLAC_DEPRECATED
315 #define DRFLAC_DEPRECATED
325 #ifndef DR_FLAC_BUFFER_SIZE
326 #define DR_FLAC_BUFFER_SIZE 4096
330 #if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
/*
Numeric identifiers for the kinds of metadata block. Values 0 to 6 are consecutive; INVALID is 127.

NOTE(review): the names and values look like the block type codes of the FLAC container format. Confirm
against the FLAC specification before relying on that.
*/
341 #define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO 0
342 #define DRFLAC_METADATA_BLOCK_TYPE_PADDING 1
343 #define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION 2
344 #define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE 3
345 #define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT 4
346 #define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET 5
347 #define DRFLAC_METADATA_BLOCK_TYPE_PICTURE 6
348 #define DRFLAC_METADATA_BLOCK_TYPE_INVALID 127
/*
Picture type identifiers, numbered consecutively from 0 (OTHER) to 20 (PUBLISHER_LOGOTYPE).

NOTE(review): presumably the type value carried by a PICTURE metadata block. The code that reads it is not
in view, so confirm against the caller.
*/
351 #define DRFLAC_PICTURE_TYPE_OTHER 0
352 #define DRFLAC_PICTURE_TYPE_FILE_ICON 1
353 #define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON 2
354 #define DRFLAC_PICTURE_TYPE_COVER_FRONT 3
355 #define DRFLAC_PICTURE_TYPE_COVER_BACK 4
356 #define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE 5
357 #define DRFLAC_PICTURE_TYPE_MEDIA 6
358 #define DRFLAC_PICTURE_TYPE_LEAD_ARTIST 7
359 #define DRFLAC_PICTURE_TYPE_ARTIST 8
360 #define DRFLAC_PICTURE_TYPE_CONDUCTOR 9
361 #define DRFLAC_PICTURE_TYPE_BAND 10
362 #define DRFLAC_PICTURE_TYPE_COMPOSER 11
363 #define DRFLAC_PICTURE_TYPE_LYRICIST 12
364 #define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION 13
365 #define DRFLAC_PICTURE_TYPE_DURING_RECORDING 14
366 #define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE 15
367 #define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE 16
368 #define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH 17
369 #define DRFLAC_PICTURE_TYPE_ILLUSTRATION 18
370 #define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE 19
371 #define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE 20
422 const void* pRawData;
454 const void* pComments;
463 const void* pTrackData;
510 typedef size_t (*
drflac_read_proc)(
void* pUserData,
void* pBufferOut,
size_t bytesToRead);
567 void* (* onMalloc)(
size_t sz,
void* pUserData);
568 void* (* onRealloc)(
void* p,
size_t sz,
void* pUserData);
569 void (* onFree)(
void* p,
void* pUserData);
577 size_t currentReadPos;
598 size_t unalignedByteCount;
1060 #ifndef DR_FLAC_NO_STDIO
1228 #ifndef DR_FLAC_NO_STDIO
1260 const char* pRunningData;
1280 const char* pRunningData;
1327 #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
1332 #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
1333 #pragma GCC diagnostic push
1335 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
1343 #ifndef _DEFAULT_SOURCE
1344 #define _DEFAULT_SOURCE
1356 #define DRFLAC_INLINE __forceinline
1357 #elif defined(__GNUC__)
1365 #if defined(__STRICT_ANSI__)
1366 #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
1368 #define DRFLAC_INLINE inline __attribute__((always_inline))
1370 #elif defined(__WATCOMC__)
1371 #define DRFLAC_INLINE __inline
1373 #define DRFLAC_INLINE
1377 #if defined(__x86_64__) || defined(_M_X64)
1379 #elif defined(__i386) || defined(_M_IX86)
1381 #elif defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
1394 #if !defined(DR_FLAC_NO_SIMD)
1395 #if defined(DRFLAC_X64) || defined(DRFLAC_X86)
1396 #if defined(_MSC_VER) && !defined(__clang__)
1398 #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2)
1399 #define DRFLAC_SUPPORT_SSE2
1401 #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41)
1402 #define DRFLAC_SUPPORT_SSE41
1404 #elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
1406 #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2)
1407 #define DRFLAC_SUPPORT_SSE2
1409 #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41)
1410 #define DRFLAC_SUPPORT_SSE41
1415 #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
1416 #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>)
1417 #define DRFLAC_SUPPORT_SSE2
1419 #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>)
1420 #define DRFLAC_SUPPORT_SSE41
1424 #if defined(DRFLAC_SUPPORT_SSE41)
1425 #include <smmintrin.h>
1426 #elif defined(DRFLAC_SUPPORT_SSE2)
1427 #include <emmintrin.h>
1431 #if defined(DRFLAC_ARM)
1432 #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
1433 #define DRFLAC_SUPPORT_NEON
1437 #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
1438 #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
1439 #define DRFLAC_SUPPORT_NEON
1443 #if defined(DRFLAC_SUPPORT_NEON)
1444 #include <arm_neon.h>
1450 #if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
1451 #if defined(_MSC_VER) && !defined(__clang__)
1452 #if _MSC_VER >= 1400
1454 static void drflac__cpuid(
int info[4],
int fid)
1459 #define DRFLAC_NO_CPUID
1462 #if defined(__GNUC__) || defined(__clang__)
1463 static void drflac__cpuid(
int info[4],
int fid)
1472 #if defined(DRFLAC_X86) && defined(__PIC__)
1473 __asm__ __volatile__ (
1474 "xchg{l} {%%}ebx, %k1;"
1476 "xchg{l} {%%}ebx, %k1;"
1477 :
"=a"(info[0]),
"=&r"(info[1]),
"=c"(info[2]),
"=d"(info[3]) :
"a"(fid),
"c"(0)
1480 __asm__ __volatile__ (
1481 "cpuid" :
"=a"(info[0]),
"=b"(info[1]),
"=c"(info[2]),
"=d"(info[3]) :
"a"(fid),
"c"(0)
1486 #define DRFLAC_NO_CPUID
1490 #define DRFLAC_NO_CPUID
1495 #if defined(DRFLAC_SUPPORT_SSE2)
1496 #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2)
1497 #if defined(DRFLAC_X64)
1499 #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)
1502 #if defined(DRFLAC_NO_CPUID)
1506 drflac__cpuid(info, 1);
1507 return (info[3] & (1 << 26)) != 0;
1520 #if defined(DRFLAC_SUPPORT_SSE41)
1521 #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
1522 #if defined(DRFLAC_X64)
1524 #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__)
1527 #if defined(DRFLAC_NO_CPUID)
1531 drflac__cpuid(info, 1);
1532 return (info[2] & (1 << 19)) != 0;
1544 #if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) && !defined(__clang__)
1545 #define DRFLAC_HAS_LZCNT_INTRINSIC
1546 #elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
1547 #define DRFLAC_HAS_LZCNT_INTRINSIC
1548 #elif defined(__clang__)
1549 #if defined(__has_builtin)
1550 #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
1551 #define DRFLAC_HAS_LZCNT_INTRINSIC
1556 #if defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(__clang__)
1557 #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
1558 #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
1559 #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
1560 #elif defined(__clang__)
1561 #if defined(__has_builtin)
1562 #if __has_builtin(__builtin_bswap16)
1563 #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
1565 #if __has_builtin(__builtin_bswap32)
1566 #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
1568 #if __has_builtin(__builtin_bswap64)
1569 #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
1572 #elif defined(__GNUC__)
1573 #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
1574 #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
1575 #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
1577 #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
1578 #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
1580 #elif defined(__WATCOMC__) && defined(__386__)
1581 #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
1582 #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
1583 #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
1587 #pragma aux _watcom_bswap16 = \
1591 #pragma aux _watcom_bswap32 = \
1595 #pragma aux _watcom_bswap64 = \
1605 #ifndef DRFLAC_ASSERT
1607 #define DRFLAC_ASSERT(expression) assert(expression)
1609 #ifndef DRFLAC_MALLOC
1610 #define DRFLAC_MALLOC(sz) malloc((sz))
1612 #ifndef DRFLAC_REALLOC
1613 #define DRFLAC_REALLOC(p, sz) realloc((p), (sz))
1616 #define DRFLAC_FREE(p) free((p))
1618 #ifndef DRFLAC_COPY_MEMORY
1619 #define DRFLAC_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz))
1621 #ifndef DRFLAC_ZERO_MEMORY
1622 #define DRFLAC_ZERO_MEMORY(p, sz) memset((p), 0, (sz))
1624 #ifndef DRFLAC_ZERO_OBJECT
1625 #define DRFLAC_ZERO_OBJECT(p) DRFLAC_ZERO_MEMORY((p), sizeof(*(p)))
/* Upper bound on SIMD vector size. NOTE(review): the unit is not stated here - presumably bytes; confirm at the point of use. */
1628 #define DRFLAC_MAX_SIMD_VECTOR_SIZE 64
/*
Result codes. DRFLAC_SUCCESS is 0 and every other code is negative. The codes from DRFLAC_ERROR (-1) to
DRFLAC_AT_END (-53) are consecutive; DRFLAC_CRC_MISMATCH (-128) sits outside that range.

The negative values are not wrapped in parentheses, so take care when using them inside a larger expression.
*/
1631 #define DRFLAC_SUCCESS 0
1632 #define DRFLAC_ERROR -1
1633 #define DRFLAC_INVALID_ARGS -2
1634 #define DRFLAC_INVALID_OPERATION -3
1635 #define DRFLAC_OUT_OF_MEMORY -4
1636 #define DRFLAC_OUT_OF_RANGE -5
1637 #define DRFLAC_ACCESS_DENIED -6
1638 #define DRFLAC_DOES_NOT_EXIST -7
1639 #define DRFLAC_ALREADY_EXISTS -8
1640 #define DRFLAC_TOO_MANY_OPEN_FILES -9
1641 #define DRFLAC_INVALID_FILE -10
1642 #define DRFLAC_TOO_BIG -11
1643 #define DRFLAC_PATH_TOO_LONG -12
1644 #define DRFLAC_NAME_TOO_LONG -13
1645 #define DRFLAC_NOT_DIRECTORY -14
1646 #define DRFLAC_IS_DIRECTORY -15
1647 #define DRFLAC_DIRECTORY_NOT_EMPTY -16
1648 #define DRFLAC_END_OF_FILE -17
1649 #define DRFLAC_NO_SPACE -18
1650 #define DRFLAC_BUSY -19
1651 #define DRFLAC_IO_ERROR -20
1652 #define DRFLAC_INTERRUPT -21
1653 #define DRFLAC_UNAVAILABLE -22
1654 #define DRFLAC_ALREADY_IN_USE -23
1655 #define DRFLAC_BAD_ADDRESS -24
1656 #define DRFLAC_BAD_SEEK -25
1657 #define DRFLAC_BAD_PIPE -26
1658 #define DRFLAC_DEADLOCK -27
1659 #define DRFLAC_TOO_MANY_LINKS -28
1660 #define DRFLAC_NOT_IMPLEMENTED -29
1661 #define DRFLAC_NO_MESSAGE -30
1662 #define DRFLAC_BAD_MESSAGE -31
1663 #define DRFLAC_NO_DATA_AVAILABLE -32
1664 #define DRFLAC_INVALID_DATA -33
1665 #define DRFLAC_TIMEOUT -34
1666 #define DRFLAC_NO_NETWORK -35
1667 #define DRFLAC_NOT_UNIQUE -36
1668 #define DRFLAC_NOT_SOCKET -37
1669 #define DRFLAC_NO_ADDRESS -38
1670 #define DRFLAC_BAD_PROTOCOL -39
1671 #define DRFLAC_PROTOCOL_UNAVAILABLE -40
1672 #define DRFLAC_PROTOCOL_NOT_SUPPORTED -41
1673 #define DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED -42
1674 #define DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED -43
1675 #define DRFLAC_SOCKET_NOT_SUPPORTED -44
1676 #define DRFLAC_CONNECTION_RESET -45
1677 #define DRFLAC_ALREADY_CONNECTED -46
1678 #define DRFLAC_NOT_CONNECTED -47
1679 #define DRFLAC_CONNECTION_REFUSED -48
1680 #define DRFLAC_NO_HOST -49
1681 #define DRFLAC_IN_PROGRESS -50
1682 #define DRFLAC_CANCELLED -51
1683 #define DRFLAC_MEMORY_ALREADY_MAPPED -52
1684 #define DRFLAC_AT_END -53
1685 #define DRFLAC_CRC_MISMATCH -128
/* Subframe type identifiers. NOTE(review): how these are matched against the bitstream is not visible here. */
1687 #define DRFLAC_SUBFRAME_CONSTANT 0
1688 #define DRFLAC_SUBFRAME_VERBATIM 1
1689 #define DRFLAC_SUBFRAME_FIXED 8
1690 #define DRFLAC_SUBFRAME_LPC 32
1691 #define DRFLAC_SUBFRAME_RESERVED 255
/* Residual coding method identifiers. */
1693 #define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE 0
1694 #define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1
/* Channel assignment identifiers. */
1696 #define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT 0
1697 #define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE 8
1698 #define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE 9
1699 #define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE 10
/*
Rounds x up to the next multiple of a using integer arithmetic: add (a - 1), divide by a, multiply by a.
A value of x that is already a multiple of a is returned unchanged. The argument a is evaluated three times,
so it must not have side effects, and it must be non-zero because it is used as a divisor.
*/
1701 #define drflac_align(x, a) ((((x) + (a) - 1) / (a)) * (a))
1726 #if defined(__has_feature)
1727 #if __has_feature(thread_sanitizer)
1728 #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread")))
1730 #define DRFLAC_NO_THREAD_SANITIZE
1733 #define DRFLAC_NO_THREAD_SANITIZE
1736 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
1740 #ifndef DRFLAC_NO_CPUID
1754 if (!isCPUCapsInitialized) {
1756 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
1758 drflac__cpuid(info, 0x80000001);
1759 drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0;
1777 #if defined(DRFLAC_SUPPORT_NEON)
1778 #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON)
1779 #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
1797 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
1807 #if defined(DRFLAC_X86) || defined(DRFLAC_X64)
1809 #elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
1813 return (*(
char*)&n) == 1;
1819 #ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC
1820 #if defined(_MSC_VER) && !defined(__clang__)
1821 return _byteswap_ushort(n);
1822 #elif defined(__GNUC__) || defined(__clang__)
1823 return __builtin_bswap16(n);
1824 #elif defined(__WATCOMC__) && defined(__386__)
1825 return _watcom_bswap16(n);
1827 #error "This compiler does not support the byte swap intrinsic."
1830 return ((n & 0xFF00) >> 8) |
1831 ((n & 0x00FF) << 8);
1837 #ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC
1838 #if defined(_MSC_VER) && !defined(__clang__)
1839 return _byteswap_ulong(n);
1840 #elif defined(__GNUC__) || defined(__clang__)
1841 #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT)
1844 __asm__ __volatile__ (
1845 #
if defined(DRFLAC_64BIT)
1846 "rev %w[out], %w[in]" : [out]
"=r"(r) : [in]
"r"(n)
1848 "rev %[out], %[in]" : [out]
"=r"(r) : [in]
"r"(n)
1853 return __builtin_bswap32(n);
1855 #elif defined(__WATCOMC__) && defined(__386__)
1856 return _watcom_bswap32(n);
1858 #error "This compiler does not support the byte swap intrinsic."
1861 return ((n & 0xFF000000) >> 24) |
1862 ((n & 0x00FF0000) >> 8) |
1863 ((n & 0x0000FF00) << 8) |
1864 ((n & 0x000000FF) << 24);
1870 #ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC
1871 #if defined(_MSC_VER) && !defined(__clang__)
1872 return _byteswap_uint64(n);
1873 #elif defined(__GNUC__) || defined(__clang__)
1874 return __builtin_bswap64(n);
1875 #elif defined(__WATCOMC__) && defined(__386__)
1876 return _watcom_bswap64(n);
1878 #error "This compiler does not support the byte swap intrinsic."
1935 result |= (n & 0x7F000000) >> 3;
1936 result |= (n & 0x007F0000) >> 2;
1937 result |= (n & 0x00007F00) >> 1;
1938 result |= (n & 0x0000007F) >> 0;
1947 0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
1948 0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
1949 0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
1950 0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
1951 0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
1952 0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
1953 0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
1954 0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
1955 0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
1956 0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
1957 0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
1958 0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
1959 0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
1960 0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
1961 0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
1962 0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
1966 0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011,
1967 0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022,
1968 0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072,
1969 0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041,
1970 0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2,
1971 0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1,
1972 0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1,
1973 0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082,
1974 0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192,
1975 0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1,
1976 0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1,
1977 0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2,
1978 0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151,
1979 0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162,
1980 0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132,
1981 0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101,
1982 0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312,
1983 0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321,
1984 0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371,
1985 0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342,
1986 0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1,
1987 0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2,
1988 0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2,
1989 0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381,
1990 0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291,
1991 0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2,
1992 0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2,
1993 0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1,
1994 0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252,
1995 0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261,
1996 0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231,
1997 0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202
2007 #ifdef DR_FLAC_NO_CRC
2016 for (
int i =
count-1; i >= 0; --i) {
2019 crc = ((crc << 1) | bit) ^ p;
2021 crc = ((crc << 1) | bit);
2031 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
2036 wholeBytes =
count >> 3;
2037 leftoverBits =
count - (wholeBytes*8);
2038 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
2040 switch (wholeBytes) {
2045 case 0:
if (leftoverBits > 0) crc = (
drflac_uint8)((crc << leftoverBits) ^
drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)]);
2095 #ifdef DR_FLAC_NO_CRC
2104 for (
int i =
count-1; i >= 0; --i) {
2107 r = ((r << 1) | bit) ^ p;
2109 r = ((r << 1) | bit);
2120 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
2125 wholeBytes =
count >> 3;
2126 leftoverBits =
count & 7;
2127 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
2129 switch (wholeBytes) {
2135 case 0:
if (leftoverBits > 0) crc = (crc << leftoverBits) ^
drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
2144 #ifdef DR_FLAC_NO_CRC
2155 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
2160 wholeBytes =
count >> 3;
2161 leftoverBits =
count & 7;
2162 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
2164 switch (wholeBytes) {
2174 case 0:
if (leftoverBits > 0) crc = (crc << leftoverBits) ^
drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
2184 return drflac_crc16__64bit(crc, data,
count);
2186 return drflac_crc16__32bit(crc, data,
count);
2193 #define drflac__be2host__cache_line drflac__be2host_64
2195 #define drflac__be2host__cache_line drflac__be2host_32
/*
Helpers for the two cache levels of a bit stream object `bs`. They read the members bs->cache,
bs->consumedBits, bs->cacheL2 and bs->nextL2Line. The descriptions of the mask and shift macros assume
drflac_cache_t is an unsigned integer type (its typedef is not in view - TODO confirm).
*/
/* Size of the L1 cache (bs->cache) in bytes and in bits. */
2207 #define DRFLAC_CACHE_L1_SIZE_BYTES(bs) (sizeof((bs)->cache))
2208 #define DRFLAC_CACHE_L1_SIZE_BITS(bs) (sizeof((bs)->cache)*8)
/* L1 width in bits minus bs->consumedBits. */
2209 #define DRFLAC_CACHE_L1_BITS_REMAINING(bs) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits)
/* Mask with the top _bitCount bits set. _bitCount must be smaller than the width of drflac_cache_t, since a full-width shift is undefined in C. */
2210 #define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount) (~((~(drflac_cache_t)0) >> (_bitCount)))
/* Right-shift distance that moves the top _bitCount bits of the cache down to the least significant end. */
2211 #define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
/* The top _bitCount bits of bs->cache, left in place (all lower bits cleared). */
2212 #define DRFLAC_CACHE_L1_SELECT(bs, _bitCount) (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
/* The top _bitCount bits of bs->cache, moved down to the least significant end. A _bitCount of 0 gives a full-width shift here. */
2213 #define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)))
/* Same as above, but the shift distance is masked with (L1 width in bits - 1) so it always stays below the cache width, including when _bitCount is 0. */
2214 #define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1)))
/* Size of the L2 cache array (bs->cacheL2) in bytes, and its element count. */
2215 #define DRFLAC_CACHE_L2_SIZE_BYTES(bs) (sizeof((bs)->cacheL2))
2216 #define DRFLAC_CACHE_L2_LINE_COUNT(bs) (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
/* L2 element count minus bs->nextL2Line. */
2217 #define DRFLAC_CACHE_L2_LINES_REMAINING(bs) (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)
2220 #ifndef DR_FLAC_NO_CRC
2266 size_t alignedL1LineCount;
2305 if (alignedL1LineCount > 0) {
2308 for (i = alignedL1LineCount; i > 0; --i) {
2326 #ifndef DR_FLAC_NO_CRC
2334 #ifndef DR_FLAC_NO_CRC
2348 if (bytesRead == 0) {
2360 #ifndef DR_FLAC_NO_CRC
2375 #ifndef DR_FLAC_NO_CRC
2404 bs->
cache <<= bitCount;
2409 bs->
cache <<= bitCount;
2435 bs->
cache <<= bitCountLo;
2454 if (bitCount < 32) {
2456 signbit = ((result >> (bitCount-1)) & 0x01);
2457 result |= (~signbit + 1) << bitCount;
2495 if (!drflac__read_uint64(bs, bitCount, &result)) {
2499 signbit = ((result >> (bitCount-1)) & 0x01);
2500 result |= (~signbit + 1) << bitCount;
2582 bs->
cache <<= bitsToSeek;
2610 while (bitsToSeek >= 8) {
2619 if (bitsToSeek > 0) {
2649 #ifndef DR_FLAC_NO_CRC
2678 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
2679 #define DRFLAC_IMPLEMENT_CLZ_LZCNT
2681 #if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(__clang__)
2682 #define DRFLAC_IMPLEMENT_CLZ_MSVC
2684 #if defined(__WATCOMC__) && defined(__386__)
2685 #define DRFLAC_IMPLEMENT_CLZ_WATCOM
2696 1, 1, 1, 1, 1, 1, 1, 1
2703 n = clz_table_4[x >> (
sizeof(x)*8 - 4)];
2706 if ((x & ((
drflac_uint64)0xFFFFFFFF << 32)) == 0) { n = 32; x <<= 32; }
2707 if ((x & ((
drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; }
2708 if ((x & ((
drflac_uint64)0xFF000000 << 32)) == 0) { n += 8; x <<= 8; }
2709 if ((x & ((
drflac_uint64)0xF0000000 << 32)) == 0) { n += 4; x <<= 4; }
2711 if ((x & 0xFFFF0000) == 0) { n = 16; x <<= 16; }
2712 if ((x & 0xFF000000) == 0) { n += 8; x <<= 8; }
2713 if ((x & 0xF0000000) == 0) { n += 4; x <<= 4; }
2715 n += clz_table_4[x >> (
sizeof(x)*8 - 4)];
2721 #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
2725 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
2729 #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
2730 return drflac__gIsLZCNTSupported;
2757 #if defined(_MSC_VER)
2764 #if defined(__GNUC__) || defined(__clang__)
2765 #if defined(DRFLAC_X64)
2768 __asm__ __volatile__ (
2769 "lzcnt{ %1, %0| %0, %1}" :
"=r"(r) :
"r"(x) :
"cc"
2774 #elif defined(DRFLAC_X86)
2777 __asm__ __volatile__ (
2778 "lzcnt{l %1, %0| %0, %1}" :
"=r"(r) :
"r"(x) :
"cc"
2783 #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT)
2786 __asm__ __volatile__ (
2787 #
if defined(DRFLAC_64BIT)
2788 "clz %w[out], %w[in]" : [out]
"=r"(r) : [in]
"r"(x)
2790 "clz %[out], %[in]" : [out]
"=r"(r) : [in]
"r"(x)
2808 #error "This compiler does not support the lzcnt intrinsic."
2814 #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
2826 _BitScanReverse64((
unsigned long*)&n, x);
2828 _BitScanReverse((
unsigned long*)&n, x);
2830 return sizeof(x)*8 - n - 1;
2834 #ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM
2836 #pragma aux drflac__clz_watcom = \
2839 parm [eax] nomemory \
2841 modify exact [eax] nomemory;
2846 #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
2847 if (drflac__is_lzcnt_supported()) {
2848 return drflac__clz_lzcnt(x);
2852 #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
2853 return drflac__clz_msvc(x);
2854 #elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM)
2855 return (x == 0) ?
sizeof(x)*8 : drflac__clz_watcom(x);
2868 while (bs->
cache == 0) {
2876 setBitOffsetPlus1 += 1;
2879 bs->
cache <<= setBitOffsetPlus1;
2881 *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1;
2897 if (offsetFromStart > 0x7FFFFFFF) {
2902 bytesRemaining -= 0x7FFFFFFF;
2904 while (bytesRemaining > 0x7FFFFFFF) {
2908 bytesRemaining -= 0x7FFFFFFF;
2911 if (bytesRemaining > 0) {
2948 if ((utf8[0] & 0x80) == 0) {
2949 *pNumberOut = utf8[0];
2955 if ((utf8[0] & 0xE0) == 0xC0) {
2957 }
else if ((utf8[0] & 0xF0) == 0xE0) {
2959 }
else if ((utf8[0] & 0xF8) == 0xF0) {
2961 }
else if ((utf8[0] & 0xFC) == 0xF8) {
2963 }
else if ((utf8[0] & 0xFE) == 0xFC) {
2965 }
else if ((utf8[0] & 0xFF) == 0xFE) {
2975 result = (
drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
2976 for (i = 1; i < byteCount; ++i) {
2983 result = (result << 6) | (utf8[i] & 0x3F);
2986 *pNumberOut = result;
3010 case 32: prediction += coefficients[31] * pDecodedSamples[-32];
3011 case 31: prediction += coefficients[30] * pDecodedSamples[-31];
3012 case 30: prediction += coefficients[29] * pDecodedSamples[-30];
3013 case 29: prediction += coefficients[28] * pDecodedSamples[-29];
3014 case 28: prediction += coefficients[27] * pDecodedSamples[-28];
3015 case 27: prediction += coefficients[26] * pDecodedSamples[-27];
3016 case 26: prediction += coefficients[25] * pDecodedSamples[-26];
3017 case 25: prediction += coefficients[24] * pDecodedSamples[-25];
3018 case 24: prediction += coefficients[23] * pDecodedSamples[-24];
3019 case 23: prediction += coefficients[22] * pDecodedSamples[-23];
3020 case 22: prediction += coefficients[21] * pDecodedSamples[-22];
3021 case 21: prediction += coefficients[20] * pDecodedSamples[-21];
3022 case 20: prediction += coefficients[19] * pDecodedSamples[-20];
3023 case 19: prediction += coefficients[18] * pDecodedSamples[-19];
3024 case 18: prediction += coefficients[17] * pDecodedSamples[-18];
3025 case 17: prediction += coefficients[16] * pDecodedSamples[-17];
3026 case 16: prediction += coefficients[15] * pDecodedSamples[-16];
3027 case 15: prediction += coefficients[14] * pDecodedSamples[-15];
3028 case 14: prediction += coefficients[13] * pDecodedSamples[-14];
3029 case 13: prediction += coefficients[12] * pDecodedSamples[-13];
3030 case 12: prediction += coefficients[11] * pDecodedSamples[-12];
3031 case 11: prediction += coefficients[10] * pDecodedSamples[-11];
3032 case 10: prediction += coefficients[ 9] * pDecodedSamples[-10];
3033 case 9: prediction += coefficients[ 8] * pDecodedSamples[- 9];
3034 case 8: prediction += coefficients[ 7] * pDecodedSamples[- 8];
3035 case 7: prediction += coefficients[ 6] * pDecodedSamples[- 7];
3036 case 6: prediction += coefficients[ 5] * pDecodedSamples[- 6];
3037 case 5: prediction += coefficients[ 4] * pDecodedSamples[- 5];
3038 case 4: prediction += coefficients[ 3] * pDecodedSamples[- 4];
3039 case 3: prediction += coefficients[ 2] * pDecodedSamples[- 3];
3040 case 2: prediction += coefficients[ 1] * pDecodedSamples[- 2];
3041 case 1: prediction += coefficients[ 0] * pDecodedSamples[- 1];
3056 #ifndef DRFLAC_64BIT
3059 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3060 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3061 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
3062 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
3063 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
3064 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
3065 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
3066 prediction += coefficients[7] * (
drflac_int64)pDecodedSamples[-8];
3068 else if (order == 7)
3070 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3071 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3072 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
3073 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
3074 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
3075 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
3076 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
3078 else if (order == 3)
3080 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3081 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3082 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
3084 else if (order == 6)
3086 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3087 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3088 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
3089 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
3090 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
3091 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
3093 else if (order == 5)
3095 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3096 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3097 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
3098 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
3099 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
3101 else if (order == 4)
3103 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3104 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3105 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
3106 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
3108 else if (order == 12)
3110 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3111 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3112 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
3113 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
3114 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
3115 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
3116 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
3117 prediction += coefficients[7] * (
drflac_int64)pDecodedSamples[-8];
3118 prediction += coefficients[8] * (
drflac_int64)pDecodedSamples[-9];
3119 prediction += coefficients[9] * (
drflac_int64)pDecodedSamples[-10];
3120 prediction += coefficients[10] * (
drflac_int64)pDecodedSamples[-11];
3121 prediction += coefficients[11] * (
drflac_int64)pDecodedSamples[-12];
3123 else if (order == 2)
3125 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3126 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3128 else if (order == 1)
3130 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3132 else if (order == 10)
3134 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3135 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3136 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
3137 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
3138 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
3139 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
3140 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
3141 prediction += coefficients[7] * (
drflac_int64)pDecodedSamples[-8];
3142 prediction += coefficients[8] * (
drflac_int64)pDecodedSamples[-9];
3143 prediction += coefficients[9] * (
drflac_int64)pDecodedSamples[-10];
3145 else if (order == 9)
3147 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3148 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3149 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
3150 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
3151 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
3152 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
3153 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
3154 prediction += coefficients[7] * (
drflac_int64)pDecodedSamples[-8];
3155 prediction += coefficients[8] * (
drflac_int64)pDecodedSamples[-9];
3157 else if (order == 11)
3159 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
3160 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
3161 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
3162 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
3163 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
3164 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
3165 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
3166 prediction += coefficients[7] * (
drflac_int64)pDecodedSamples[-8];
3167 prediction += coefficients[8] * (
drflac_int64)pDecodedSamples[-9];
3168 prediction += coefficients[9] * (
drflac_int64)pDecodedSamples[-10];
3169 prediction += coefficients[10] * (
drflac_int64)pDecodedSamples[-11];
3176 for (j = 0; j < (int)order; ++j) {
3177 prediction += coefficients[j] * (
drflac_int64)pDecodedSamples[-j-1];
3190 case 32: prediction += coefficients[31] * (
drflac_int64)pDecodedSamples[-32];
3191 case 31: prediction += coefficients[30] * (
drflac_int64)pDecodedSamples[-31];
3192 case 30: prediction += coefficients[29] * (
drflac_int64)pDecodedSamples[-30];
3193 case 29: prediction += coefficients[28] * (
drflac_int64)pDecodedSamples[-29];
3194 case 28: prediction += coefficients[27] * (
drflac_int64)pDecodedSamples[-28];
3195 case 27: prediction += coefficients[26] * (
drflac_int64)pDecodedSamples[-27];
3196 case 26: prediction += coefficients[25] * (
drflac_int64)pDecodedSamples[-26];
3197 case 25: prediction += coefficients[24] * (
drflac_int64)pDecodedSamples[-25];
3198 case 24: prediction += coefficients[23] * (
drflac_int64)pDecodedSamples[-24];
3199 case 23: prediction += coefficients[22] * (
drflac_int64)pDecodedSamples[-23];
3200 case 22: prediction += coefficients[21] * (
drflac_int64)pDecodedSamples[-22];
3201 case 21: prediction += coefficients[20] * (
drflac_int64)pDecodedSamples[-21];
3202 case 20: prediction += coefficients[19] * (
drflac_int64)pDecodedSamples[-20];
3203 case 19: prediction += coefficients[18] * (
drflac_int64)pDecodedSamples[-19];
3204 case 18: prediction += coefficients[17] * (
drflac_int64)pDecodedSamples[-18];
3205 case 17: prediction += coefficients[16] * (
drflac_int64)pDecodedSamples[-17];
3206 case 16: prediction += coefficients[15] * (
drflac_int64)pDecodedSamples[-16];
3207 case 15: prediction += coefficients[14] * (
drflac_int64)pDecodedSamples[-15];
3208 case 14: prediction += coefficients[13] * (
drflac_int64)pDecodedSamples[-14];
3209 case 13: prediction += coefficients[12] * (
drflac_int64)pDecodedSamples[-13];
3210 case 12: prediction += coefficients[11] * (
drflac_int64)pDecodedSamples[-12];
3211 case 11: prediction += coefficients[10] * (
drflac_int64)pDecodedSamples[-11];
3212 case 10: prediction += coefficients[ 9] * (
drflac_int64)pDecodedSamples[-10];
3213 case 9: prediction += coefficients[ 8] * (
drflac_int64)pDecodedSamples[- 9];
3214 case 8: prediction += coefficients[ 7] * (
drflac_int64)pDecodedSamples[- 8];
3215 case 7: prediction += coefficients[ 6] * (
drflac_int64)pDecodedSamples[- 7];
3216 case 6: prediction += coefficients[ 5] * (
drflac_int64)pDecodedSamples[- 6];
3217 case 5: prediction += coefficients[ 4] * (
drflac_int64)pDecodedSamples[- 5];
3218 case 4: prediction += coefficients[ 3] * (
drflac_int64)pDecodedSamples[- 4];
3219 case 3: prediction += coefficients[ 2] * (
drflac_int64)pDecodedSamples[- 3];
3220 case 2: prediction += coefficients[ 1] * (
drflac_int64)pDecodedSamples[- 2];
3221 case 1: prediction += coefficients[ 0] * (
drflac_int64)pDecodedSamples[- 1];
3241 for (i = 0; i <
count; ++i) {
3257 if (riceParam > 0) {
3265 decodedRice |= (zeroCounter << riceParam);
3266 if ((decodedRice & 0x01)) {
3267 decodedRice = ~(decodedRice >> 1);
3269 decodedRice = (decodedRice >> 1);
3273 if (bitsPerSample+shift >= 32) {
3303 if (riceParam > 0) {
3311 *pZeroCounterOut = zeroCounter;
3312 *pRiceParamPartOut = decodedRice;
3331 while (bs->
cache == 0) {
3339 zeroCounter += setBitOffsetPlus1;
3340 setBitOffsetPlus1 += 1;
3342 riceLength = setBitOffsetPlus1 + riceParam;
3347 bs->
cache <<= riceLength;
3360 #ifndef DR_FLAC_NO_CRC
3365 #ifndef DR_FLAC_NO_CRC
3378 bs->
cache <<= bitCountLo;
3381 pZeroCounterOut[0] = zeroCounter;
3382 pRiceParamPartOut[0] = riceParamPart;
3404 if (lzcount <
sizeof(bs_cache)*8) {
3405 pZeroCounterOut[0] = lzcount;
3412 extract_rice_param_part:
3413 bs_cache <<= lzcount;
3414 bs_consumedBits += lzcount;
3416 if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
3418 pRiceParamPartOut[0] = (
drflac_uint32)(bs_cache >> riceParamPlus1Shift);
3419 bs_cache <<= riceParamPlus1;
3420 bs_consumedBits += riceParamPlus1;
3432 riceParamPartHi = (
drflac_uint32)(bs_cache >> riceParamPlus1Shift);
3435 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
3436 DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
3440 #ifndef DR_FLAC_NO_CRC
3444 bs_consumedBits = riceParamPartLoBitCount;
3445 #ifndef DR_FLAC_NO_CRC
3454 bs_cache = bs->
cache;
3455 bs_consumedBits = bs->
consumedBits + riceParamPartLoBitCount;
3460 pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo;
3462 bs_cache <<= riceParamPartLoBitCount;
3472 #ifndef DR_FLAC_NO_CRC
3476 bs_consumedBits = 0;
3477 #ifndef DR_FLAC_NO_CRC
3486 bs_cache = bs->
cache;
3491 zeroCounter += lzcount;
3493 if (lzcount <
sizeof(bs_cache)*8) {
3498 pZeroCounterOut[0] = zeroCounter;
3499 goto extract_rice_param_part;
3503 bs->
cache = bs_cache;
3523 if (lzcount <
sizeof(bs_cache)*8) {
3529 extract_rice_param_part:
3530 bs_cache <<= lzcount;
3531 bs_consumedBits += lzcount;
3533 if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
3535 bs_cache <<= riceParamPlus1;
3536 bs_consumedBits += riceParamPlus1;
3544 drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
3545 DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
3549 #ifndef DR_FLAC_NO_CRC
3553 bs_consumedBits = riceParamPartLoBitCount;
3554 #ifndef DR_FLAC_NO_CRC
3563 bs_cache = bs->
cache;
3564 bs_consumedBits = bs->
consumedBits + riceParamPartLoBitCount;
3567 bs_cache <<= riceParamPartLoBitCount;
3576 #ifndef DR_FLAC_NO_CRC
3580 bs_consumedBits = 0;
3581 #ifndef DR_FLAC_NO_CRC
3590 bs_cache = bs->
cache;
3595 if (lzcount <
sizeof(bs_cache)*8) {
3600 goto extract_rice_param_part;
3604 bs->
cache = bs_cache;
3622 (void)bitsPerSample;
3637 riceParamPart0 &= riceParamMask;
3638 riceParamPart0 |= (zeroCountPart0 << riceParam);
3639 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
3641 pSamplesOut[i] = riceParamPart0;
3672 pSamplesOutEnd = pSamplesOut + (
count & ~3);
3674 if (bitsPerSample+shift > 32) {
3675 while (pSamplesOut < pSamplesOutEnd) {
3687 riceParamPart0 &= riceParamMask;
3688 riceParamPart1 &= riceParamMask;
3689 riceParamPart2 &= riceParamMask;
3690 riceParamPart3 &= riceParamMask;
3692 riceParamPart0 |= (zeroCountPart0 << riceParam);
3693 riceParamPart1 |= (zeroCountPart1 << riceParam);
3694 riceParamPart2 |= (zeroCountPart2 << riceParam);
3695 riceParamPart3 |= (zeroCountPart3 << riceParam);
3697 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
3698 riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
3699 riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
3700 riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
3710 while (pSamplesOut < pSamplesOutEnd) {
3718 riceParamPart0 &= riceParamMask;
3719 riceParamPart1 &= riceParamMask;
3720 riceParamPart2 &= riceParamMask;
3721 riceParamPart3 &= riceParamMask;
3723 riceParamPart0 |= (zeroCountPart0 << riceParam);
3724 riceParamPart1 |= (zeroCountPart1 << riceParam);
3725 riceParamPart2 |= (zeroCountPart2 << riceParam);
3726 riceParamPart3 |= (zeroCountPart3 << riceParam);
3728 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
3729 riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
3730 riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
3731 riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
3750 riceParamPart0 &= riceParamMask;
3751 riceParamPart0 |= (zeroCountPart0 << riceParam);
3752 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
3756 if (bitsPerSample+shift > 32) {
3769 #if defined(DRFLAC_SUPPORT_SSE2)
/*
Narrows the 32-bit lanes of a and b to 16 bits and arranges them interleaved in r, i.e. as
a0 b0 a1 b1 a2 b2 a3 b3 (lowest 16-bit lane first).
*/
3770 static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b)
/* Pack with signed saturation: r = a0 a1 a2 a3 b0 b1 b2 b3. */
3775 r = _mm_packs_epi32(a, b);
/* Swap the two middle 32-bit lanes: r = a0 a1 b0 b1 a2 a3 b2 b3. */
3778 r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0));
/* Swap the two middle 16-bit lanes of each 64-bit half: r = a0 b0 a1 b1 a2 b2 a3 b3. */
3781 r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
3782 r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
3788 #if defined(DRFLAC_SUPPORT_SSE41)
/*
Bitwise NOT of a 128-bit integer vector. Comparing a zeroed vector against itself yields a vector with
every bit set, and XOR-ing a with that all-ones mask flips every bit of a.
*/
3789 static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a)
3791 return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
/*
Horizontal add of the four 32-bit lanes of x. The full sum x0+x1+x2+x3 is produced in lanes 0 and 1 of the
result; lanes 2 and 3 hold doubled partial sums and are not the total.
*/
3794 static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x)
/* Add x to itself with the 64-bit halves swapped: lanes become { x0+x2, x1+x3, x0+x2, x1+x3 }. */
3796 __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
/* Swap the two 32-bit lanes of the low 64 bits (moved as pairs of 16-bit words); the high 64 bits are unchanged. */
3797 __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2));
/* Lane 0 = (x0+x2) + (x1+x3). */
3798 return _mm_add_epi32(x64, x32);
/*
Horizontal add of the two 64-bit lanes of x. The input is added to a copy of itself with the 64-bit halves
swapped, so both 64-bit lanes of the result hold x0+x1.
*/
3801 static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x)
3803 return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
3812 __m128i lo = _mm_srli_epi64(x,
count);
3813 __m128i hi = _mm_srai_epi32(x,
count);
3815 hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0));
3817 return _mm_or_si128(lo, hi);
3834 __m128i coefficients128_0;
3835 __m128i coefficients128_4;
3836 __m128i coefficients128_8;
3837 __m128i samples128_0;
3838 __m128i samples128_4;
3839 __m128i samples128_8;
3840 __m128i riceParamMask128;
3845 riceParamMask128 = _mm_set1_epi32(riceParamMask);
3848 coefficients128_0 = _mm_setzero_si128();
3849 coefficients128_4 = _mm_setzero_si128();
3850 coefficients128_8 = _mm_setzero_si128();
3852 samples128_0 = _mm_setzero_si128();
3853 samples128_4 = _mm_setzero_si128();
3854 samples128_8 = _mm_setzero_si128();
3864 int runningOrder = order;
3867 if (runningOrder >= 4) {
3868 coefficients128_0 = _mm_loadu_si128((
const __m128i*)(coefficients + 0));
3869 samples128_0 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 4));
3872 switch (runningOrder) {
3873 case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0);
break;
3874 case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0);
break;
3875 case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0);
break;
3881 if (runningOrder >= 4) {
3882 coefficients128_4 = _mm_loadu_si128((
const __m128i*)(coefficients + 4));
3883 samples128_4 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 8));
3886 switch (runningOrder) {
3887 case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0);
break;
3888 case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0);
break;
3889 case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0);
break;
3895 if (runningOrder == 4) {
3896 coefficients128_8 = _mm_loadu_si128((
const __m128i*)(coefficients + 8));
3897 samples128_8 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 12));
3900 switch (runningOrder) {
3901 case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0);
break;
3902 case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0);
break;
3903 case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0);
break;
3909 coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
3910 coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
3911 coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
3917 case 12: ((
drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((
drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
3918 case 11: ((
drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((
drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
3919 case 10: ((
drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((
drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
3920 case 9: ((
drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((
drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
3921 case 8: ((
drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((
drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
3922 case 7: ((
drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((
drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
3923 case 6: ((
drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((
drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
3924 case 5: ((
drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((
drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
3925 case 4: ((
drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((
drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
3926 case 3: ((
drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((
drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
3927 case 2: ((
drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((
drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
3928 case 1: ((
drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((
drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
3933 while (pDecodedSamples < pDecodedSamplesEnd) {
3934 __m128i prediction128;
3935 __m128i zeroCountPart128;
3936 __m128i riceParamPart128;
3945 zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
3946 riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
3948 riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
3949 riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
3950 riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01)));
3954 for (i = 0; i < 4; i += 1) {
3955 prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0);
3958 prediction128 = drflac__mm_hadd_epi32(prediction128);
3959 prediction128 = _mm_srai_epi32(prediction128, shift);
3960 prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
3962 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
3963 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
3965 }
else if (order <= 8) {
3966 for (i = 0; i < 4; i += 1) {
3967 prediction128 = _mm_mullo_epi32(coefficients128_4, samples128_4);
3968 prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
3971 prediction128 = drflac__mm_hadd_epi32(prediction128);
3972 prediction128 = _mm_srai_epi32(prediction128, shift);
3973 prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
3975 samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4);
3976 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
3977 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
3980 for (i = 0; i < 4; i += 1) {
3981 prediction128 = _mm_mullo_epi32(coefficients128_8, samples128_8);
3982 prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4));
3983 prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
3986 prediction128 = drflac__mm_hadd_epi32(prediction128);
3987 prediction128 = _mm_srai_epi32(prediction128, shift);
3988 prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
3990 samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4);
3991 samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4);
3992 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
3993 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
3998 _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
3999 pDecodedSamples += 4;
4004 while (i < (
int)
count) {
4011 riceParamParts0 &= riceParamMask;
4012 riceParamParts0 |= (zeroCountParts0 << riceParam);
4013 riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
4019 pDecodedSamples += 1;
4039 __m128i coefficients128_0;
4040 __m128i coefficients128_4;
4041 __m128i coefficients128_8;
4042 __m128i samples128_0;
4043 __m128i samples128_4;
4044 __m128i samples128_8;
4045 __m128i prediction128;
4046 __m128i riceParamMask128;
4053 riceParamMask128 = _mm_set1_epi32(riceParamMask);
4055 prediction128 = _mm_setzero_si128();
4058 coefficients128_0 = _mm_setzero_si128();
4059 coefficients128_4 = _mm_setzero_si128();
4060 coefficients128_8 = _mm_setzero_si128();
4062 samples128_0 = _mm_setzero_si128();
4063 samples128_4 = _mm_setzero_si128();
4064 samples128_8 = _mm_setzero_si128();
4068 int runningOrder = order;
4071 if (runningOrder >= 4) {
4072 coefficients128_0 = _mm_loadu_si128((
const __m128i*)(coefficients + 0));
4073 samples128_0 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 4));
4076 switch (runningOrder) {
4077 case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0);
break;
4078 case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0);
break;
4079 case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0);
break;
4085 if (runningOrder >= 4) {
4086 coefficients128_4 = _mm_loadu_si128((
const __m128i*)(coefficients + 4));
4087 samples128_4 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 8));
4090 switch (runningOrder) {
4091 case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0);
break;
4092 case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0);
break;
4093 case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0);
break;
4099 if (runningOrder == 4) {
4100 coefficients128_8 = _mm_loadu_si128((
const __m128i*)(coefficients + 8));
4101 samples128_8 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 12));
4104 switch (runningOrder) {
4105 case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0);
break;
4106 case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0);
break;
4107 case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0);
break;
4113 coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
4114 coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
4115 coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
4120 case 12: ((
drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((
drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
4121 case 11: ((
drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((
drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
4122 case 10: ((
drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((
drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
4123 case 9: ((
drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((
drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
4124 case 8: ((
drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((
drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
4125 case 7: ((
drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((
drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
4126 case 6: ((
drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((
drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
4127 case 5: ((
drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((
drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
4128 case 4: ((
drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((
drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
4129 case 3: ((
drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((
drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
4130 case 2: ((
drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((
drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
4131 case 1: ((
drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((
drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
4136 while (pDecodedSamples < pDecodedSamplesEnd) {
4137 __m128i zeroCountPart128;
4138 __m128i riceParamPart128;
4147 zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
4148 riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
4150 riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
4151 riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
4152 riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1)));
4154 for (i = 0; i < 4; i += 1) {
4155 prediction128 = _mm_xor_si128(prediction128, prediction128);
4160 case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0))));
4162 case 9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2))));
4164 case 7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0))));
4166 case 5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2))));
4168 case 3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0))));
4170 case 1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2))));
4174 prediction128 = drflac__mm_hadd_epi64(prediction128);
4175 prediction128 = drflac__mm_srai_epi64(prediction128, shift);
4176 prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
4179 samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4);
4180 samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4);
4181 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
4184 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
4188 _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
4189 pDecodedSamples += 4;
4194 while (i < (
int)
count) {
4201 riceParamParts0 &= riceParamMask;
4202 riceParamParts0 |= (zeroCountParts0 << riceParam);
4203 riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
4209 pDecodedSamples += 1;
4221 if (order > 0 && order <= 12) {
4222 if (bitsPerSample+shift > 32) {
4223 return drflac__decode_samples_with_residual__rice__sse41_64(bs,
count, riceParam, order, shift, coefficients, pSamplesOut);
4225 return drflac__decode_samples_with_residual__rice__sse41_32(bs,
count, riceParam, order, shift, coefficients, pSamplesOut);
4233 #if defined(DRFLAC_SUPPORT_NEON)
4236 vst1q_s32(p+0, x.val[0]);
4237 vst1q_s32(p+4, x.val[1]);
4242 vst1q_u32(p+0, x.val[0]);
4243 vst1q_u32(p+4, x.val[1]);
/*
Stores the two float vectors of x back to back: x.val[0] goes to p[0..3] and x.val[1] to p[4..7]. The two
vectors are written as separate contiguous runs; their elements are not interleaved in memory.
*/
4246 static DRFLAC_INLINE void drflac__vst2q_f32(
float* p, float32x4x2_t x)
4248 vst1q_f32(p+0, x.val[0]);
4249 vst1q_f32(p+4, x.val[1]);
4254 vst1q_s16(p, vcombine_s16(x.val[0], x.val[1]));
4259 vst1q_u16(p, vcombine_u16(x.val[0], x.val[1]));
4269 return vld1q_s32(x);
/*
Shifts one 32-bit lane out of b and shifts the lowest lane of a in at the top: the result is
{ b1, b2, b3, a0 }. Note that b is the first operand of the extract, so a supplies the incoming lane.
*/
4272 static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b)
4284 return vextq_s32(b, a, 1);
/*
Unsigned variant of the one-lane align: the result is { b1, b2, b3, a0 }, i.e. b with its lowest 32-bit lane
dropped and the lowest lane of a appended at the top.
*/
4287 static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b)
4299 return vextq_u32(b, a, 1);
/*
Horizontal add of the four 32-bit lanes of x, returned as a 2-lane vector in which both lanes hold the
total x0+x1+x2+x3.
*/
4302 static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x)
/* Fold the high half onto the low half: r = { x2+x0, x3+x1 }. */
4314 int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x));
/* Pairwise add of r with itself puts r0+r1 in both lanes. */
4315 return vpadd_s32(r, r);
/* Horizontal add of the two 64-bit lanes of x: returns a single-lane vector holding x0+x1. */
4318 static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x)
4320 return vadd_s64(vget_high_s64(x), vget_low_s64(x));
/*
Reverses the order of the four 32-bit lanes of x. The 64-bit halves are swapped first ({ x2, x3, x0, x1 })
and then the two lanes inside each half are swapped, giving { x3, x2, x1, x0 }.
*/
4323 static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x)
4333 return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x)));
/* Bitwise NOT of each signed 32-bit lane of x, implemented as an XOR against a vector with every bit set. */
4336 static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x)
4338 return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF));
/* Bitwise NOT of each unsigned 32-bit lane of x, implemented as an XOR against a vector with every bit set. */
4341 static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x)
4343 return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF));
4354 int32x4_t coefficients128_0;
4355 int32x4_t coefficients128_4;
4356 int32x4_t coefficients128_8;
4357 int32x4_t samples128_0;
4358 int32x4_t samples128_4;
4359 int32x4_t samples128_8;
4360 uint32x4_t riceParamMask128;
4361 int32x4_t riceParam128;
4367 riceParamMask = ~((~0UL) << riceParam);
4368 riceParamMask128 = vdupq_n_u32(riceParamMask);
4370 riceParam128 = vdupq_n_s32(riceParam);
4371 shift64 = vdup_n_s32(-shift);
4372 one128 = vdupq_n_u32(1);
4381 int runningOrder = order;
4386 if (runningOrder >= 4) {
4387 coefficients128_0 = vld1q_s32(coefficients + 0);
4388 samples128_0 = vld1q_s32(pSamplesOut - 4);
4391 switch (runningOrder) {
4392 case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3];
4393 case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2];
4394 case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1];
4397 coefficients128_0 = vld1q_s32(tempC);
4398 samples128_0 = vld1q_s32(tempS);
4403 if (runningOrder >= 4) {
4404 coefficients128_4 = vld1q_s32(coefficients + 4);
4405 samples128_4 = vld1q_s32(pSamplesOut - 8);
4408 switch (runningOrder) {
4409 case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7];
4410 case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6];
4411 case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5];
4414 coefficients128_4 = vld1q_s32(tempC);
4415 samples128_4 = vld1q_s32(tempS);
4420 if (runningOrder == 4) {
4421 coefficients128_8 = vld1q_s32(coefficients + 8);
4422 samples128_8 = vld1q_s32(pSamplesOut - 12);
4425 switch (runningOrder) {
4426 case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11];
4427 case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10];
4428 case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9];
4431 coefficients128_8 = vld1q_s32(tempC);
4432 samples128_8 = vld1q_s32(tempS);
4437 coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
4438 coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
4439 coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
4443 while (pDecodedSamples < pDecodedSamplesEnd) {
4444 int32x4_t prediction128;
4445 int32x2_t prediction64;
4446 uint32x4_t zeroCountPart128;
4447 uint32x4_t riceParamPart128;
4456 zeroCountPart128 = vld1q_u32(zeroCountParts);
4457 riceParamPart128 = vld1q_u32(riceParamParts);
4459 riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
4460 riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
4461 riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
4464 for (i = 0; i < 4; i += 1) {
4465 prediction128 = vmulq_s32(coefficients128_0, samples128_0);
4468 prediction64 = drflac__vhaddq_s32(prediction128);
4469 prediction64 = vshl_s32(prediction64, shift64);
4470 prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
4472 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
4473 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
4475 }
else if (order <= 8) {
4476 for (i = 0; i < 4; i += 1) {
4477 prediction128 = vmulq_s32(coefficients128_4, samples128_4);
4478 prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
4481 prediction64 = drflac__vhaddq_s32(prediction128);
4482 prediction64 = vshl_s32(prediction64, shift64);
4483 prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
4485 samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
4486 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
4487 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
4490 for (i = 0; i < 4; i += 1) {
4491 prediction128 = vmulq_s32(coefficients128_8, samples128_8);
4492 prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4);
4493 prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
4496 prediction64 = drflac__vhaddq_s32(prediction128);
4497 prediction64 = vshl_s32(prediction64, shift64);
4498 prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
4500 samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
4501 samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
4502 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
4503 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
4508 vst1q_s32(pDecodedSamples, samples128_0);
4509 pDecodedSamples += 4;
4514 while (i < (
int)
count) {
4521 riceParamParts[0] &= riceParamMask;
4522 riceParamParts[0] |= (zeroCountParts[0] << riceParam);
4523 riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
4529 pDecodedSamples += 1;
4543 int32x4_t coefficients128_0;
4544 int32x4_t coefficients128_4;
4545 int32x4_t coefficients128_8;
4546 int32x4_t samples128_0;
4547 int32x4_t samples128_4;
4548 int32x4_t samples128_8;
4549 uint32x4_t riceParamMask128;
4550 int32x4_t riceParam128;
4556 riceParamMask = ~((~0UL) << riceParam);
4557 riceParamMask128 = vdupq_n_u32(riceParamMask);
4559 riceParam128 = vdupq_n_s32(riceParam);
4560 shift64 = vdup_n_s64(-shift);
4561 one128 = vdupq_n_u32(1);
4570 int runningOrder = order;
4575 if (runningOrder >= 4) {
4576 coefficients128_0 = vld1q_s32(coefficients + 0);
4577 samples128_0 = vld1q_s32(pSamplesOut - 4);
4580 switch (runningOrder) {
4581 case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3];
4582 case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2];
4583 case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1];
4586 coefficients128_0 = vld1q_s32(tempC);
4587 samples128_0 = vld1q_s32(tempS);
4592 if (runningOrder >= 4) {
4593 coefficients128_4 = vld1q_s32(coefficients + 4);
4594 samples128_4 = vld1q_s32(pSamplesOut - 8);
4597 switch (runningOrder) {
4598 case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7];
4599 case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6];
4600 case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5];
4603 coefficients128_4 = vld1q_s32(tempC);
4604 samples128_4 = vld1q_s32(tempS);
4609 if (runningOrder == 4) {
4610 coefficients128_8 = vld1q_s32(coefficients + 8);
4611 samples128_8 = vld1q_s32(pSamplesOut - 12);
4614 switch (runningOrder) {
4615 case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11];
4616 case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10];
4617 case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9];
4620 coefficients128_8 = vld1q_s32(tempC);
4621 samples128_8 = vld1q_s32(tempS);
4626 coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
4627 coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
4628 coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
4632 while (pDecodedSamples < pDecodedSamplesEnd) {
4633 int64x2_t prediction128;
4634 uint32x4_t zeroCountPart128;
4635 uint32x4_t riceParamPart128;
4644 zeroCountPart128 = vld1q_u32(zeroCountParts);
4645 riceParamPart128 = vld1q_u32(riceParamParts);
4647 riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
4648 riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
4649 riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
4651 for (i = 0; i < 4; i += 1) {
4652 int64x1_t prediction64;
4654 prediction128 = veorq_s64(prediction128, prediction128);
4658 case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8)));
4660 case 9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8)));
4662 case 7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4)));
4664 case 5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4)));
4666 case 3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0)));
4668 case 1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0)));
4672 prediction64 = drflac__vhaddq_s64(prediction128);
4673 prediction64 = vshl_s64(prediction64, shift64);
4674 prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0)));
4677 samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
4678 samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
4679 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0);
4682 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
4686 vst1q_s32(pDecodedSamples, samples128_0);
4687 pDecodedSamples += 4;
4692 while (i < (
int)
count) {
4699 riceParamParts[0] &= riceParamMask;
4700 riceParamParts[0] |= (zeroCountParts[0] << riceParam);
4701 riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
4707 pDecodedSamples += 1;
4719 if (order > 0 && order <= 12) {
4720 if (bitsPerSample+shift > 32) {
4721 return drflac__decode_samples_with_residual__rice__neon_64(bs,
count, riceParam, order, shift, coefficients, pSamplesOut);
4723 return drflac__decode_samples_with_residual__rice__neon_32(bs,
count, riceParam, order, shift, coefficients, pSamplesOut);
4733 #if defined(DRFLAC_SUPPORT_SSE41)
4734 if (drflac__gIsSSE41Supported) {
4735 return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample,
count, riceParam, order, shift, coefficients, pSamplesOut);
4737 #elif defined(DRFLAC_SUPPORT_NEON)
4739 return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample,
count, riceParam, order, shift, coefficients, pSamplesOut);
4745 return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample,
count, riceParam, order, shift, coefficients, pSamplesOut);
4759 for (i = 0; i <
count; ++i) {
4776 for (i = 0; i <
count; ++i) {
4777 if (unencodedBitsPerSample > 0) {
4785 if (bitsPerSample >= 24) {
4821 pDecodedSamples += order;
4831 if (partitionOrder > 8) {
4836 if ((blockSize / (1 << partitionOrder)) < order) {
4840 samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
4841 partitionsRemaining = (1 << partitionOrder);
4848 if (riceParam == 15) {
4855 if (riceParam == 31) {
4860 if (riceParam != 0xFF) {
4875 pDecodedSamples += samplesInPartition;
4877 if (partitionsRemaining == 1) {
4881 partitionsRemaining -= 1;
4883 if (partitionOrder != 0) {
4884 samplesInPartition = blockSize / (1 << partitionOrder);
4922 if (partitionOrder > 8) {
4927 if ((blockSize / (1 << partitionOrder)) <= order) {
4931 samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
4932 partitionsRemaining = (1 << partitionOrder);
4940 if (riceParam == 15) {
4947 if (riceParam == 31) {
4952 if (riceParam != 0xFF) {
4968 if (partitionsRemaining == 1) {
4972 partitionsRemaining -= 1;
4973 samplesInPartition = blockSize / (1 << partitionOrder);
4994 for (i = 0; i < blockSize; ++i) {
4995 pDecodedSamples[i] = sample;
5005 for (i = 0; i < blockSize; ++i) {
5011 pDecodedSamples[i] = sample;
5030 for (i = 0; i < lpcOrder; ++i) {
5036 pDecodedSamples[i] = sample;
5054 for (i = 0; i < lpcOrder; ++i) {
5060 pDecodedSamples[i] = sample;
5066 if (lpcPrecision == 15) {
5088 for (i = 0; i < lpcOrder; ++i) {
5104 const drflac_uint32 sampleRateTable[12] = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
5128 if (reserved == 1) {
5141 if (blockSize == 0) {
5154 if (channelAssignment > 10) {
5162 if (bitsPerSample == 3 || bitsPerSample == 7) {
5171 if (reserved == 1) {
5177 isVariableBlockSize = blockingStrategy == 1;
5178 if (isVariableBlockSize) {
5188 header->flacFrameNumber = 0;
5189 header->pcmFrameNumber = pcmFrameNumber;
5201 header->pcmFrameNumber = 0;
5206 if (blockSize == 1) {
5207 header->blockSizeInPCMFrames = 192;
5208 }
else if (blockSize <= 5) {
5210 header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2));
5211 }
else if (blockSize == 6) {
5216 header->blockSizeInPCMFrames += 1;
5217 }
else if (blockSize == 7) {
5222 header->blockSizeInPCMFrames += 1;
5225 header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8));
5229 if (sampleRate <= 11) {
5230 header->sampleRate = sampleRateTable[sampleRate];
5231 }
else if (sampleRate == 12) {
5236 header->sampleRate *= 1000;
5237 }
else if (sampleRate == 13) {
5242 }
else if (sampleRate == 14) {
5247 header->sampleRate *= 10;
5253 header->channelAssignment = channelAssignment;
5255 header->bitsPerSample = bitsPerSampleTable[bitsPerSample];
5256 if (
header->bitsPerSample == 0) {
5257 header->bitsPerSample = streaminfoBitsPerSample;
5264 #ifndef DR_FLAC_NO_CRC
5265 if (
header->crc8 != crc8) {
5283 if ((header & 0x80) != 0) {
5287 type = (
header & 0x7E) >> 1;
5290 }
else if (type == 1) {
5293 if ((type & 0x20) != 0) {
5296 }
else if ((type & 0x08) != 0) {
5314 if ((header & 0x01) == 1) {
5315 unsigned int wastedBitsPerSample;
5333 pSubframe = frame->
subframes + subframeIndex;
5341 subframeBitsPerSample += 1;
5343 subframeBitsPerSample += 1;
5390 pSubframe = frame->
subframes + subframeIndex;
5398 subframeBitsPerSample += 1;
5400 subframeBitsPerSample += 1;
5430 unsigned int bitsToSeek = pSubframe->
lpcOrder * subframeBitsPerSample;
5444 unsigned int bitsToSeek = pSubframe->
lpcOrder * subframeBitsPerSample;
5452 if (lpcPrecision == 15) {
5458 bitsToSeek = (pSubframe->
lpcOrder * lpcPrecision) + 5;
5477 drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};
5480 return lookup[channelAssignment];
5489 #ifndef DR_FLAC_NO_CRC
5503 if (channelCount != (
int)pFlac->
channels) {
5507 for (i = 0; i < channelCount; ++i) {
5514 if (paddingSizeInBits > 0) {
5521 #ifndef DR_FLAC_NO_CRC
5528 #ifndef DR_FLAC_NO_CRC
5529 if (actualCRC16 != desiredCRC16) {
5544 #ifndef DR_FLAC_NO_CRC
5549 for (i = 0; i < channelCount; ++i) {
5561 #ifndef DR_FLAC_NO_CRC
5568 #ifndef DR_FLAC_NO_CRC
5569 if (actualCRC16 != desiredCRC16) {
5609 if (firstPCMFrame == 0) {
5614 if (lastPCMFrame > 0) {
5618 if (pFirstPCMFrame) {
5619 *pFirstPCMFrame = firstPCMFrame;
5621 if (pLastPCMFrame) {
5622 *pLastPCMFrame = lastPCMFrame;
5651 while (pcmFramesToSeek > 0) {
5658 pcmFramesRead += pcmFramesToSeek;
5660 pcmFramesToSeek = 0;
5670 return pcmFramesRead;
5696 runningPCMFrameCount = 0;
5720 pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
5721 if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
5726 drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
5735 goto next_iteration;
5752 runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
5755 goto next_iteration;
5785 #if !defined(DR_FLAC_NO_CRC)
5791 #define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f
5809 if (targetByte == 0) {
5815 targetByte = rangeLo + ((rangeHi - rangeLo)/2);
5816 rangeHi = targetByte;
5831 targetByte = rangeLo + ((rangeHi - rangeLo)/2);
5832 rangeHi = targetByte;
5839 targetByte = rangeLo + ((rangeHi - rangeLo)/2);
5840 rangeHi = targetByte;
5848 if(targetByte == lastTargetByte) {
5858 *pLastSuccessfulSeekOffset = targetByte;
5886 drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo;
5890 if (targetByte > byteRangeHi) {
5891 targetByte = byteRangeHi;
5902 if (pcmRangeLo == newPCMRangeLo) {
5914 pcmRangeLo = newPCMRangeLo;
5915 pcmRangeHi = newPCMRangeHi;
5917 if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) {
5927 if (pcmRangeLo > pcmFrameIndex) {
5929 byteRangeHi = lastSuccessfulSeekOffset;
5930 if (byteRangeLo > byteRangeHi) {
5931 byteRangeLo = byteRangeHi;
5934 targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2);
5935 if (targetByte < byteRangeLo) {
5936 targetByte = byteRangeLo;
5942 if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) {
5949 byteRangeLo = lastSuccessfulSeekOffset;
5950 if (byteRangeHi < byteRangeLo) {
5951 byteRangeHi = byteRangeLo;
5955 if (targetByte > byteRangeHi) {
5956 targetByte = byteRangeHi;
5959 if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) {
5960 closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset;
5987 if (pcmFrameIndex < seekForwardThreshold) {
6016 for (iSeekpoint = 0; iSeekpoint < pFlac->
seekpointCount; ++iSeekpoint) {
6021 iClosestSeekpoint = iSeekpoint;
6032 #if !defined(DR_FLAC_NO_CRC)
6048 if (iClosestSeekpoint < pFlac->seekpointCount-1) {
6112 pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
6113 if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
6118 drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
6127 goto next_iteration;
6144 runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
6147 goto next_iteration;
6177 #ifndef DR_FLAC_NO_OGG
6211 #ifndef DR_FLAC_NO_OGG
6221 *isLastBlock = (
drflac_uint8)((blockHeader & 0x80000000UL) >> 31);
6222 *blockType = (
drflac_uint8)((blockHeader & 0x7F000000UL) >> 24);
6223 *blockSize = (blockHeader & 0x00FFFFFFUL);
6231 if (onRead(pUserData, &blockHeader, 4) != 4) {
6247 if (onRead(pUserData, &blockSizes, 4) != 4) {
6252 if (onRead(pUserData, &frameSizes, 6) != 6) {
6257 if (onRead(pUserData, &importantProps, 8) != 8) {
6262 if (onRead(pUserData, md5,
sizeof(md5)) !=
sizeof(md5)) {
6305 if (pAllocationCallbacks ==
NULL) {
6323 if (pAllocationCallbacks ==
NULL) {
6353 if (p ==
NULL || pAllocationCallbacks ==
NULL) {
6381 runningFilePos += 4;
6383 metadata.
type = blockType;
6391 if (blockSize < 4) {
6397 if (pRawData ==
NULL) {
6401 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
6411 onMeta(pUserDataMD, &metadata);
6419 seektablePos = runningFilePos;
6420 seektableSize = blockSize;
6427 if (pRawData ==
NULL) {
6431 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
6442 for (iSeekpoint = 0; iSeekpoint < metadata.
data.
seektable.seekpointCount; ++iSeekpoint) {
6449 onMeta(pUserDataMD, &metadata);
6457 if (blockSize < 8) {
6463 const char* pRunningData;
6464 const char* pRunningDataEnd;
6468 if (pRawData ==
NULL) {
6472 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
6480 pRunningData = (
const char*)pRawData;
6481 pRunningDataEnd = (
const char*)pRawData + blockSize;
6504 if (pRunningDataEnd - pRunningData < 4) {
6510 if (pRunningDataEnd - pRunningData < (
drflac_int64)commentLength) {
6514 pRunningData += commentLength;
6517 onMeta(pUserDataMD, &metadata);
6525 if (blockSize < 396) {
6531 const char* pRunningData;
6532 const char* pRunningDataEnd;
6537 if (pRawData ==
NULL) {
6541 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
6549 pRunningData = (
const char*)pRawData;
6550 pRunningDataEnd = (
const char*)pRawData + blockSize;
6554 metadata.
data.
cuesheet.isCD = (pRunningData[0] & 0x80) != 0; pRunningData += 259;
6555 metadata.
data.
cuesheet.trackCount = pRunningData[0]; pRunningData += 1;
6559 for (iTrack = 0; iTrack < metadata.
data.
cuesheet.trackCount; ++iTrack) {
6563 if (pRunningDataEnd - pRunningData < 36) {
6570 indexCount = pRunningData[0]; pRunningData += 1;
6572 if (pRunningDataEnd - pRunningData < (
drflac_int64)indexPointSize) {
6578 for (iIndex = 0; iIndex < indexCount; ++iIndex) {
6585 onMeta(pUserDataMD, &metadata);
6593 if (blockSize < 32) {
6599 const char* pRunningData;
6600 const char* pRunningDataEnd;
6603 if (pRawData ==
NULL) {
6607 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
6615 pRunningData = (
const char*)pRawData;
6616 pRunningDataEnd = (
const char*)pRawData + blockSize;
6634 metadata.
data.
picture.description = pRunningData; pRunningData += metadata.
data.
picture.descriptionLength;
6648 onMeta(pUserDataMD, &metadata);
6663 onMeta(pUserDataMD, &metadata);
6686 if (pRawData ==
NULL) {
6690 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
6697 onMeta(pUserDataMD, &metadata);
6705 if (onMeta ==
NULL && blockSize > 0) {
6711 runningFilePos += blockSize;
6717 *pSeektablePos = seektablePos;
6718 *pSeektableSize = seektableSize;
6719 *pFirstFramePos = runningFilePos;
6787 onMeta(pUserDataMD, &metadata);
6794 #ifndef DR_FLAC_NO_OGG
6795 #define DRFLAC_OGG_MAX_PAGE_SIZE 65307
6796 #define DRFLAC_OGG_CAPTURE_PATTERN_CRC32 1605413199
6804 #ifndef DR_FLAC_NO_CRC
6806 0x00000000
L, 0x04C11DB7
L, 0x09823B6E
L, 0x0D4326D9
L,
6807 0x130476DC
L, 0x17C56B6B
L, 0x1A864DB2
L, 0x1E475005
L,
6808 0x2608EDB8
L, 0x22C9F00F
L, 0x2F8AD6D6
L, 0x2B4BCB61
L,
6809 0x350C9B64
L, 0x31CD86D3
L, 0x3C8EA00A
L, 0x384FBDBD
L,
6810 0x4C11DB70
L, 0x48D0C6C7
L, 0x4593E01E
L, 0x4152FDA9
L,
6811 0x5F15ADAC
L, 0x5BD4B01B
L, 0x569796C2
L, 0x52568B75
L,
6812 0x6A1936C8
L, 0x6ED82B7F
L, 0x639B0DA6
L, 0x675A1011
L,
6813 0x791D4014
L, 0x7DDC5DA3
L, 0x709F7B7A
L, 0x745E66CD
L,
6814 0x9823B6E0
L, 0x9CE2AB57
L, 0x91A18D8E
L, 0x95609039
L,
6815 0x8B27C03C
L, 0x8FE6DD8B
L, 0x82A5FB52
L, 0x8664E6E5
L,
6816 0xBE2B5B58
L, 0xBAEA46EF
L, 0xB7A96036
L, 0xB3687D81
L,
6817 0xAD2F2D84
L, 0xA9EE3033
L, 0xA4AD16EA
L, 0xA06C0B5D
L,
6818 0xD4326D90
L, 0xD0F37027
L, 0xDDB056FE
L, 0xD9714B49
L,
6819 0xC7361B4C
L, 0xC3F706FB
L, 0xCEB42022
L, 0xCA753D95
L,
6820 0xF23A8028
L, 0xF6FB9D9F
L, 0xFBB8BB46
L, 0xFF79A6F1
L,
6821 0xE13EF6F4
L, 0xE5FFEB43
L, 0xE8BCCD9A
L, 0xEC7DD02D
L,
6822 0x34867077
L, 0x30476DC0
L, 0x3D044B19
L, 0x39C556AE
L,
6823 0x278206AB
L, 0x23431B1C
L, 0x2E003DC5
L, 0x2AC12072
L,
6824 0x128E9DCF
L, 0x164F8078
L, 0x1B0CA6A1
L, 0x1FCDBB16
L,
6825 0x018AEB13
L, 0x054BF6A4
L, 0x0808D07D
L, 0x0CC9CDCA
L,
6826 0x7897AB07
L, 0x7C56B6B0
L, 0x71159069
L, 0x75D48DDE
L,
6827 0x6B93DDDB
L, 0x6F52C06C
L, 0x6211E6B5
L, 0x66D0FB02
L,
6828 0x5E9F46BF
L, 0x5A5E5B08
L, 0x571D7DD1
L, 0x53DC6066
L,
6829 0x4D9B3063
L, 0x495A2DD4
L, 0x44190B0D
L, 0x40D816BA
L,
6830 0xACA5C697
L, 0xA864DB20
L, 0xA527FDF9
L, 0xA1E6E04E
L,
6831 0xBFA1B04B
L, 0xBB60ADFC
L, 0xB6238B25
L, 0xB2E29692
L,
6832 0x8AAD2B2F
L, 0x8E6C3698
L, 0x832F1041
L, 0x87EE0DF6
L,
6833 0x99A95DF3
L, 0x9D684044
L, 0x902B669D
L, 0x94EA7B2A
L,
6834 0xE0B41DE7
L, 0xE4750050
L, 0xE9362689
L, 0xEDF73B3E
L,
6835 0xF3B06B3B
L, 0xF771768C
L, 0xFA325055
L, 0xFEF34DE2
L,
6836 0xC6BCF05F
L, 0xC27DEDE8
L, 0xCF3ECB31
L, 0xCBFFD686
L,
6837 0xD5B88683
L, 0xD1799B34
L, 0xDC3ABDED
L, 0xD8FBA05A
L,
6838 0x690CE0EE
L, 0x6DCDFD59
L, 0x608EDB80
L, 0x644FC637
L,
6839 0x7A089632
L, 0x7EC98B85
L, 0x738AAD5C
L, 0x774BB0EB
L,
6840 0x4F040D56
L, 0x4BC510E1
L, 0x46863638
L, 0x42472B8F
L,
6841 0x5C007B8A
L, 0x58C1663D
L, 0x558240E4
L, 0x51435D53
L,
6842 0x251D3B9E
L, 0x21DC2629
L, 0x2C9F00F0
L, 0x285E1D47
L,
6843 0x36194D42
L, 0x32D850F5
L, 0x3F9B762C
L, 0x3B5A6B9B
L,
6844 0x0315D626
L, 0x07D4CB91
L, 0x0A97ED48
L, 0x0E56F0FF
L,
6845 0x1011A0FA
L, 0x14D0BD4D
L, 0x19939B94
L, 0x1D528623
L,
6846 0xF12F560E
L, 0xF5EE4BB9
L, 0xF8AD6D60
L, 0xFC6C70D7
L,
6847 0xE22B20D2
L, 0xE6EA3D65
L, 0xEBA91BBC
L, 0xEF68060B
L,
6848 0xD727BBB6
L, 0xD3E6A601
L, 0xDEA580D8
L, 0xDA649D6F
L,
6849 0xC423CD6A
L, 0xC0E2D0DD
L, 0xCDA1F604
L, 0xC960EBB3
L,
6850 0xBD3E8D7E
L, 0xB9FF90C9
L, 0xB4BCB610
L, 0xB07DABA7
L,
6851 0xAE3AFBA2
L, 0xAAFBE615
L, 0xA7B8C0CC
L, 0xA379DD7B
L,
6852 0x9B3660C6
L, 0x9FF77D71
L, 0x92B45BA8
L, 0x9675461F
L,
6853 0x8832161A
L, 0x8CF30BAD
L, 0x81B02D74
L, 0x857130C3
L,
6854 0x5D8A9099
L, 0x594B8D2E
L, 0x5408ABF7
L, 0x50C9B640
L,
6855 0x4E8EE645
L, 0x4A4FFBF2
L, 0x470CDD2B
L, 0x43CDC09C
L,
6856 0x7B827D21
L, 0x7F436096
L, 0x7200464F
L, 0x76C15BF8
L,
6857 0x68860BFD
L, 0x6C47164A
L, 0x61043093
L, 0x65C52D24
L,
6858 0x119B4BE9
L, 0x155A565E
L, 0x18197087
L, 0x1CD86D30
L,
6859 0x029F3D35
L, 0x065E2082
L, 0x0B1D065B
L, 0x0FDC1BEC
L,
6860 0x3793A651
L, 0x3352BBE6
L, 0x3E119D3F
L, 0x3AD08088
L,
6861 0x2497D08D
L, 0x2056CD3A
L, 0x2D15EBE3
L, 0x29D4F654
L,
6862 0xC5A92679
L, 0xC1683BCE
L, 0xCC2B1D17
L, 0xC8EA00A0
L,
6863 0xD6AD50A5
L, 0xD26C4D12
L, 0xDF2F6BCB
L, 0xDBEE767C
L,
6864 0xE3A1CBC1
L, 0xE760D676
L, 0xEA23F0AF
L, 0xEEE2ED18
L,
6865 0xF0A5BD1D
L, 0xF464A0AA
L, 0xF9278673
L, 0xFDE69BC4
L,
6866 0x89B8FD09
L, 0x8D79E0BE
L, 0x803AC667
L, 0x84FBDBD0
L,
6867 0x9ABC8BD5
L, 0x9E7D9662
L, 0x933EB0BB
L, 0x97FFAD0C
L,
6868 0xAFB010B1
L, 0xAB710D06
L, 0xA6322BDF
L, 0xA2F33668
L,
6869 0xBCB4666D
L, 0xB8757BDA
L, 0xB5365D03
L, 0xB1F740B4
L
6875 #ifndef DR_FLAC_NO_CRC
6895 crc32 = drflac_crc32_uint32(crc32, (
drflac_uint32)((data >> 32) & 0xFFFFFFFF));
6896 crc32 = drflac_crc32_uint32(crc32, (
drflac_uint32)((data >> 0) & 0xFFFFFFFF));
6905 for (i = 0; i < dataSize; ++i) {
6914 return pattern[0] ==
'O' && pattern[1] ==
'g' && pattern[2] ==
'g' && pattern[3] ==
'S';
6931 return pageBodySize;
6941 if (onRead(pUserData, data, 23) != 23) {
6970 for (i = 0; i < 23; ++i) {
6993 if (onRead(pUserData,
id, 4) != 4) {
7020 if (onRead(pUserData, &
id[3], 1) != 1) {
7053 size_t bytesActuallyRead = oggbs->
onRead(oggbs->
pUserData, bufferOut, bytesToRead);
7056 return bytesActuallyRead;
7062 if (offset <= 0x7FFFFFFF) {
7078 while (offset > 0x7FFFFFFF) {
7083 offset -= 0x7FFFFFFF;
7102 #ifndef DR_FLAC_NO_CRC
7131 #ifndef DR_FLAC_NO_CRC
7133 if (actualCRC32 !=
header.checksum) {
7147 (void)recoveryMethod;
7163 while (iByte < bytesConsumedInPage) {
7165 if (iByte + segmentSize > bytesConsumedInPage) {
7169 iByte += segmentSize;
7184 drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg);
7186 drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg;
7189 if (segmentSize < 255) {
7197 bytesToEndOfPacketOrPage += segmentSize;
7232 return drflac_oggbs__seek_to_next_packet(oggbs);
7240 size_t bytesRead = 0;
7246 while (bytesRead < bytesToRead) {
7247 size_t bytesRemainingToRead = bytesToRead - bytesRead;
7251 bytesRead += bytesRemainingToRead;
7276 int bytesSeeked = 0;
7296 while (bytesSeeked < offset) {
7297 int bytesRemainingToSeek = offset - bytesSeeked;
7301 bytesSeeked += bytesRemainingToSeek;
7342 runningGranulePosition = 0;
7361 firstBytesInPage[0] = oggbs->
pageData[0];
7362 firstBytesInPage[1] = oggbs->
pageData[1];
7364 if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {
7390 runningPCMFrameCount = runningGranulePosition;
7423 pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
7437 if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) {
7445 drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount);
7446 if (pcmFramesToDecode == 0) {
7467 runningPCMFrameCount += pcmFrameCountInThisFrame;
7507 if ((
header.headerType & 0x02) == 0) {
7513 if (pageBodySize == 51) {
7518 if (onRead(pUserData, &packetType, 1) != 1) {
7522 bytesRemainingInPage -= 1;
7523 if (packetType == 0x7F) {
7526 if (onRead(pUserData, sig, 4) != 4) {
7530 bytesRemainingInPage -= 4;
7531 if (sig[0] ==
'F' && sig[1] ==
'L' && sig[2] ==
'A' && sig[3] ==
'C') {
7534 if (onRead(pUserData, mappingVersion, 2) != 2) {
7538 if (mappingVersion[0] != 1) {
7551 if (onRead(pUserData, sig, 4) != 4) {
7555 if (sig[0] ==
'f' && sig[1] ==
'L' && sig[2] ==
'a' && sig[3] ==
'C') {
7585 onMeta(pUserDataMD, &metadata);
7644 if (pInit ==
NULL || onRead ==
NULL || onSeek ==
NULL) {
7667 if (onRead(pUserData,
id, 4) != 4) {
7672 if (
id[0] ==
'I' &&
id[1] ==
'D' &&
id[2] ==
'3') {
7677 if (onRead(pUserData, header, 6) != 6) {
7699 if (
id[0] ==
'f' &&
id[1] ==
'L' &&
id[2] ==
'a' &&
id[3] ==
'C') {
7702 #ifndef DR_FLAC_NO_OGG
7703 if (
id[0] ==
'O' &&
id[1] ==
'g' &&
id[2] ==
'g' &&
id[3] ==
'S') {
7713 #ifndef DR_FLAC_NO_OGG
7730 pFlac->
bs = pInit->
bs;
7748 #ifndef DR_FLAC_NO_OGG
7764 if (pAllocationCallbacks !=
NULL) {
7765 allocationCallbacks = *pAllocationCallbacks;
7786 allocationSize =
sizeof(
drflac);
7800 allocationSize += decodedSamplesAllocationSize;
7803 #ifndef DR_FLAC_NO_OGG
7830 if (
init.hasMetadataBlocks) {
7833 void* pUserDataOverride = pUserData;
7835 #ifndef DR_FLAC_NO_OGG
7839 pUserDataOverride = (
void*)&oggbs;
7843 if (!
drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) {
7847 allocationSize += seektableSize;
7852 if (pFlac ==
NULL) {
7860 #ifndef DR_FLAC_NO_OGG
7863 *pInternalOggbs = oggbs;
7869 pFlac->
_oggbs = (
void*)pInternalOggbs;
7876 #ifndef DR_FLAC_NO_OGG
7886 if (seektablePos != 0) {
7898 for (iSeekpoint = 0; iSeekpoint < pFlac->
seekpointCount; ++iSeekpoint) {
7927 if (!
init.hasStreamInfoBlock) {
7953 #ifndef DR_FLAC_NO_STDIO
8234 #ifdef EPROTONOSUPPORT
8237 #ifdef ESOCKTNOSUPPORT
8252 #ifdef EADDRNOTAVAIL
8348 #ifdef ENOTRECOVERABLE
8363 #if defined(_MSC_VER) && _MSC_VER >= 1400
8367 if (ppFile !=
NULL) {
8371 if (pFilePath ==
NULL || pOpenMode ==
NULL || ppFile ==
NULL) {
8375 #if defined(_MSC_VER) && _MSC_VER >= 1400
8376 err = fopen_s(ppFile, pFilePath, pOpenMode);
8381 #if defined(_WIN32) || defined(__APPLE__)
8382 *ppFile = fopen(pFilePath, pOpenMode);
8384 #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
8385 *ppFile = fopen64(pFilePath, pOpenMode);
8387 *ppFile = fopen(pFilePath, pOpenMode);
8390 if (*ppFile ==
NULL) {
8416 #if defined(_MSC_VER) || defined(__MINGW64__) || (!defined(__STRICT_ANSI__) && !defined(_NO_EXT_KEYS))
8417 #define DRFLAC_HAS_WFOPEN
8423 if (ppFile !=
NULL) {
8427 if (pFilePath ==
NULL || pOpenMode ==
NULL || ppFile ==
NULL) {
8431 #if defined(DRFLAC_HAS_WFOPEN)
8434 #if defined(_MSC_VER) && _MSC_VER >= 1400
8435 errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
8440 *ppFile = _wfopen(pFilePath, pOpenMode);
8441 if (*ppFile ==
NULL) {
8445 (void)pAllocationCallbacks;
8456 const wchar_t* pFilePathTemp = pFilePath;
8457 char* pFilePathMB =
NULL;
8458 char pOpenModeMB[32] = {0};
8462 lenMB = wcsrtombs(
NULL, &pFilePathTemp, 0, &mbs);
8463 if (lenMB == (
size_t)-1) {
8468 if (pFilePathMB ==
NULL) {
8472 pFilePathTemp = pFilePath;
8474 wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
8480 if (pOpenMode[i] == 0) {
8481 pOpenModeMB[i] =
'\0';
8485 pOpenModeMB[i] = (char)pOpenMode[i];
8490 *ppFile = fopen(pFilePathMB, pOpenModeMB);
8495 if (*ppFile ==
NULL) {
8505 return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData);
8526 if (pFlac ==
NULL) {
8544 if (pFlac ==
NULL) {
8562 if (pFlac ==
NULL) {
8580 if (pFlac ==
NULL) {
8592 size_t bytesRemaining;
8598 if (bytesToRead > bytesRemaining) {
8599 bytesToRead = bytesRemaining;
8602 if (bytesToRead > 0) {
8622 if (memoryStream->
currentReadPos + offset <= memoryStream->dataSize) {
8647 if (pFlac ==
NULL) {
8654 #ifndef DR_FLAC_NO_OGG
8678 if (pFlac ==
NULL) {
8685 #ifndef DR_FLAC_NO_OGG
8722 if (pFlac ==
NULL) {
8726 #ifndef DR_FLAC_NO_STDIO
8735 #ifndef DR_FLAC_NO_OGG
8756 for (i = 0; i < frameCount; ++i) {
8776 for (i = 0; i < frameCount4; ++i) {
8802 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8812 #if defined(DRFLAC_SUPPORT_SSE2)
8824 for (i = 0; i < frameCount4; ++i) {
8825 __m128i left = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
8826 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
8827 __m128i right = _mm_sub_epi32(left, side);
8829 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
8830 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
8833 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8844 #if defined(DRFLAC_SUPPORT_NEON)
8858 shift0_4 = vdupq_n_s32(shift0);
8859 shift1_4 = vdupq_n_s32(shift1);
8861 for (i = 0; i < frameCount4; ++i) {
8866 left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
8867 side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
8868 right = vsubq_u32(left, side);
8870 drflac__vst2q_u32((
drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
8873 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8886 #if defined(DRFLAC_SUPPORT_SSE2)
8887 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
8888 drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
8890 #elif defined(DRFLAC_SUPPORT_NEON)
8892 drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
8898 drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
8910 for (i = 0; i < frameCount; ++i) {
8930 for (i = 0; i < frameCount4; ++i) {
8956 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8966 #if defined(DRFLAC_SUPPORT_SSE2)
8978 for (i = 0; i < frameCount4; ++i) {
8979 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
8980 __m128i right = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
8981 __m128i left = _mm_add_epi32(right, side);
8983 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
8984 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
8987 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8998 #if defined(DRFLAC_SUPPORT_NEON)
9012 shift0_4 = vdupq_n_s32(shift0);
9013 shift1_4 = vdupq_n_s32(shift1);
9015 for (i = 0; i < frameCount4; ++i) {
9020 side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
9021 right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
9022 left = vaddq_u32(right, side);
9024 drflac__vst2q_u32((
drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
9027 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9040 #if defined(DRFLAC_SUPPORT_SSE2)
9041 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
9042 drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9044 #elif defined(DRFLAC_SUPPORT_NEON)
9046 drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9052 drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9067 mid = (mid << 1) | (side & 0x01);
9085 for (i = 0; i < frameCount4; ++i) {
9105 mid0 = (mid0 << 1) | (side0 & 0x01);
9106 mid1 = (mid1 << 1) | (side1 & 0x01);
9107 mid2 = (mid2 << 1) | (side2 & 0x01);
9108 mid3 = (mid3 << 1) | (side3 & 0x01);
9110 temp0L = (mid0 + side0) << shift;
9111 temp1L = (mid1 + side1) << shift;
9112 temp2L = (mid2 + side2) << shift;
9113 temp3L = (mid3 + side3) << shift;
9115 temp0R = (mid0 - side0) << shift;
9116 temp1R = (mid1 - side1) << shift;
9117 temp2R = (mid2 - side2) << shift;
9118 temp3R = (mid3 - side3) << shift;
9130 for (i = 0; i < frameCount4; ++i) {
9150 mid0 = (mid0 << 1) | (side0 & 0x01);
9151 mid1 = (mid1 << 1) | (side1 & 0x01);
9152 mid2 = (mid2 << 1) | (side2 & 0x01);
9153 mid3 = (mid3 << 1) | (side3 & 0x01);
9176 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9180 mid = (mid << 1) | (side & 0x01);
9187 #if defined(DRFLAC_SUPPORT_SSE2)
9199 for (i = 0; i < frameCount4; ++i) {
9208 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
9210 left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
9211 right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
9213 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
9214 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
9217 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9221 mid = (mid << 1) | (side & 0x01);
9223 pOutputSamples[i*2+0] = (
drflac_int32)(mid + side) >> 1;
9224 pOutputSamples[i*2+1] = (
drflac_int32)(mid - side) >> 1;
9228 for (i = 0; i < frameCount4; ++i) {
9237 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
9239 left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
9240 right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
9242 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
9243 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
9246 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9250 mid = (mid << 1) | (side & 0x01);
9252 pOutputSamples[i*2+0] = (
drflac_int32)((mid + side) << shift);
9253 pOutputSamples[i*2+1] = (
drflac_int32)((mid - side) << shift);
9259 #if defined(DRFLAC_SUPPORT_NEON)
9267 int32x4_t wbpsShift0_4;
9268 int32x4_t wbpsShift1_4;
9275 one4 = vdupq_n_u32(1);
9278 for (i = 0; i < frameCount4; ++i) {
9284 mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
9285 side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
9287 mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
9289 left = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
9290 right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
9292 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
9295 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9299 mid = (mid << 1) | (side & 0x01);
9301 pOutputSamples[i*2+0] = (
drflac_int32)(mid + side) >> 1;
9302 pOutputSamples[i*2+1] = (
drflac_int32)(mid - side) >> 1;
9308 shift4 = vdupq_n_s32(shift);
9310 for (i = 0; i < frameCount4; ++i) {
9316 mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
9317 side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
9319 mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
9321 left = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
9322 right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
9324 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
9327 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9331 mid = (mid << 1) | (side & 0x01);
9333 pOutputSamples[i*2+0] = (
drflac_int32)((mid + side) << shift);
9334 pOutputSamples[i*2+1] = (
drflac_int32)((mid - side) << shift);
9342 #if defined(DRFLAC_SUPPORT_SSE2)
9343 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
9344 drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9346 #elif defined(DRFLAC_SUPPORT_NEON)
9348 drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9354 drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9381 for (i = 0; i < frameCount4; ++i) {
9402 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9403 pOutputSamples[i*2+0] = (
drflac_int32)(pInputSamples0U32[i] << shift0);
9404 pOutputSamples[i*2+1] = (
drflac_int32)(pInputSamples1U32[i] << shift1);
9408 #if defined(DRFLAC_SUPPORT_SSE2)
9418 for (i = 0; i < frameCount4; ++i) {
9419 __m128i left = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
9420 __m128i right = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
9422 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
9423 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
9426 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9427 pOutputSamples[i*2+0] = (
drflac_int32)(pInputSamples0U32[i] << shift0);
9428 pOutputSamples[i*2+1] = (
drflac_int32)(pInputSamples1U32[i] << shift1);
9433 #if defined(DRFLAC_SUPPORT_NEON)
9443 int32x4_t shift4_0 = vdupq_n_s32(shift0);
9444 int32x4_t shift4_1 = vdupq_n_s32(shift1);
9446 for (i = 0; i < frameCount4; ++i) {
9450 left = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift4_0));
9451 right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift4_1));
9453 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
9456 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9457 pOutputSamples[i*2+0] = (
drflac_int32)(pInputSamples0U32[i] << shift0);
9458 pOutputSamples[i*2+1] = (
drflac_int32)(pInputSamples1U32[i] << shift1);
9465 #if defined(DRFLAC_SUPPORT_SSE2)
9466 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
9467 drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9469 #elif defined(DRFLAC_SUPPORT_NEON)
9471 drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9477 drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9490 if (pFlac ==
NULL || framesToRead == 0) {
9494 if (pBufferOut ==
NULL) {
9502 while (framesToRead > 0) {
9517 if (channelCount == 2) {
9547 for (i = 0; i < frameCountThisIteration; ++i) {
9549 for (j = 0; j < channelCount; ++j) {
9555 framesRead += frameCountThisIteration;
9556 pBufferOut += frameCountThisIteration * channelCount;
9557 framesToRead -= frameCountThisIteration;
9571 for (i = 0; i < frameCount; ++i) {
9594 for (i = 0; i < frameCount4; ++i) {
9630 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9643 #if defined(DRFLAC_SUPPORT_SSE2)
9655 for (i = 0; i < frameCount4; ++i) {
9656 __m128i left = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
9657 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
9658 __m128i right = _mm_sub_epi32(left, side);
9660 left = _mm_srai_epi32(left, 16);
9661 right = _mm_srai_epi32(right, 16);
9663 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
9666 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9680 #if defined(DRFLAC_SUPPORT_NEON)
9694 shift0_4 = vdupq_n_s32(shift0);
9695 shift1_4 = vdupq_n_s32(shift1);
9697 for (i = 0; i < frameCount4; ++i) {
9702 left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
9703 side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
9704 right = vsubq_u32(left, side);
9706 left = vshrq_n_u32(left, 16);
9707 right = vshrq_n_u32(right, 16);
9709 drflac__vst2q_u16((
drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
9712 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9728 #if defined(DRFLAC_SUPPORT_SSE2)
9729 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
9730 drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9732 #elif defined(DRFLAC_SUPPORT_NEON)
9734 drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9740 drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9752 for (i = 0; i < frameCount; ++i) {
9775 for (i = 0; i < frameCount4; ++i) {
9811 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9824 #if defined(DRFLAC_SUPPORT_SSE2)
9836 for (i = 0; i < frameCount4; ++i) {
9837 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
9838 __m128i right = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
9839 __m128i left = _mm_add_epi32(right, side);
9841 left = _mm_srai_epi32(left, 16);
9842 right = _mm_srai_epi32(right, 16);
9844 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
9847 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9861 #if defined(DRFLAC_SUPPORT_NEON)
9875 shift0_4 = vdupq_n_s32(shift0);
9876 shift1_4 = vdupq_n_s32(shift1);
9878 for (i = 0; i < frameCount4; ++i) {
9883 side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
9884 right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
9885 left = vaddq_u32(right, side);
9887 left = vshrq_n_u32(left, 16);
9888 right = vshrq_n_u32(right, 16);
9890 drflac__vst2q_u16((
drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
9893 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9909 #if defined(DRFLAC_SUPPORT_SSE2)
9910 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
9911 drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9913 #elif defined(DRFLAC_SUPPORT_NEON)
9915 drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9921 drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9936 mid = (mid << 1) | (side & 0x01);
9954 for (i = 0; i < frameCount4; ++i) {
9974 mid0 = (mid0 << 1) | (side0 & 0x01);
9975 mid1 = (mid1 << 1) | (side1 & 0x01);
9976 mid2 = (mid2 << 1) | (side2 & 0x01);
9977 mid3 = (mid3 << 1) | (side3 & 0x01);
9979 temp0L = (mid0 + side0) << shift;
9980 temp1L = (mid1 + side1) << shift;
9981 temp2L = (mid2 + side2) << shift;
9982 temp3L = (mid3 + side3) << shift;
9984 temp0R = (mid0 - side0) << shift;
9985 temp1R = (mid1 - side1) << shift;
9986 temp2R = (mid2 - side2) << shift;
9987 temp3R = (mid3 - side3) << shift;
10009 for (i = 0; i < frameCount4; ++i) {
10029 mid0 = (mid0 << 1) | (side0 & 0x01);
10030 mid1 = (mid1 << 1) | (side1 & 0x01);
10031 mid2 = (mid2 << 1) | (side2 & 0x01);
10032 mid3 = (mid3 << 1) | (side3 & 0x01);
10065 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10069 mid = (mid << 1) | (side & 0x01);
10076 #if defined(DRFLAC_SUPPORT_SSE2)
10088 for (i = 0; i < frameCount4; ++i) {
10097 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
10099 left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
10100 right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
10102 left = _mm_srai_epi32(left, 16);
10103 right = _mm_srai_epi32(right, 16);
10105 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
10108 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10112 mid = (mid << 1) | (side & 0x01);
10119 for (i = 0; i < frameCount4; ++i) {
10128 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
10130 left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
10131 right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
10133 left = _mm_srai_epi32(left, 16);
10134 right = _mm_srai_epi32(right, 16);
10136 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
10139 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10143 mid = (mid << 1) | (side & 0x01);
10145 pOutputSamples[i*2+0] = (
drflac_int16)(((mid + side) << shift) >> 16);
10146 pOutputSamples[i*2+1] = (
drflac_int16)(((mid - side) << shift) >> 16);
10152 #if defined(DRFLAC_SUPPORT_NEON)
10160 int32x4_t wbpsShift0_4;
10161 int32x4_t wbpsShift1_4;
10169 for (i = 0; i < frameCount4; ++i) {
10175 mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
10176 side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
10178 mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
10180 left = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
10181 right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
10183 left = vshrq_n_s32(left, 16);
10184 right = vshrq_n_s32(right, 16);
10186 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
10189 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10193 mid = (mid << 1) | (side & 0x01);
10202 shift4 = vdupq_n_s32(shift);
10204 for (i = 0; i < frameCount4; ++i) {
10210 mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
10211 side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
10213 mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
10215 left = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
10216 right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
10218 left = vshrq_n_s32(left, 16);
10219 right = vshrq_n_s32(right, 16);
10221 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
10224 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10228 mid = (mid << 1) | (side & 0x01);
10230 pOutputSamples[i*2+0] = (
drflac_int16)(((mid + side) << shift) >> 16);
10231 pOutputSamples[i*2+1] = (
drflac_int16)(((mid - side) << shift) >> 16);
10239 #if defined(DRFLAC_SUPPORT_SSE2)
10240 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
10241 drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10243 #elif defined(DRFLAC_SUPPORT_NEON)
10245 drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10251 drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10278 for (i = 0; i < frameCount4; ++i) {
10309 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10310 pOutputSamples[i*2+0] = (
drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
10311 pOutputSamples[i*2+1] = (
drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
10315 #if defined(DRFLAC_SUPPORT_SSE2)
10325 for (i = 0; i < frameCount4; ++i) {
10326 __m128i left = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
10327 __m128i right = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
10329 left = _mm_srai_epi32(left, 16);
10330 right = _mm_srai_epi32(right, 16);
10333 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
10336 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10337 pOutputSamples[i*2+0] = (
drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
10338 pOutputSamples[i*2+1] = (
drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
10343 #if defined(DRFLAC_SUPPORT_NEON)
10353 int32x4_t shift0_4 = vdupq_n_s32(shift0);
10354 int32x4_t shift1_4 = vdupq_n_s32(shift1);
10356 for (i = 0; i < frameCount4; ++i) {
10360 left = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
10361 right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
10363 left = vshrq_n_s32(left, 16);
10364 right = vshrq_n_s32(right, 16);
10366 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
10369 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10370 pOutputSamples[i*2+0] = (
drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
10371 pOutputSamples[i*2+1] = (
drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
10378 #if defined(DRFLAC_SUPPORT_SSE2)
10379 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
10380 drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10382 #elif defined(DRFLAC_SUPPORT_NEON)
10384 drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10390 drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10402 if (pFlac ==
NULL || framesToRead == 0) {
10406 if (pBufferOut ==
NULL) {
10414 while (framesToRead > 0) {
10429 if (channelCount == 2) {
10459 for (i = 0; i < frameCountThisIteration; ++i) {
10461 for (j = 0; j < channelCount; ++j) {
10463 pBufferOut[(i*channelCount)+j] = (
drflac_int16)(sampleS32 >> 16);
10468 framesRead += frameCountThisIteration;
10469 pBufferOut += frameCountThisIteration * channelCount;
10470 framesToRead -= frameCountThisIteration;
10484 for (i = 0; i < frameCount; ++i) {
10489 pOutputSamples[i*2+0] = (float)((
drflac_int32)left / 2147483648.0);
10490 pOutputSamples[i*2+1] = (float)((
drflac_int32)right / 2147483648.0);
10504 float factor = 1 / 2147483648.0;
10506 for (i = 0; i < frameCount4; ++i) {
10523 pOutputSamples[i*8+1] = (
drflac_int32)right0 * factor;
10525 pOutputSamples[i*8+3] = (
drflac_int32)right1 * factor;
10527 pOutputSamples[i*8+5] = (
drflac_int32)right2 * factor;
10529 pOutputSamples[i*8+7] = (
drflac_int32)right3 * factor;
10532 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10542 #if defined(DRFLAC_SUPPORT_SSE2)
10555 factor = _mm_set1_ps(1.0f / 8388608.0f);
10557 for (i = 0; i < frameCount4; ++i) {
10558 __m128i left = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
10559 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
10560 __m128i right = _mm_sub_epi32(left, side);
10561 __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor);
10562 __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
10564 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
10565 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
10568 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10573 pOutputSamples[i*2+0] = (
drflac_int32)left / 8388608.0f;
10574 pOutputSamples[i*2+1] = (
drflac_int32)right / 8388608.0f;
10579 #if defined(DRFLAC_SUPPORT_NEON)
10588 float32x4_t factor4;
10589 int32x4_t shift0_4;
10590 int32x4_t shift1_4;
10594 factor4 = vdupq_n_f32(1.0f / 8388608.0f);
10595 shift0_4 = vdupq_n_s32(shift0);
10596 shift1_4 = vdupq_n_s32(shift1);
10598 for (i = 0; i < frameCount4; ++i) {
10603 float32x4_t rightf;
10605 left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
10606 side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
10607 right = vsubq_u32(left, side);
10608 leftf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)), factor4);
10609 rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
10611 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
10614 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10619 pOutputSamples[i*2+0] = (
drflac_int32)left / 8388608.0f;
10620 pOutputSamples[i*2+1] = (
drflac_int32)right / 8388608.0f;
10627 #if defined(DRFLAC_SUPPORT_SSE2)
10628 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
10629 drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10631 #elif defined(DRFLAC_SUPPORT_NEON)
10633 drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10639 drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10651 for (i = 0; i < frameCount; ++i) {
10656 pOutputSamples[i*2+0] = (float)((
drflac_int32)left / 2147483648.0);
10657 pOutputSamples[i*2+1] = (float)((
drflac_int32)right / 2147483648.0);
10670 float factor = 1 / 2147483648.0;
10672 for (i = 0; i < frameCount4; ++i) {
10689 pOutputSamples[i*8+1] = (
drflac_int32)right0 * factor;
10691 pOutputSamples[i*8+3] = (
drflac_int32)right1 * factor;
10693 pOutputSamples[i*8+5] = (
drflac_int32)right2 * factor;
10695 pOutputSamples[i*8+7] = (
drflac_int32)right3 * factor;
10698 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10708 #if defined(DRFLAC_SUPPORT_SSE2)
10721 factor = _mm_set1_ps(1.0f / 8388608.0f);
10723 for (i = 0; i < frameCount4; ++i) {
10724 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
10725 __m128i right = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
10726 __m128i left = _mm_add_epi32(right, side);
10727 __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor);
10728 __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
10730 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
10731 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
10734 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10739 pOutputSamples[i*2+0] = (
drflac_int32)left / 8388608.0f;
10740 pOutputSamples[i*2+1] = (
drflac_int32)right / 8388608.0f;
10745 #if defined(DRFLAC_SUPPORT_NEON)
10754 float32x4_t factor4;
10755 int32x4_t shift0_4;
10756 int32x4_t shift1_4;
10760 factor4 = vdupq_n_f32(1.0f / 8388608.0f);
10761 shift0_4 = vdupq_n_s32(shift0);
10762 shift1_4 = vdupq_n_s32(shift1);
10764 for (i = 0; i < frameCount4; ++i) {
10769 float32x4_t rightf;
10771 side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
10772 right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
10773 left = vaddq_u32(right, side);
10774 leftf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)), factor4);
10775 rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
10777 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
10780 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10785 pOutputSamples[i*2+0] = (
drflac_int32)left / 8388608.0f;
10786 pOutputSamples[i*2+1] = (
drflac_int32)right / 8388608.0f;
10793 #if defined(DRFLAC_SUPPORT_SSE2)
10794 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
10795 drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10797 #elif defined(DRFLAC_SUPPORT_NEON)
10799 drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10805 drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10820 mid = (mid << 1) | (side & 0x01);
10822 pOutputSamples[i*2+0] = (float)((((
drflac_int32)(mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
10823 pOutputSamples[i*2+1] = (float)((((
drflac_int32)(mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
10835 float factor = 1 / 2147483648.0;
10839 for (i = 0; i < frameCount4; ++i) {
10859 mid0 = (mid0 << 1) | (side0 & 0x01);
10860 mid1 = (mid1 << 1) | (side1 & 0x01);
10861 mid2 = (mid2 << 1) | (side2 & 0x01);
10862 mid3 = (mid3 << 1) | (side3 & 0x01);
10864 temp0L = (mid0 + side0) << shift;
10865 temp1L = (mid1 + side1) << shift;
10866 temp2L = (mid2 + side2) << shift;
10867 temp3L = (mid3 + side3) << shift;
10869 temp0R = (mid0 - side0) << shift;
10870 temp1R = (mid1 - side1) << shift;
10871 temp2R = (mid2 - side2) << shift;
10872 temp3R = (mid3 - side3) << shift;
10874 pOutputSamples[i*8+0] = (
drflac_int32)temp0L * factor;
10875 pOutputSamples[i*8+1] = (
drflac_int32)temp0R * factor;
10876 pOutputSamples[i*8+2] = (
drflac_int32)temp1L * factor;
10877 pOutputSamples[i*8+3] = (
drflac_int32)temp1R * factor;
10878 pOutputSamples[i*8+4] = (
drflac_int32)temp2L * factor;
10879 pOutputSamples[i*8+5] = (
drflac_int32)temp2R * factor;
10880 pOutputSamples[i*8+6] = (
drflac_int32)temp3L * factor;
10881 pOutputSamples[i*8+7] = (
drflac_int32)temp3R * factor;
10884 for (i = 0; i < frameCount4; ++i) {
10904 mid0 = (mid0 << 1) | (side0 & 0x01);
10905 mid1 = (mid1 << 1) | (side1 & 0x01);
10906 mid2 = (mid2 << 1) | (side2 & 0x01);
10907 mid3 = (mid3 << 1) | (side3 & 0x01);
10919 pOutputSamples[i*8+0] = (
drflac_int32)temp0L * factor;
10920 pOutputSamples[i*8+1] = (
drflac_int32)temp0R * factor;
10921 pOutputSamples[i*8+2] = (
drflac_int32)temp1L * factor;
10922 pOutputSamples[i*8+3] = (
drflac_int32)temp1R * factor;
10923 pOutputSamples[i*8+4] = (
drflac_int32)temp2L * factor;
10924 pOutputSamples[i*8+5] = (
drflac_int32)temp2R * factor;
10925 pOutputSamples[i*8+6] = (
drflac_int32)temp3L * factor;
10926 pOutputSamples[i*8+7] = (
drflac_int32)temp3R * factor;
10930 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10934 mid = (mid << 1) | (side & 0x01);
10941 #if defined(DRFLAC_SUPPORT_SSE2)
10954 factor = 1.0f / 8388608.0f;
10955 factor128 = _mm_set1_ps(factor);
10958 for (i = 0; i < frameCount4; ++i) {
10969 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
10971 tempL = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
10972 tempR = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
10974 leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
10975 rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
10977 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
10978 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
10981 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10985 mid = (mid << 1) | (side & 0x01);
10987 pOutputSamples[i*2+0] = ((
drflac_int32)(mid + side) >> 1) * factor;
10988 pOutputSamples[i*2+1] = ((
drflac_int32)(mid - side) >> 1) * factor;
10992 for (i = 0; i < frameCount4; ++i) {
11003 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
11005 tempL = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
11006 tempR = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
11008 leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
11009 rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
11011 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
11012 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
11015 for (i = (frameCount4 << 2); i < frameCount; ++i) {
11019 mid = (mid << 1) | (side & 0x01);
11021 pOutputSamples[i*2+0] = (
drflac_int32)((mid + side) << shift) * factor;
11022 pOutputSamples[i*2+1] = (
drflac_int32)((mid - side) << shift) * factor;
11028 #if defined(DRFLAC_SUPPORT_NEON)
11037 float32x4_t factor4;
11044 factor = 1.0f / 8388608.0f;
11045 factor4 = vdupq_n_f32(factor);
11050 for (i = 0; i < frameCount4; ++i) {
11054 float32x4_t rightf;
11056 uint32x4_t mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
11057 uint32x4_t side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
11059 mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
11061 lefti = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
11062 righti = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
11064 leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4);
11065 rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
11067 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
11070 for (i = (frameCount4 << 2); i < frameCount; ++i) {
11074 mid = (mid << 1) | (side & 0x01);
11076 pOutputSamples[i*2+0] = ((
drflac_int32)(mid + side) >> 1) * factor;
11077 pOutputSamples[i*2+1] = ((
drflac_int32)(mid - side) >> 1) * factor;
11081 shift4 = vdupq_n_s32(shift);
11082 for (i = 0; i < frameCount4; ++i) {
11088 float32x4_t rightf;
11090 mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
11091 side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
11093 mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
11095 lefti = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
11096 righti = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
11098 leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4);
11099 rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
11101 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
11104 for (i = (frameCount4 << 2); i < frameCount; ++i) {
11108 mid = (mid << 1) | (side & 0x01);
11110 pOutputSamples[i*2+0] = (
drflac_int32)((mid + side) << shift) * factor;
11111 pOutputSamples[i*2+1] = (
drflac_int32)((mid - side) << shift) * factor;
11119 #if defined(DRFLAC_SUPPORT_SSE2)
11120 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
11121 drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
11123 #elif defined(DRFLAC_SUPPORT_NEON)
11125 drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
11131 drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
11156 float factor = 1 / 2147483648.0;
11158 for (i = 0; i < frameCount4; ++i) {
11169 pOutputSamples[i*8+0] = (
drflac_int32)tempL0 * factor;
11170 pOutputSamples[i*8+1] = (
drflac_int32)tempR0 * factor;
11171 pOutputSamples[i*8+2] = (
drflac_int32)tempL1 * factor;
11172 pOutputSamples[i*8+3] = (
drflac_int32)tempR1 * factor;
11173 pOutputSamples[i*8+4] = (
drflac_int32)tempL2 * factor;
11174 pOutputSamples[i*8+5] = (
drflac_int32)tempR2 * factor;
11175 pOutputSamples[i*8+6] = (
drflac_int32)tempL3 * factor;
11176 pOutputSamples[i*8+7] = (
drflac_int32)tempR3 * factor;
11179 for (i = (frameCount4 << 2); i < frameCount; ++i) {
11180 pOutputSamples[i*2+0] = (
drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
11181 pOutputSamples[i*2+1] = (
drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
/* SSE2 variant; compiled only when DRFLAC_SUPPORT_SSE2 is defined. The function header and local declarations are elided. */
11185 #if defined(DRFLAC_SUPPORT_SSE2)
/* Scale factor of 1/2^23, broadcast to all four lanes for the vector multiply. */
11195 float factor = 1.0f / 8388608.0f;
11196 __m128 factor128 = _mm_set1_ps(factor);
/* Vector loop: four frames per iteration. */
11198 for (i = 0; i < frameCount4; ++i) {
/*
The cast to (const __m128i*) is applied before "+ i", so the pointer advances by i whole 128-bit vectors.
_mm_loadu_si128 is the unaligned load, so no alignment is required of the input here.
*/
11204 lefti = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
11205 righti = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
/* Convert the shifted signed integers to float and scale. */
11207 leftf = _mm_mul_ps(_mm_cvtepi32_ps(lefti), factor128);
11208 rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128);
/* Interleave: unpacklo yields L0 R0 L1 R1 and unpackhi yields L2 R2 L3 R3, stored as eight consecutive floats. */
11210 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
11211 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
/* Scalar tail from index (frameCount4 << 2) up to frameCount, using the same shift and factor as the vector loop. */
11214 for (i = (frameCount4 << 2); i < frameCount; ++i) {
11215 pOutputSamples[i*2+0] = (
drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
11216 pOutputSamples[i*2+1] = (
drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
/* NEON variant; compiled only when DRFLAC_SUPPORT_NEON is defined. The function header and some declarations are elided. */
11221 #if defined(DRFLAC_SUPPORT_NEON)
/* Scale factor of 1/2^23 and the two per-channel shift amounts, each broadcast to four lanes. */
11231 float factor = 1.0f / 8388608.0f;
11232 float32x4_t factor4 = vdupq_n_f32(factor);
11233 int32x4_t shift0_4 = vdupq_n_s32(shift0);
11234 int32x4_t shift1_4 = vdupq_n_s32(shift1);
/* Vector loop: four frames per iteration. */
11236 for (i = 0; i < frameCount4; ++i) {
11240 float32x4_t rightf;
/* Load four unsigned samples per channel, shift by the per-lane amounts, then reinterpret the bits as signed. */
11242 lefti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
11243 righti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
/* Convert to float and scale. */
11245 leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4);
11246 rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
/* vzipq_f32 pairs up the left/right lanes; drflac__vst2q_f32 is a helper defined elsewhere - presumably it stores both halves contiguously, confirm. */
11248 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
/* Scalar tail from index (frameCount4 << 2) up to frameCount, using the same shift and factor as the vector loop. */
11251 for (i = (frameCount4 << 2); i < frameCount; ++i) {
11252 pOutputSamples[i*2+0] = (
drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
11253 pOutputSamples[i*2+1] = (
drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
/* Selects one of the f32 independent-stereo decode routines. The enclosing function header and several lines are outside this excerpt. */
11260 #if defined(DRFLAC_SUPPORT_SSE2)
/* The SSE2 routine is used only when the drflac__gIsSSE2Supported flag is set and the stream is at most 24 bits per sample. */
11261 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
11262 drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* NEON builds: the condition guarding the call below is not visible in this excerpt. */
11264 #elif defined(DRFLAC_SUPPORT_NEON)
11266 drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* NOTE(review): presumably the fallback when no SIMD path applies - the surrounding else/endif lines are elided, confirm. */
11272 drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* Guard: a NULL decoder or a zero-frame request takes an early branch (its body is outside this excerpt). */
11284 if (pFlac ==
NULL || framesToRead == 0) {
/* A NULL output buffer is handled as a separate case; the body is not visible here. */
11288 if (pBufferOut ==
NULL) {
/* Loop while frames are still requested; framesToRead is decremented at the bottom of each pass. */
11296 while (framesToRead > 0) {
/* Two-channel streams take a dedicated branch (contents elided). */
11311 if (channelCount == 2) {
/* Per-frame, per-channel loop writing interleaved output at index (i*channelCount)+j. */
11341 for (i = 0; i < frameCountThisIteration; ++i) {
11343 for (j = 0; j < channelCount; ++j) {
/* Divide by 2^31 in double precision, then narrow the result to float. */
11345 pBufferOut[(i*channelCount)+j] = (
float)(sampleS32 / 2147483648.0);
/* Bookkeeping: count the frames produced, advance the output cursor by that many interleaved samples, and reduce the remaining request. */
11350 framesRead += frameCountThisIteration;
11351 pBufferOut += frameCountThisIteration * channelCount;
11352 framesToRead -= frameCountThisIteration;
/* Guard against a NULL decoder (branch body is outside this excerpt). */
11364 if (pFlac ==
NULL) {
/* Frame index 0 is handled as its own case. */
11381 if (pcmFrameIndex == 0) {
/* Compares the frames already consumed from the current FLAC frame against offsetAbs; both are computed on elided lines. */
11407 if (currentFLACFramePCMFramesConsumed > offsetAbs) {
/* The code under this guard is compiled out when DR_FLAC_NO_OGG is defined. */
11418 #ifndef DR_FLAC_NO_OGG
/* The code under this guard is compiled only when DR_FLAC_NO_CRC is not defined. */
11431 #if !defined(DR_FLAC_NO_CRC)
/* Extra work is done only when the seek succeeded (contents elided). */
11444 if (wasSuccessful) {
/* The success flag is the function's result. */
11454 return wasSuccessful;
/*
DRFLAC_SIZE_MAX: upper bound used when checking that a byte count fits in size_t. SIZE_MAX is used when the
toolchain provides it; otherwise a 64-bit or 32-bit all-ones constant is chosen based on DRFLAC_64BIT.
*/
11462 #if defined(SIZE_MAX)
11463 #define DRFLAC_SIZE_MAX SIZE_MAX
11465 #if defined(DRFLAC_64BIT)
11466 #define DRFLAC_SIZE_MAX ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
11468 #define DRFLAC_SIZE_MAX 0xFFFFFFFF
/*
Generates drflac__full_read_and_close_<extension>(), a static helper that decodes an already-opened stream
into one heap buffer of `type` samples and closes the decoder before returning.

  extension - suffix pasted onto drflac_read_pcm_frames_ to pick the reader, and onto the helper's own name.
  type      - sample type of the returned buffer.

The generated function stores the channel count, sample rate and decoded frame count through the output
pointers that are non-NULL, and returns the sample buffer. Several lines (braces, error exits) are elided
from this excerpt.
*/
11474 #define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \
11475 static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\
11477 type* pSampleData = NULL; \
11478 drflac_uint64 totalPCMFrameCount; \
11480 DRFLAC_ASSERT(pFlac != NULL); \
11482 totalPCMFrameCount = pFlac->totalPCMFrameCount; \
/* When the decoder reports a total of 0 frames, decode chunk by chunk until a read returns 0, growing the heap buffer as needed. */ \
11484 if (totalPCMFrameCount == 0) { \
11485 type buffer[4096]; \
11486 drflac_uint64 pcmFramesRead; \
11487 size_t sampleDataBufferSize = sizeof(buffer); \
11489 pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks); \
11490 if (pSampleData == NULL) { \
/* Each read asks for as many whole frames as fit in the 4096-element stack buffer. */ \
11494 while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) { \
/* Double the heap buffer when the new chunk would not fit. One doubling is enough because a chunk is never larger than the initial allocation. */ \
11495 if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) { \
11496 type* pNewSampleData; \
11497 size_t newSampleDataBufferSize; \
11499 newSampleDataBufferSize = sampleDataBufferSize * 2; \
11500 pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks); \
/* On reallocation failure the old buffer is still owned here, so it is released (the exit that follows is elided). */ \
11501 if (pNewSampleData == NULL) { \
11502 drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks); \
11506 sampleDataBufferSize = newSampleDataBufferSize; \
11507 pSampleData = pNewSampleData; \
/* Append the chunk after the frames decoded so far. */ \
11510 DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type))); \
11511 totalPCMFrameCount += pcmFramesRead; \
/* Zero the part of the allocation that lies beyond the decoded samples. */ \
11516 DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type))); \
/* Known-length path (the else line is elided): compute the byte size in 64 bits and check it against DRFLAC_SIZE_MAX before narrowing to size_t. */ \
11518 drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type); \
11519 if (dataSize > (drflac_uint64)DRFLAC_SIZE_MAX) { \
11523 pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks); \
11524 if (pSampleData == NULL) { \
/* The count actually returned by the reader replaces the total taken from the decoder. */ \
11528 totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData); \
/* Outputs are optional: each one is written only when its pointer is non-NULL. */ \
11531 if (sampleRateOut) *sampleRateOut = pFlac->sampleRate; \
11532 if (channelsOut) *channelsOut = pFlac->channels; \
11533 if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount; \
11535 drflac_close(pFlac); \
11536 return pSampleData; \
/* NOTE(review): this second close presumably belongs to the shared error exit - its label and return are elided, confirm. */ \
11539 drflac_close(pFlac); \
/* Optional outputs are zeroed before the stream is opened (function header and earlier lines are outside this excerpt). */
11554 if (sampleRateOut) {
11555 *sampleRateOut = 0;
11557 if (totalPCMFrameCountOut) {
11558 *totalPCMFrameCountOut = 0;
/* Open a decoder over the caller's read/seek callbacks. */
11561 pFlac =
drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
/* Open failure is handled in a branch whose body is elided. */
11562 if (pFlac ==
NULL) {
/* Hand the open decoder to the s32 full-read helper, whose result is returned directly. */
11566 return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
/* Optional outputs are zeroed before the stream is opened (function header and earlier lines are outside this excerpt). */
11576 if (sampleRateOut) {
11577 *sampleRateOut = 0;
11579 if (totalPCMFrameCountOut) {
11580 *totalPCMFrameCountOut = 0;
/* Open a decoder over the caller's read/seek callbacks. */
11583 pFlac =
drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
/* Open failure is handled in a branch whose body is elided. */
11584 if (pFlac ==
NULL) {
/* Hand the open decoder to the s16 full-read helper, whose result is returned directly. */
11588 return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
/* Optional outputs are zeroed before the stream is opened (function header and earlier lines are outside this excerpt). */
11598 if (sampleRateOut) {
11599 *sampleRateOut = 0;
11601 if (totalPCMFrameCountOut) {
11602 *totalPCMFrameCountOut = 0;
/* Open a decoder over the caller's read/seek callbacks. */
11605 pFlac =
drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
/* Open failure is handled in a branch whose body is elided. */
11606 if (pFlac ==
NULL) {
/* Hand the open decoder to the f32 full-read helper, whose result is returned directly. */
11610 return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
/* The code from here on is compiled only when DR_FLAC_NO_STDIO is not defined. */
11613 #ifndef DR_FLAC_NO_STDIO
/* Optional frame-count output is zeroed up front (function header and the open call are outside this excerpt). */
11624 if (totalPCMFrameCount) {
11625 *totalPCMFrameCount = 0;
/* A NULL decoder is handled in a branch whose body is elided. */
11629 if (pFlac ==
NULL) {
/* Hand the decoder to the s32 full-read helper, whose result is returned directly. */
11633 return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
/* Optional frame-count output is zeroed up front (function header and the open call are outside this excerpt). */
11646 if (totalPCMFrameCount) {
11647 *totalPCMFrameCount = 0;
/* A NULL decoder is handled in a branch whose body is elided. */
11651 if (pFlac ==
NULL) {
/* Hand the decoder to the s16 full-read helper, whose result is returned directly. */
11655 return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
/* Optional frame-count output is zeroed up front (function header and the open call are outside this excerpt). */
11668 if (totalPCMFrameCount) {
11669 *totalPCMFrameCount = 0;
/* A NULL decoder is handled in a branch whose body is elided. */
11673 if (pFlac ==
NULL) {
/* Hand the decoder to the f32 full-read helper, whose result is returned directly. */
11677 return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
/* Optional frame-count output is zeroed up front. NOTE(review): header and open call are elided - presumably the in-memory variant, confirm. */
11691 if (totalPCMFrameCount) {
11692 *totalPCMFrameCount = 0;
/* A NULL decoder is handled in a branch whose body is elided. */
11696 if (pFlac ==
NULL) {
/* Hand the decoder to the s32 full-read helper, whose result is returned directly. */
11700 return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
/* Optional frame-count output is zeroed up front. NOTE(review): header and open call are elided - presumably the in-memory variant, confirm. */
11713 if (totalPCMFrameCount) {
11714 *totalPCMFrameCount = 0;
/* A NULL decoder is handled in a branch whose body is elided. */
11718 if (pFlac ==
NULL) {
/* Hand the decoder to the s16 full-read helper, whose result is returned directly. */
11722 return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
/* Optional frame-count output is zeroed up front. NOTE(review): header and open call are elided - presumably the in-memory variant, confirm. */
11735 if (totalPCMFrameCount) {
11736 *totalPCMFrameCount = 0;
/* A NULL decoder is handled in a branch whose body is elided. */
11740 if (pFlac ==
NULL) {
/* Hand the decoder to the f32 full-read helper, whose result is returned directly. */
11744 return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
/* Branches on whether the caller supplied allocation callbacks; both arms are outside this excerpt. */
11750 if (pAllocationCallbacks !=
NULL) {
/* Guard against a NULL iterator; the branch body and the rest of the function are outside this excerpt. */
11762 if (pIter ==
NULL) {
/* Pointer to the comment text; how it is set is not visible in this excerpt. */
11773 const char* pComment;
/* The optional length output is zeroed first; the lines that follow are elided. */
11776 if (pCommentLengthOut) {
11777 *pCommentLengthOut = 0;
/* Once the length is known it is written through the optional output. */
11791 if (pCommentLengthOut) {
11792 *pCommentLengthOut = length;
/* Guard against a NULL iterator; the branch body and the rest of the function are outside this excerpt. */
11803 if (pIter ==
NULL) {
/* Read cursor that is advanced past each field as it is consumed. */
11814 const char* pRunningData;
/* Combine two halves into the 64-bit offset. Assumes offsetHi is a 64-bit type so the shift by 32 is defined - declaration is elided, TODO confirm. */
11826 cuesheetTrack.
offset = offsetLo | (offsetHi << 32);
/* Track number is one byte. */
11827 cuesheetTrack.
trackNumber = pRunningData[0]; pRunningData += 1;
/*
Flags byte: bit 0x80 feeds isAudio and bit 0x40 feeds preEmphasis; the cursor then skips 14 bytes.
NOTE(review): the FLAC format documentation describes the track-type bit as 0 = audio, 1 = non-audio -
confirm that the polarity of isAudio is what callers expect.
*/
11829 cuesheetTrack.
isAudio = (pRunningData[0] & 0x80) != 0;
11830 cuesheetTrack.
preEmphasis = (pRunningData[0] & 0x40) != 0; pRunningData += 14;
/* Index count is one byte. */
11831 cuesheetTrack.
indexCount = pRunningData[0]; pRunningData += 1;
/* The parsed track is copied out only when the caller supplied a destination. */
11837 if (pCuesheetTrack) {
11838 *pCuesheetTrack = cuesheetTrack;
11844 #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
11845 #pragma GCC diagnostic pop