232 #if defined(_MSC_VER) && _MSC_VER < 1600
/* Integer values used to represent boolean true (1) and false (0). */
254 #define DRFLAC_TRUE 1
255 #define DRFLAC_FALSE 0
/*
DRFLAC_DEPRECATED: compiler-specific "deprecated" annotation.

  - MSVC with _MSC_VER >= 1700               -> __declspec(deprecated)
  - GCC-compatible with __GNUC__ >= 4        -> __attribute__((deprecated))
  - __has_feature(attribute_deprecated) true -> __attribute__((deprecated))

The two empty definitions make the macro expand to nothing.
NOTE(review): the #else/#endif lines that separate these branches are not visible in this
excerpt; presumably the empty definitions are the fallbacks for compilers without the attribute.
*/
257 #if defined(_MSC_VER) && _MSC_VER >= 1700
258 #define DRFLAC_DEPRECATED __declspec(deprecated)
259 #elif (defined(__GNUC__) && __GNUC__ >= 4)
260 #define DRFLAC_DEPRECATED __attribute__((deprecated))
261 #elif defined(__has_feature)
262 #if __has_feature(attribute_deprecated)
263 #define DRFLAC_DEPRECATED __attribute__((deprecated))
265 #define DRFLAC_DEPRECATED
268 #define DRFLAC_DEPRECATED
/* Gives DR_FLAC_BUFFER_SIZE the value 4096 unless it was already defined before this point. */
276 #ifndef DR_FLAC_BUFFER_SIZE
277 #define DR_FLAC_BUFFER_SIZE 4096
285 #if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
/*
Numeric identifiers for metadata block types. Values 0 through 6 name the individual block
kinds (STREAMINFO ... PICTURE); 127 is the identifier used for an invalid block type.
*/
296 #define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO 0
297 #define DRFLAC_METADATA_BLOCK_TYPE_PADDING 1
298 #define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION 2
299 #define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE 3
300 #define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT 4
301 #define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET 5
302 #define DRFLAC_METADATA_BLOCK_TYPE_PICTURE 6
303 #define DRFLAC_METADATA_BLOCK_TYPE_INVALID 127
/*
Picture type codes, numbered consecutively from 0 (OTHER) to 20 (PUBLISHER_LOGOTYPE).
NOTE(review): the numbering appears to follow the ID3v2 APIC picture-type list that the FLAC
PICTURE metadata block refers to - confirm against the FLAC format specification.
*/
306 #define DRFLAC_PICTURE_TYPE_OTHER 0
307 #define DRFLAC_PICTURE_TYPE_FILE_ICON 1
308 #define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON 2
309 #define DRFLAC_PICTURE_TYPE_COVER_FRONT 3
310 #define DRFLAC_PICTURE_TYPE_COVER_BACK 4
311 #define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE 5
312 #define DRFLAC_PICTURE_TYPE_MEDIA 6
313 #define DRFLAC_PICTURE_TYPE_LEAD_ARTIST 7
314 #define DRFLAC_PICTURE_TYPE_ARTIST 8
315 #define DRFLAC_PICTURE_TYPE_CONDUCTOR 9
316 #define DRFLAC_PICTURE_TYPE_BAND 10
317 #define DRFLAC_PICTURE_TYPE_COMPOSER 11
318 #define DRFLAC_PICTURE_TYPE_LYRICIST 12
319 #define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION 13
320 #define DRFLAC_PICTURE_TYPE_DURING_RECORDING 14
321 #define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE 15
322 #define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE 16
323 #define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH 17
324 #define DRFLAC_PICTURE_TYPE_ILLUSTRATION 18
325 #define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE 19
326 #define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE 20
374 const void* pRawData;
406 const void* pComments;
415 const void* pTrackData;
/*
Function-pointer type for a data-reading callback.

pUserData   - untyped pointer handed to the callback; its meaning is not defined by this declaration.
pBufferOut  - untyped, writable destination buffer.
bytesToRead - requested byte count.

Returns a size_t.
NOTE(review): presumably the number of bytes actually read - confirm against the callers.
*/
448 typedef size_t (*
drflac_read_proc)(
void* pUserData,
void* pBufferOut,
size_t bytesToRead);
/*
Memory-management callback pointers. Each one receives pUserData as its final argument.

onMalloc  - takes a size and returns a void pointer.
onRealloc - takes an existing pointer plus a size and returns a void pointer.
onFree    - takes a pointer and returns nothing.

NOTE(review): the enclosing declaration is not visible in this excerpt; presumably these are
members of an allocation-callbacks structure.
*/
481 void* (* onMalloc)(
size_t sz,
void* pUserData);
482 void* (* onRealloc)(
void* p,
size_t sz,
void* pUserData);
483 void (* onFree)(
void* p,
void* pUserData);
491 size_t currentReadPos;
512 size_t unalignedByteCount;
811 #ifndef DR_FLAC_NO_STDIO
877 #ifndef DR_FLAC_NO_STDIO
909 const char* pRunningData;
929 const char* pRunningData;
976 #ifdef DR_FLAC_IMPLEMENTATION
979 #if defined(__GNUC__)
980 #pragma GCC diagnostic push
982 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
1000 #define DRFLAC_INLINE __forceinline
1001 #elif defined(__GNUC__)
1009 #if defined(__STRICT_ANSI__)
1010 #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
1012 #define DRFLAC_INLINE inline __attribute__((always_inline))
1015 #define DRFLAC_INLINE
1019 #if defined(__x86_64__) || defined(_M_X64)
1021 #elif defined(__i386) || defined(_M_IX86)
1023 #elif defined(__arm__) || defined(_M_ARM)
/*
SIMD support detection. The whole section is skipped when DR_FLAC_NO_SIMD is defined.

x86 / x64 (DRFLAC_X86 or DRFLAC_X64):
  - MSVC (not clang): DRFLAC_SUPPORT_SSE2 from _MSC_VER >= 1400, DRFLAC_SUPPORT_SSE41 from
    _MSC_VER >= 1600.
  - In the branch that tests __SSE2__ / __SSE4_1__ (presumably the non-MSVC #else, whose line is
    not visible here) support follows those compiler-defined macros.
  - Compilers that are neither GCC nor Clang but provide __has_include also probe for
    <emmintrin.h> / <smmintrin.h>.
  - DRFLAC_NO_SSE2 / DRFLAC_NO_SSE41 veto the corresponding feature.
  - <smmintrin.h> is included when SSE4.1 is supported, otherwise <emmintrin.h> when SSE2 is.

ARM (DRFLAC_ARM):
  - DRFLAC_SUPPORT_NEON is defined when __ARM_NEON, __aarch64__ or _M_ARM64 is defined, or when
    the __has_include probe finds <arm_neon.h>; DRFLAC_NO_NEON vetoes it.
  - <arm_neon.h> is included when NEON is supported.
*/
1028 #if !defined(DR_FLAC_NO_SIMD)
1029 #if defined(DRFLAC_X64) || defined(DRFLAC_X86)
1030 #if defined(_MSC_VER) && !defined(__clang__)
1032 #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2)
1033 #define DRFLAC_SUPPORT_SSE2
1035 #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41)
1036 #define DRFLAC_SUPPORT_SSE41
1040 #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2)
1041 #define DRFLAC_SUPPORT_SSE2
1043 #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41)
1044 #define DRFLAC_SUPPORT_SSE41
1049 #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
1050 #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>)
1051 #define DRFLAC_SUPPORT_SSE2
1053 #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>)
1054 #define DRFLAC_SUPPORT_SSE41
1058 #if defined(DRFLAC_SUPPORT_SSE41)
1059 #include <smmintrin.h>
1060 #elif defined(DRFLAC_SUPPORT_SSE2)
1061 #include <emmintrin.h>
1065 #if defined(DRFLAC_ARM)
1066 #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
1067 #define DRFLAC_SUPPORT_NEON
1071 #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
1072 #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
1073 #define DRFLAC_SUPPORT_NEON
1077 #if defined(DRFLAC_SUPPORT_NEON)
1078 #include <arm_neon.h>
1084 #if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
1085 #if defined(_MSC_VER) && !defined(__clang__)
1086 #if _MSC_VER >= 1400
1088 static void drflac__cpuid(
int info[4],
int fid)
1093 #define DRFLAC_NO_CPUID
1096 #if defined(__GNUC__) || defined(__clang__)
1097 static void drflac__cpuid(
int info[4],
int fid)
1106 #if defined(DRFLAC_X86) && defined(__PIC__)
1107 __asm__ __volatile__ (
1108 "xchg{l} {%%}ebx, %k1;"
1110 "xchg{l} {%%}ebx, %k1;"
1111 :
"=a"(info[0]),
"=&r"(info[1]),
"=c"(info[2]),
"=d"(info[3]) :
"a"(fid),
"c"(0)
1114 __asm__ __volatile__ (
1115 "cpuid" :
"=a"(info[0]),
"=b"(info[1]),
"=c"(info[2]),
"=d"(info[3]) :
"a"(fid),
"c"(0)
1120 #define DRFLAC_NO_CPUID
1124 #define DRFLAC_NO_CPUID
1129 #if defined(DRFLAC_SUPPORT_SSE2)
1130 #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2)
1131 #if defined(DRFLAC_X64)
1133 #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)
1136 #if defined(DRFLAC_NO_CPUID)
1140 drflac__cpuid(info, 1);
1141 return (info[3] & (1 << 26)) != 0;
1154 #if defined(DRFLAC_SUPPORT_SSE41)
1155 #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
1156 #if defined(DRFLAC_X64)
1158 #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__)
1161 #if defined(DRFLAC_NO_CPUID)
1165 drflac__cpuid(info, 1);
1166 return (info[2] & (1 << 19)) != 0;
/*
Defines DRFLAC_HAS_LZCNT_INTRINSIC when the compiler can provide a leading-zero-count primitive:
  - MSVC with _MSC_VER >= 1500 targeting x86 or x64,
  - GCC 4.7 or newer,
  - Clang, when __has_builtin reports __builtin_clzll or __builtin_clzl.
This is a compile-time check only; it does not by itself establish that the CPU running the
program implements the instruction.
*/
1178 #if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
1179 #define DRFLAC_HAS_LZCNT_INTRINSIC
1180 #elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
1181 #define DRFLAC_HAS_LZCNT_INTRINSIC
1182 #elif defined(__clang__)
1183 #if defined(__has_builtin)
1184 #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
1185 #define DRFLAC_HAS_LZCNT_INTRINSIC
/*
Defines DRFLAC_HAS_BYTESWAP16/32/64_INTRINSIC according to compiler capability:
  - MSVC with _MSC_VER >= 1400: all three widths.
  - Clang: each width individually, based on __has_builtin(__builtin_bswap16/32/64).
  - GCC: 32- and 64-bit from version 4.3, 16-bit from version 4.8.
Clang is tested before GCC, so a compiler defining both __clang__ and __GNUC__ takes the
Clang branch.
*/
1190 #if defined(_MSC_VER) && _MSC_VER >= 1400
1191 #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
1192 #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
1193 #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
1194 #elif defined(__clang__)
1195 #if defined(__has_builtin)
1196 #if __has_builtin(__builtin_bswap16)
1197 #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
1199 #if __has_builtin(__builtin_bswap32)
1200 #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
1202 #if __has_builtin(__builtin_bswap64)
1203 #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
1206 #elif defined(__GNUC__)
1207 #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
1208 #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
1209 #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
1211 #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
1212 #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
/*
Overridable runtime hooks. Each macro maps onto the C standard library routine of the same
purpose: assert, malloc, realloc, free, memcpy, and memset with a zero fill value.
DRFLAC_ASSERT, DRFLAC_MALLOC, DRFLAC_REALLOC, DRFLAC_COPY_MEMORY and DRFLAC_ZERO_MEMORY are
wrapped in #ifndef, so a definition supplied before this point takes precedence.
NOTE(review): the guard for DRFLAC_FREE and the closing #endif lines are not visible in this
excerpt; presumably DRFLAC_FREE is guarded the same way.
*/
1218 #ifndef DRFLAC_ASSERT
1220 #define DRFLAC_ASSERT(expression) assert(expression)
1222 #ifndef DRFLAC_MALLOC
1223 #define DRFLAC_MALLOC(sz) malloc((sz))
1225 #ifndef DRFLAC_REALLOC
1226 #define DRFLAC_REALLOC(p, sz) realloc((p), (sz))
1229 #define DRFLAC_FREE(p) free((p))
1231 #ifndef DRFLAC_COPY_MEMORY
1232 #define DRFLAC_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz))
1234 #ifndef DRFLAC_ZERO_MEMORY
1235 #define DRFLAC_ZERO_MEMORY(p, sz) memset((p), 0, (sz))
/* Upper bound used for SIMD vector sizing. NOTE(review): unit (bytes vs. bits) is not shown here - confirm at the use sites. */
1238 #define DRFLAC_MAX_SIMD_VECTOR_SIZE 64
/* Result codes: 0 means success; every failure code is negative. */
1241 #define DRFLAC_SUCCESS 0
1242 #define DRFLAC_ERROR -1
1243 #define DRFLAC_INVALID_ARGS -2
1244 #define DRFLAC_END_OF_STREAM -128
1245 #define DRFLAC_CRC_MISMATCH -129
/* Subframe type codes. */
1247 #define DRFLAC_SUBFRAME_CONSTANT 0
1248 #define DRFLAC_SUBFRAME_VERBATIM 1
1249 #define DRFLAC_SUBFRAME_FIXED 8
1250 #define DRFLAC_SUBFRAME_LPC 32
1251 #define DRFLAC_SUBFRAME_RESERVED 255
/* Residual coding method codes. */
1253 #define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE 0
1254 #define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1
/* Channel assignment codes. */
1256 #define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT 0
1257 #define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE 8
1258 #define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE 9
1259 #define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE 10
/*
Rounds x up to the nearest multiple of a using integer arithmetic: add (a - 1), divide by a,
multiply by a. The argument a is evaluated three times, so it should not have side effects;
a must be non-zero because it is used as a divisor.
*/
1261 #define drflac_align(x, a) ((((x) + (a) - 1) / (a)) * (a))
/*
DRFLAC_NO_THREAD_SANITIZE: expands to __attribute__((no_sanitize("thread"))) when the compiler
provides __has_feature and reports thread_sanitizer as enabled. The two empty definitions make
it expand to nothing.
NOTE(review): the #else/#endif lines are not visible in this excerpt; presumably the empty
definitions are the fallbacks for the remaining cases.
*/
1265 #if defined(__has_feature)
1266 #if __has_feature(thread_sanitizer)
1267 #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread")))
1269 #define DRFLAC_NO_THREAD_SANITIZE
1272 #define DRFLAC_NO_THREAD_SANITIZE
1275 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
1279 #ifndef DRFLAC_NO_CPUID
1293 if (!isCPUCapsInitialized) {
1297 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
1298 drflac__cpuid(info, 0x80000001);
1299 drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0;
1317 #if defined(DRFLAC_SUPPORT_NEON)
1318 #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON)
1319 #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
1337 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
1347 #if defined(DRFLAC_X86) || defined(DRFLAC_X64)
1349 #elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
1353 return (*(
char*)&n) == 1;
1359 #ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC
1360 #if defined(_MSC_VER)
1361 return _byteswap_ushort(n);
1362 #elif defined(__GNUC__) || defined(__clang__)
1363 return __builtin_bswap16(n);
1365 #error "This compiler does not support the byte swap intrinsic."
1368 return ((n & 0xFF00) >> 8) |
1369 ((n & 0x00FF) << 8);
1375 #ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC
1376 #if defined(_MSC_VER)
1377 return _byteswap_ulong(n);
1378 #elif defined(__GNUC__) || defined(__clang__)
1379 #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT)
1382 __asm__ __volatile__ (
1383 #
if defined(DRFLAC_64BIT)
1384 "rev %w[out], %w[in]" : [out]
"=r"(r) : [in]
"r"(n)
1386 "rev %[out], %[in]" : [out]
"=r"(r) : [in]
"r"(n)
1391 return __builtin_bswap32(n);
1394 #error "This compiler does not support the byte swap intrinsic."
1397 return ((n & 0xFF000000) >> 24) |
1398 ((n & 0x00FF0000) >> 8) |
1399 ((n & 0x0000FF00) << 8) |
1400 ((n & 0x000000FF) << 24);
1406 #ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC
1407 #if defined(_MSC_VER)
1408 return _byteswap_uint64(n);
1409 #elif defined(__GNUC__) || defined(__clang__)
1410 return __builtin_bswap64(n);
1412 #error "This compiler does not support the byte swap intrinsic."
1468 result |= (n & 0x7F000000) >> 3;
1469 result |= (n & 0x007F0000) >> 2;
1470 result |= (n & 0x00007F00) >> 1;
1471 result |= (n & 0x0000007F) >> 0;
1480 0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
1481 0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
1482 0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
1483 0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
1484 0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
1485 0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
1486 0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
1487 0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
1488 0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
1489 0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
1490 0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
1491 0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
1492 0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
1493 0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
1494 0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
1495 0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
1499 0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011,
1500 0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022,
1501 0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072,
1502 0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041,
1503 0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2,
1504 0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1,
1505 0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1,
1506 0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082,
1507 0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192,
1508 0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1,
1509 0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1,
1510 0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2,
1511 0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151,
1512 0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162,
1513 0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132,
1514 0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101,
1515 0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312,
1516 0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321,
1517 0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371,
1518 0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342,
1519 0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1,
1520 0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2,
1521 0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2,
1522 0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381,
1523 0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291,
1524 0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2,
1525 0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2,
1526 0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1,
1527 0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252,
1528 0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261,
1529 0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231,
1530 0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202
1540 #ifdef DR_FLAC_NO_CRC
1549 for (
int i =
count-1; i >= 0; --i) {
1552 crc = ((crc << 1) | bit) ^ p;
1554 crc = ((crc << 1) | bit);
1564 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
1569 wholeBytes =
count >> 3;
1570 leftoverBits =
count - (wholeBytes*8);
1571 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
1573 switch (wholeBytes) {
1578 case 0:
if (leftoverBits > 0) crc = (crc << leftoverBits) ^
drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)];
1628 #ifdef DR_FLAC_NO_CRC
1637 for (
int i =
count-1; i >= 0; --i) {
1640 r = ((r << 1) | bit) ^ p;
1642 r = ((r << 1) | bit);
1653 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
1658 wholeBytes =
count >> 3;
1659 leftoverBits =
count & 7;
1660 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
1662 switch (wholeBytes) {
1668 case 0:
if (leftoverBits > 0) crc = (crc << leftoverBits) ^
drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
1677 #ifdef DR_FLAC_NO_CRC
1688 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
1693 wholeBytes =
count >> 3;
1694 leftoverBits =
count & 7;
1695 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
1697 switch (wholeBytes) {
1707 case 0:
if (leftoverBits > 0) crc = (crc << leftoverBits) ^
drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
1717 return drflac_crc16__64bit(crc, data,
count);
1719 return drflac_crc16__32bit(crc, data,
count);
/*
Aliases drflac__be2host__cache_line to either the 64-bit or the 32-bit big-endian-to-host helper.
NOTE(review): the conditional that chooses between the two definitions is not visible in this
excerpt; presumably it depends on whether the build is 64-bit - confirm.
*/
1726 #define drflac__be2host__cache_line drflac__be2host_64
1728 #define drflac__be2host__cache_line drflac__be2host_32
/* Helpers for the bit reader's two cache levels; bs is a pointer to the reader state. */
/* Size of the L1 cache member (bs->cache) in bytes. */
1740 #define DRFLAC_CACHE_L1_SIZE_BYTES(bs) (sizeof((bs)->cache))
/* Size of the L1 cache member in bits (bytes * 8). */
1741 #define DRFLAC_CACHE_L1_SIZE_BITS(bs) (sizeof((bs)->cache)*8)
/* L1 bits not yet consumed: total bits minus bs->consumedBits. */
1742 #define DRFLAC_CACHE_L1_BITS_REMAINING(bs) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits)
/* Mask with the most-significant _bitCount bits of a drflac_cache_t set and all others clear. */
1743 #define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount) (~((~(drflac_cache_t)0) >> (_bitCount)))
/* Right-shift distance that moves the top _bitCount bits down to the least-significant end. */
1744 #define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
/* The top _bitCount bits of the L1 cache, left in place (lower bits zeroed). */
1745 #define DRFLAC_CACHE_L1_SELECT(bs, _bitCount) (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
/* The top _bitCount bits of the L1 cache, shifted down to the least-significant end. With _bitCount == 0 the shift equals the full width. */
1746 #define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)))
/* Same as above, but the shift distance is masked with (size in bits - 1) so that _bitCount == 0 shifts by 0 instead of by the full width. */
1747 #define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1)))
/* Size of the L2 cache array (bs->cacheL2) in bytes. */
1748 #define DRFLAC_CACHE_L2_SIZE_BYTES(bs) (sizeof((bs)->cacheL2))
/* Number of elements (lines) in the L2 cache array. */
1749 #define DRFLAC_CACHE_L2_LINE_COUNT(bs) (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
/* L2 lines from index bs->nextL2Line to the end of the array. */
1750 #define DRFLAC_CACHE_L2_LINES_REMAINING(bs) (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)
1753 #ifndef DR_FLAC_NO_CRC
1799 size_t alignedL1LineCount;
1838 if (alignedL1LineCount > 0) {
1841 for (i = alignedL1LineCount; i > 0; --i) {
1859 #ifndef DR_FLAC_NO_CRC
1867 #ifndef DR_FLAC_NO_CRC
1881 if (bytesRead == 0) {
1893 #ifndef DR_FLAC_NO_CRC
1908 #ifndef DR_FLAC_NO_CRC
1937 bs->
cache <<= bitCount;
1942 bs->
cache <<= bitCount;
1964 bs->
cache <<= bitCountLo;
1983 signbit = ((result >> (bitCount-1)) & 0x01);
1984 result |= (~signbit + 1) << bitCount;
2021 if (!drflac__read_uint64(bs, bitCount, &result)) {
2025 signbit = ((result >> (bitCount-1)) & 0x01);
2026 result |= (~signbit + 1) << bitCount;
2108 bs->
cache <<= bitsToSeek;
2136 while (bitsToSeek >= 8) {
2145 if (bitsToSeek > 0) {
2175 #ifndef DR_FLAC_NO_CRC
2204 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
2205 #define DRFLAC_IMPLEMENT_CLZ_LZCNT
2207 #if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86))
2208 #define DRFLAC_IMPLEMENT_CLZ_MSVC
2219 1, 1, 1, 1, 1, 1, 1, 1
2226 n = clz_table_4[x >> (
sizeof(x)*8 - 4)];
2229 if ((x & ((
drflac_uint64)0xFFFFFFFF << 32)) == 0) { n = 32; x <<= 32; }
2230 if ((x & ((
drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; }
2231 if ((x & ((
drflac_uint64)0xFF000000 << 32)) == 0) { n += 8; x <<= 8; }
2232 if ((x & ((
drflac_uint64)0xF0000000 << 32)) == 0) { n += 4; x <<= 4; }
2234 if ((x & 0xFFFF0000) == 0) { n = 16; x <<= 16; }
2235 if ((x & 0xFF000000) == 0) { n += 8; x <<= 8; }
2236 if ((x & 0xF0000000) == 0) { n += 4; x <<= 4; }
2238 n += clz_table_4[x >> (
sizeof(x)*8 - 4)];
2244 #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
2248 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
2252 #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
2253 return drflac__gIsLZCNTSupported;
2262 #if defined(_MSC_VER) && !defined(__clang__)
2269 #if defined(__GNUC__) || defined(__clang__)
2270 #if defined(DRFLAC_X64)
2273 __asm__ __volatile__ (
2274 "lzcnt{ %1, %0| %0, %1}" :
"=r"(r) :
"r"(x)
2279 #elif defined(DRFLAC_X86)
2282 __asm__ __volatile__ (
2283 "lzcnt{l %1, %0| %0, %1}" :
"=r"(r) :
"r"(x)
2288 #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT)
2291 __asm__ __volatile__ (
2292 #
if defined(DRFLAC_64BIT)
2293 "clz %w[out], %w[in]" : [out]
"=r"(r) : [in]
"r"(x)
2295 "clz %[out], %[in]" : [out]
"=r"(r) : [in]
"r"(x)
2313 #error "This compiler does not support the lzcnt intrinsic."
2319 #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
2331 _BitScanReverse64((
unsigned long*)&n, x);
2333 _BitScanReverse((
unsigned long*)&n, x);
2335 return sizeof(x)*8 - n - 1;
2341 #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
2342 if (drflac__is_lzcnt_supported()) {
2343 return drflac__clz_lzcnt(x);
2347 #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
2348 return drflac__clz_msvc(x);
2361 while (bs->
cache == 0) {
2369 setBitOffsetPlus1 += 1;
2372 bs->
cache <<= setBitOffsetPlus1;
2374 *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1;
2390 if (offsetFromStart > 0x7FFFFFFF) {
2395 bytesRemaining -= 0x7FFFFFFF;
2397 while (bytesRemaining > 0x7FFFFFFF) {
2401 bytesRemaining -= 0x7FFFFFFF;
2404 if (bytesRemaining > 0) {
2425 unsigned char utf8[7] = {0};
2437 return DRFLAC_END_OF_STREAM;
2441 if ((utf8[0] & 0x80) == 0) {
2442 *pNumberOut = utf8[0];
2448 if ((utf8[0] & 0xE0) == 0xC0) {
2450 }
else if ((utf8[0] & 0xF0) == 0xE0) {
2452 }
else if ((utf8[0] & 0xF8) == 0xF0) {
2454 }
else if ((utf8[0] & 0xFC) == 0xF8) {
2456 }
else if ((utf8[0] & 0xFE) == 0xFC) {
2458 }
else if ((utf8[0] & 0xFF) == 0xFE) {
2468 result = (
drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
2469 for (i = 1; i < byteCount; ++i) {
2472 return DRFLAC_END_OF_STREAM;
2476 result = (result << 6) | (utf8[i] & 0x3F);
2479 *pNumberOut = result;
2503 case 32: prediction += coefficients[31] * pDecodedSamples[-32];
2504 case 31: prediction += coefficients[30] * pDecodedSamples[-31];
2505 case 30: prediction += coefficients[29] * pDecodedSamples[-30];
2506 case 29: prediction += coefficients[28] * pDecodedSamples[-29];
2507 case 28: prediction += coefficients[27] * pDecodedSamples[-28];
2508 case 27: prediction += coefficients[26] * pDecodedSamples[-27];
2509 case 26: prediction += coefficients[25] * pDecodedSamples[-26];
2510 case 25: prediction += coefficients[24] * pDecodedSamples[-25];
2511 case 24: prediction += coefficients[23] * pDecodedSamples[-24];
2512 case 23: prediction += coefficients[22] * pDecodedSamples[-23];
2513 case 22: prediction += coefficients[21] * pDecodedSamples[-22];
2514 case 21: prediction += coefficients[20] * pDecodedSamples[-21];
2515 case 20: prediction += coefficients[19] * pDecodedSamples[-20];
2516 case 19: prediction += coefficients[18] * pDecodedSamples[-19];
2517 case 18: prediction += coefficients[17] * pDecodedSamples[-18];
2518 case 17: prediction += coefficients[16] * pDecodedSamples[-17];
2519 case 16: prediction += coefficients[15] * pDecodedSamples[-16];
2520 case 15: prediction += coefficients[14] * pDecodedSamples[-15];
2521 case 14: prediction += coefficients[13] * pDecodedSamples[-14];
2522 case 13: prediction += coefficients[12] * pDecodedSamples[-13];
2523 case 12: prediction += coefficients[11] * pDecodedSamples[-12];
2524 case 11: prediction += coefficients[10] * pDecodedSamples[-11];
2525 case 10: prediction += coefficients[ 9] * pDecodedSamples[-10];
2526 case 9: prediction += coefficients[ 8] * pDecodedSamples[- 9];
2527 case 8: prediction += coefficients[ 7] * pDecodedSamples[- 8];
2528 case 7: prediction += coefficients[ 6] * pDecodedSamples[- 7];
2529 case 6: prediction += coefficients[ 5] * pDecodedSamples[- 6];
2530 case 5: prediction += coefficients[ 4] * pDecodedSamples[- 5];
2531 case 4: prediction += coefficients[ 3] * pDecodedSamples[- 4];
2532 case 3: prediction += coefficients[ 2] * pDecodedSamples[- 3];
2533 case 2: prediction += coefficients[ 1] * pDecodedSamples[- 2];
2534 case 1: prediction += coefficients[ 0] * pDecodedSamples[- 1];
2549 #ifndef DRFLAC_64BIT
2552 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2553 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2554 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
2555 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
2556 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
2557 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
2558 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
2559 prediction += coefficients[7] * (
drflac_int64)pDecodedSamples[-8];
2561 else if (order == 7)
2563 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2564 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2565 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
2566 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
2567 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
2568 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
2569 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
2571 else if (order == 3)
2573 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2574 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2575 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
2577 else if (order == 6)
2579 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2580 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2581 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
2582 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
2583 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
2584 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
2586 else if (order == 5)
2588 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2589 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2590 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
2591 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
2592 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
2594 else if (order == 4)
2596 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2597 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2598 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
2599 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
2601 else if (order == 12)
2603 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2604 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2605 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
2606 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
2607 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
2608 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
2609 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
2610 prediction += coefficients[7] * (
drflac_int64)pDecodedSamples[-8];
2611 prediction += coefficients[8] * (
drflac_int64)pDecodedSamples[-9];
2612 prediction += coefficients[9] * (
drflac_int64)pDecodedSamples[-10];
2613 prediction += coefficients[10] * (
drflac_int64)pDecodedSamples[-11];
2614 prediction += coefficients[11] * (
drflac_int64)pDecodedSamples[-12];
2616 else if (order == 2)
2618 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2619 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2621 else if (order == 1)
2623 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2625 else if (order == 10)
2627 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2628 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2629 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
2630 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
2631 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
2632 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
2633 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
2634 prediction += coefficients[7] * (
drflac_int64)pDecodedSamples[-8];
2635 prediction += coefficients[8] * (
drflac_int64)pDecodedSamples[-9];
2636 prediction += coefficients[9] * (
drflac_int64)pDecodedSamples[-10];
2638 else if (order == 9)
2640 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2641 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2642 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
2643 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
2644 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
2645 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
2646 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
2647 prediction += coefficients[7] * (
drflac_int64)pDecodedSamples[-8];
2648 prediction += coefficients[8] * (
drflac_int64)pDecodedSamples[-9];
2650 else if (order == 11)
2652 prediction = coefficients[0] * (
drflac_int64)pDecodedSamples[-1];
2653 prediction += coefficients[1] * (
drflac_int64)pDecodedSamples[-2];
2654 prediction += coefficients[2] * (
drflac_int64)pDecodedSamples[-3];
2655 prediction += coefficients[3] * (
drflac_int64)pDecodedSamples[-4];
2656 prediction += coefficients[4] * (
drflac_int64)pDecodedSamples[-5];
2657 prediction += coefficients[5] * (
drflac_int64)pDecodedSamples[-6];
2658 prediction += coefficients[6] * (
drflac_int64)pDecodedSamples[-7];
2659 prediction += coefficients[7] * (
drflac_int64)pDecodedSamples[-8];
2660 prediction += coefficients[8] * (
drflac_int64)pDecodedSamples[-9];
2661 prediction += coefficients[9] * (
drflac_int64)pDecodedSamples[-10];
2662 prediction += coefficients[10] * (
drflac_int64)pDecodedSamples[-11];
2669 for (j = 0; j < (int)order; ++j) {
2670 prediction += coefficients[j] * (
drflac_int64)pDecodedSamples[-j-1];
2683 case 32: prediction += coefficients[31] * (
drflac_int64)pDecodedSamples[-32];
2684 case 31: prediction += coefficients[30] * (
drflac_int64)pDecodedSamples[-31];
2685 case 30: prediction += coefficients[29] * (
drflac_int64)pDecodedSamples[-30];
2686 case 29: prediction += coefficients[28] * (
drflac_int64)pDecodedSamples[-29];
2687 case 28: prediction += coefficients[27] * (
drflac_int64)pDecodedSamples[-28];
2688 case 27: prediction += coefficients[26] * (
drflac_int64)pDecodedSamples[-27];
2689 case 26: prediction += coefficients[25] * (
drflac_int64)pDecodedSamples[-26];
2690 case 25: prediction += coefficients[24] * (
drflac_int64)pDecodedSamples[-25];
2691 case 24: prediction += coefficients[23] * (
drflac_int64)pDecodedSamples[-24];
2692 case 23: prediction += coefficients[22] * (
drflac_int64)pDecodedSamples[-23];
2693 case 22: prediction += coefficients[21] * (
drflac_int64)pDecodedSamples[-22];
2694 case 21: prediction += coefficients[20] * (
drflac_int64)pDecodedSamples[-21];
2695 case 20: prediction += coefficients[19] * (
drflac_int64)pDecodedSamples[-20];
2696 case 19: prediction += coefficients[18] * (
drflac_int64)pDecodedSamples[-19];
2697 case 18: prediction += coefficients[17] * (
drflac_int64)pDecodedSamples[-18];
2698 case 17: prediction += coefficients[16] * (
drflac_int64)pDecodedSamples[-17];
2699 case 16: prediction += coefficients[15] * (
drflac_int64)pDecodedSamples[-16];
2700 case 15: prediction += coefficients[14] * (
drflac_int64)pDecodedSamples[-15];
2701 case 14: prediction += coefficients[13] * (
drflac_int64)pDecodedSamples[-14];
2702 case 13: prediction += coefficients[12] * (
drflac_int64)pDecodedSamples[-13];
2703 case 12: prediction += coefficients[11] * (
drflac_int64)pDecodedSamples[-12];
2704 case 11: prediction += coefficients[10] * (
drflac_int64)pDecodedSamples[-11];
2705 case 10: prediction += coefficients[ 9] * (
drflac_int64)pDecodedSamples[-10];
2706 case 9: prediction += coefficients[ 8] * (
drflac_int64)pDecodedSamples[- 9];
2707 case 8: prediction += coefficients[ 7] * (
drflac_int64)pDecodedSamples[- 8];
2708 case 7: prediction += coefficients[ 6] * (
drflac_int64)pDecodedSamples[- 7];
2709 case 6: prediction += coefficients[ 5] * (
drflac_int64)pDecodedSamples[- 6];
2710 case 5: prediction += coefficients[ 4] * (
drflac_int64)pDecodedSamples[- 5];
2711 case 4: prediction += coefficients[ 3] * (
drflac_int64)pDecodedSamples[- 4];
2712 case 3: prediction += coefficients[ 2] * (
drflac_int64)pDecodedSamples[- 3];
2713 case 2: prediction += coefficients[ 1] * (
drflac_int64)pDecodedSamples[- 2];
2714 case 1: prediction += coefficients[ 0] * (
drflac_int64)pDecodedSamples[- 1];
2735 for (i = 0; i <
count; ++i) {
2751 if (riceParam > 0) {
2759 decodedRice |= (zeroCounter << riceParam);
2760 if ((decodedRice & 0x01)) {
2761 decodedRice = ~(decodedRice >> 1);
2763 decodedRice = (decodedRice >> 1);
2767 if (bitsPerSample+shift >= 32) {
2797 if (riceParam > 0) {
2805 *pZeroCounterOut = zeroCounter;
2806 *pRiceParamPartOut = decodedRice;
2825 while (bs->
cache == 0) {
2833 zeroCounter += setBitOffsetPlus1;
2834 setBitOffsetPlus1 += 1;
2836 riceLength = setBitOffsetPlus1 + riceParam;
2841 bs->
cache <<= riceLength;
2854 #ifndef DR_FLAC_NO_CRC
2859 #ifndef DR_FLAC_NO_CRC
2872 bs->
cache <<= bitCountLo;
2875 pZeroCounterOut[0] = zeroCounter;
2876 pRiceParamPartOut[0] = riceParamPart;
2898 if (lzcount <
sizeof(bs_cache)*8) {
2899 pZeroCounterOut[0] = lzcount;
2906 extract_rice_param_part:
2907 bs_cache <<= lzcount;
2908 bs_consumedBits += lzcount;
2910 if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
2912 pRiceParamPartOut[0] = (
drflac_uint32)(bs_cache >> riceParamPlus1Shift);
2913 bs_cache <<= riceParamPlus1;
2914 bs_consumedBits += riceParamPlus1;
2926 riceParamPartHi = (
drflac_uint32)(bs_cache >> riceParamPlus1Shift);
2929 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
2930 DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
2934 #ifndef DR_FLAC_NO_CRC
2938 bs_consumedBits = riceParamPartLoBitCount;
2939 #ifndef DR_FLAC_NO_CRC
2948 bs_cache = bs->
cache;
2949 bs_consumedBits = bs->
consumedBits + riceParamPartLoBitCount;
2954 pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo;
2956 bs_cache <<= riceParamPartLoBitCount;
2966 #ifndef DR_FLAC_NO_CRC
2970 bs_consumedBits = 0;
2971 #ifndef DR_FLAC_NO_CRC
2980 bs_cache = bs->
cache;
2985 zeroCounter += lzcount;
2987 if (lzcount <
sizeof(bs_cache)*8) {
2992 pZeroCounterOut[0] = zeroCounter;
2993 goto extract_rice_param_part;
2997 bs->
cache = bs_cache;
3017 if (lzcount <
sizeof(bs_cache)*8) {
3023 extract_rice_param_part:
3024 bs_cache <<= lzcount;
3025 bs_consumedBits += lzcount;
3027 if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
3029 bs_cache <<= riceParamPlus1;
3030 bs_consumedBits += riceParamPlus1;
3038 drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
3039 DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
3043 #ifndef DR_FLAC_NO_CRC
3047 bs_consumedBits = riceParamPartLoBitCount;
3048 #ifndef DR_FLAC_NO_CRC
3057 bs_cache = bs->
cache;
3058 bs_consumedBits = bs->
consumedBits + riceParamPartLoBitCount;
3061 bs_cache <<= riceParamPartLoBitCount;
3070 #ifndef DR_FLAC_NO_CRC
3074 bs_consumedBits = 0;
3075 #ifndef DR_FLAC_NO_CRC
3084 bs_cache = bs->
cache;
3089 if (lzcount <
sizeof(bs_cache)*8) {
3094 goto extract_rice_param_part;
3098 bs->
cache = bs_cache;
3117 (void)bitsPerSample;
3132 riceParamPart0 &= riceParamMask;
3133 riceParamPart0 |= (zeroCountPart0 << riceParam);
3134 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
3136 pSamplesOut[i] = riceParamPart0;
3168 pSamplesOutEnd = pSamplesOut + (
count & ~3);
3170 if (bitsPerSample+shift > 32) {
3171 while (pSamplesOut < pSamplesOutEnd) {
3183 riceParamPart0 &= riceParamMask;
3184 riceParamPart1 &= riceParamMask;
3185 riceParamPart2 &= riceParamMask;
3186 riceParamPart3 &= riceParamMask;
3188 riceParamPart0 |= (zeroCountPart0 << riceParam);
3189 riceParamPart1 |= (zeroCountPart1 << riceParam);
3190 riceParamPart2 |= (zeroCountPart2 << riceParam);
3191 riceParamPart3 |= (zeroCountPart3 << riceParam);
3193 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
3194 riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
3195 riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
3196 riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
3206 while (pSamplesOut < pSamplesOutEnd) {
3214 riceParamPart0 &= riceParamMask;
3215 riceParamPart1 &= riceParamMask;
3216 riceParamPart2 &= riceParamMask;
3217 riceParamPart3 &= riceParamMask;
3219 riceParamPart0 |= (zeroCountPart0 << riceParam);
3220 riceParamPart1 |= (zeroCountPart1 << riceParam);
3221 riceParamPart2 |= (zeroCountPart2 << riceParam);
3222 riceParamPart3 |= (zeroCountPart3 << riceParam);
3224 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
3225 riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
3226 riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
3227 riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
3246 riceParamPart0 &= riceParamMask;
3247 riceParamPart0 |= (zeroCountPart0 << riceParam);
3248 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
3252 if (bitsPerSample+shift > 32) {
3265 #if defined(DRFLAC_SUPPORT_SSE2)
/*
Packs the eight 32-bit lanes of `a` and `b` into 16-bit lanes with signed saturation and
reorders them so that elements of `a` and `b` alternate: a0 b0 a1 b1 a2 b2 a3 b3.
NOTE(review): the declaration of `r` and the return statement are not visible in this
listing; presumably `r` is the value returned - confirm.
*/
3266 static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b)
/* Saturating narrow to 16-bit lanes: a0 a1 a2 a3 b0 b1 b2 b3. */
3271 r = _mm_packs_epi32(a, b);
/* Swap the two middle 32-bit lanes: a0 a1 b0 b1 a2 a3 b2 b3. */
3274 r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0));
/* Swap the two middle 16-bit lanes inside each 64-bit half: a0 b0 a1 b1 a2 b2 a3 b3. */
3277 r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
3278 r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
3284 #if defined(DRFLAC_SUPPORT_SSE41)
/*
Returns the bitwise complement of `a`. The all-ones mask is produced by comparing a zero
vector with itself for equality, and `a` is then XORed with that mask.
*/
3285 static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a)
3287 return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
/*
Horizontal add of the four 32-bit lanes of `x`. Lanes 0 and 1 of the result each hold
x0+x1+x2+x3; lanes 2 and 3 do NOT hold the full sum (they hold 2*(x0+x2) and 2*(x1+x3)),
so only the low lanes of the result are meaningful.
*/
3290 static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x)
/* Add the vector to itself with its 64-bit halves swapped: [x0+x2, x1+x3, x0+x2, x1+x3]. */
3292 __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
/* Swap the two low 32-bit lanes (done as a 16-bit shuffle of the low half); the high half is unchanged. */
3293 __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2));
3294 return _mm_add_epi32(x64, x32);
/*
Horizontal add of the two 64-bit lanes of `x`: the vector is added to a copy of itself
with the 64-bit halves swapped, so both lanes of the result hold lo+hi.
*/
3297 static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x)
3299 return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
3308 __m128i lo = _mm_srli_epi64(x,
count);
3309 __m128i hi = _mm_srai_epi32(x,
count);
3311 hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0));
3313 return _mm_or_si128(lo, hi);
3330 __m128i coefficients128_0;
3331 __m128i coefficients128_4;
3332 __m128i coefficients128_8;
3333 __m128i samples128_0;
3334 __m128i samples128_4;
3335 __m128i samples128_8;
3336 __m128i riceParamMask128;
3341 riceParamMask128 = _mm_set1_epi32(riceParamMask);
3344 coefficients128_0 = _mm_setzero_si128();
3345 coefficients128_4 = _mm_setzero_si128();
3346 coefficients128_8 = _mm_setzero_si128();
3348 samples128_0 = _mm_setzero_si128();
3349 samples128_4 = _mm_setzero_si128();
3350 samples128_8 = _mm_setzero_si128();
3360 int runningOrder = order;
3363 if (runningOrder >= 4) {
3364 coefficients128_0 = _mm_loadu_si128((
const __m128i*)(coefficients + 0));
3365 samples128_0 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 4));
3368 switch (runningOrder) {
3369 case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0);
break;
3370 case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0);
break;
3371 case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0);
break;
3377 if (runningOrder >= 4) {
3378 coefficients128_4 = _mm_loadu_si128((
const __m128i*)(coefficients + 4));
3379 samples128_4 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 8));
3382 switch (runningOrder) {
3383 case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0);
break;
3384 case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0);
break;
3385 case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0);
break;
3391 if (runningOrder == 4) {
3392 coefficients128_8 = _mm_loadu_si128((
const __m128i*)(coefficients + 8));
3393 samples128_8 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 12));
3396 switch (runningOrder) {
3397 case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0);
break;
3398 case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0);
break;
3399 case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0);
break;
3405 coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
3406 coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
3407 coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
3413 case 12: ((
drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((
drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
3414 case 11: ((
drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((
drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
3415 case 10: ((
drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((
drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
3416 case 9: ((
drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((
drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
3417 case 8: ((
drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((
drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
3418 case 7: ((
drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((
drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
3419 case 6: ((
drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((
drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
3420 case 5: ((
drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((
drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
3421 case 4: ((
drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((
drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
3422 case 3: ((
drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((
drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
3423 case 2: ((
drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((
drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
3424 case 1: ((
drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((
drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
3429 while (pDecodedSamples < pDecodedSamplesEnd) {
3430 __m128i prediction128;
3431 __m128i zeroCountPart128;
3432 __m128i riceParamPart128;
3441 zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
3442 riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
3444 riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
3445 riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
3446 riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01)));
3450 for (i = 0; i < 4; i += 1) {
3451 prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0);
3454 prediction128 = drflac__mm_hadd_epi32(prediction128);
3455 prediction128 = _mm_srai_epi32(prediction128, shift);
3456 prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
3458 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
3459 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
3461 }
else if (order <= 8) {
3462 for (i = 0; i < 4; i += 1) {
3463 prediction128 = _mm_mullo_epi32(coefficients128_4, samples128_4);
3464 prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
3467 prediction128 = drflac__mm_hadd_epi32(prediction128);
3468 prediction128 = _mm_srai_epi32(prediction128, shift);
3469 prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
3471 samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4);
3472 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
3473 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
3476 for (i = 0; i < 4; i += 1) {
3477 prediction128 = _mm_mullo_epi32(coefficients128_8, samples128_8);
3478 prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4));
3479 prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
3482 prediction128 = drflac__mm_hadd_epi32(prediction128);
3483 prediction128 = _mm_srai_epi32(prediction128, shift);
3484 prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
3486 samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4);
3487 samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4);
3488 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
3489 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
3494 _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
3495 pDecodedSamples += 4;
3500 while (i < (
int)
count) {
3507 riceParamParts0 &= riceParamMask;
3508 riceParamParts0 |= (zeroCountParts0 << riceParam);
3509 riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
3515 pDecodedSamples += 1;
3535 __m128i coefficients128_0;
3536 __m128i coefficients128_4;
3537 __m128i coefficients128_8;
3538 __m128i samples128_0;
3539 __m128i samples128_4;
3540 __m128i samples128_8;
3541 __m128i prediction128;
3542 __m128i riceParamMask128;
3549 riceParamMask128 = _mm_set1_epi32(riceParamMask);
3551 prediction128 = _mm_setzero_si128();
3554 coefficients128_0 = _mm_setzero_si128();
3555 coefficients128_4 = _mm_setzero_si128();
3556 coefficients128_8 = _mm_setzero_si128();
3558 samples128_0 = _mm_setzero_si128();
3559 samples128_4 = _mm_setzero_si128();
3560 samples128_8 = _mm_setzero_si128();
3564 int runningOrder = order;
3567 if (runningOrder >= 4) {
3568 coefficients128_0 = _mm_loadu_si128((
const __m128i*)(coefficients + 0));
3569 samples128_0 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 4));
3572 switch (runningOrder) {
3573 case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0);
break;
3574 case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0);
break;
3575 case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0);
break;
3581 if (runningOrder >= 4) {
3582 coefficients128_4 = _mm_loadu_si128((
const __m128i*)(coefficients + 4));
3583 samples128_4 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 8));
3586 switch (runningOrder) {
3587 case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0);
break;
3588 case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0);
break;
3589 case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0);
break;
3595 if (runningOrder == 4) {
3596 coefficients128_8 = _mm_loadu_si128((
const __m128i*)(coefficients + 8));
3597 samples128_8 = _mm_loadu_si128((
const __m128i*)(pSamplesOut - 12));
3600 switch (runningOrder) {
3601 case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0);
break;
3602 case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0);
break;
3603 case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0);
break;
3609 coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
3610 coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
3611 coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
3616 case 12: ((
drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((
drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
3617 case 11: ((
drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((
drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
3618 case 10: ((
drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((
drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
3619 case 9: ((
drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((
drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
3620 case 8: ((
drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((
drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
3621 case 7: ((
drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((
drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
3622 case 6: ((
drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((
drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
3623 case 5: ((
drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((
drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
3624 case 4: ((
drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((
drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
3625 case 3: ((
drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((
drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
3626 case 2: ((
drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((
drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
3627 case 1: ((
drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((
drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
3632 while (pDecodedSamples < pDecodedSamplesEnd) {
3633 __m128i zeroCountPart128;
3634 __m128i riceParamPart128;
3643 zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
3644 riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
3646 riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
3647 riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
3648 riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1)));
3650 for (i = 0; i < 4; i += 1) {
3651 prediction128 = _mm_xor_si128(prediction128, prediction128);
3656 case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0))));
3658 case 9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2))));
3660 case 7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0))));
3662 case 5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2))));
3664 case 3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0))));
3666 case 1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2))));
3670 prediction128 = drflac__mm_hadd_epi64(prediction128);
3671 prediction128 = drflac__mm_srai_epi64(prediction128, shift);
3672 prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
3675 samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4);
3676 samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4);
3677 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
3680 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
3684 _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
3685 pDecodedSamples += 4;
3690 while (i < (
int)
count) {
3697 riceParamParts0 &= riceParamMask;
3698 riceParamParts0 |= (zeroCountParts0 << riceParam);
3699 riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
3705 pDecodedSamples += 1;
3718 if (order > 0 && order <= 12) {
3719 if (bitsPerSample+shift > 32) {
3720 return drflac__decode_samples_with_residual__rice__sse41_64(bs,
count, riceParam, order, shift, coefficients, pSamplesOut);
3722 return drflac__decode_samples_with_residual__rice__sse41_32(bs,
count, riceParam, order, shift, coefficients, pSamplesOut);
3730 #if defined(DRFLAC_SUPPORT_NEON)
3733 vst1q_s32(p+0, x.val[0]);
3734 vst1q_s32(p+4, x.val[1]);
/*
Stores the two 4-float vectors of `x` back to back: x.val[0] to p[0..3] and x.val[1] to
p[4..7]. The two vectors are written sequentially, not interleaved.
*/
3737 static DRFLAC_INLINE void drflac__vst2q_f32(
float* p, float32x4x2_t x)
3739 vst1q_f32(p+0, x.val[0]);
3740 vst1q_f32(p+4, x.val[1]);
3745 vst1q_s16(p, vcombine_s16(x.val[0], x.val[1]));
3755 return vld1q_s32(x);
/*
Shifts a one-lane window across the pair (b, a): the result is lanes 1..3 of `b`
followed by lane 0 of `a`, i.e. { b1, b2, b3, a0 }.
*/
3758 static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b)
3770 return vextq_s32(b, a, 1);
/*
Unsigned counterpart of the one-lane alignment helper: returns lanes 1..3 of `b`
followed by lane 0 of `a`, i.e. { b1, b2, b3, a0 }.
*/
3773 static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b)
3785 return vextq_u32(b, a, 1);
/*
Horizontal add of the four 32-bit lanes of `x`. Both lanes of the returned 64-bit vector
hold x0+x1+x2+x3.
*/
3788 static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x)
/* Fold the high half onto the low half: { x0+x2, x1+x3 }. */
3800 int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x));
/* Pairwise add of r with itself puts the full sum in both lanes. */
3801 return vpadd_s32(r, r);
/* Horizontal add of the two 64-bit lanes of `x`: returns high + low as a single 64-bit lane. */
3804 static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x)
3806 return vadd_s64(vget_high_s64(x), vget_low_s64(x));
/*
Reverses the order of the four 32-bit lanes of `x`: { x0, x1, x2, x3 } -> { x3, x2, x1, x0 }.
The 64-bit halves are swapped first, then the two 32-bit lanes inside each half are reversed.
*/
3809 static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x)
3819 return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x)));
/*
Returns the bitwise complement of `x` by XORing every lane with 0xFFFFFFFF.
NOTE(review): 0xFFFFFFFF is an unsigned constant passed to a signed-lane intrinsic; this
relies on the conversion yielding an all-ones bit pattern - confirm on the target compilers.
*/
3822 static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x)
3824 return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF));
/* Returns the bitwise complement of `x` by XORing every 32-bit lane with an all-ones mask. */
3827 static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x)
3829 return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF));
3840 int32x4_t coefficients128_0;
3841 int32x4_t coefficients128_4;
3842 int32x4_t coefficients128_8;
3843 int32x4_t samples128_0;
3844 int32x4_t samples128_4;
3845 int32x4_t samples128_8;
3846 uint32x4_t riceParamMask128;
3847 int32x4_t riceParam128;
3853 riceParamMask = ~((~0UL) << riceParam);
3854 riceParamMask128 = vdupq_n_u32(riceParamMask);
3856 riceParam128 = vdupq_n_s32(riceParam);
3857 shift64 = vdup_n_s32(-shift);
3858 one128 = vdupq_n_u32(1);
3867 int runningOrder = order;
3872 if (runningOrder >= 4) {
3873 coefficients128_0 = vld1q_s32(coefficients + 0);
3874 samples128_0 = vld1q_s32(pSamplesOut - 4);
3877 switch (runningOrder) {
3878 case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3];
3879 case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2];
3880 case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1];
3883 coefficients128_0 = vld1q_s32(tempC);
3884 samples128_0 = vld1q_s32(tempS);
3889 if (runningOrder >= 4) {
3890 coefficients128_4 = vld1q_s32(coefficients + 4);
3891 samples128_4 = vld1q_s32(pSamplesOut - 8);
3894 switch (runningOrder) {
3895 case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7];
3896 case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6];
3897 case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5];
3900 coefficients128_4 = vld1q_s32(tempC);
3901 samples128_4 = vld1q_s32(tempS);
3906 if (runningOrder == 4) {
3907 coefficients128_8 = vld1q_s32(coefficients + 8);
3908 samples128_8 = vld1q_s32(pSamplesOut - 12);
3911 switch (runningOrder) {
3912 case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11];
3913 case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10];
3914 case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9];
3917 coefficients128_8 = vld1q_s32(tempC);
3918 samples128_8 = vld1q_s32(tempS);
3923 coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
3924 coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
3925 coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
3929 while (pDecodedSamples < pDecodedSamplesEnd) {
3930 int32x4_t prediction128;
3931 int32x2_t prediction64;
3932 uint32x4_t zeroCountPart128;
3933 uint32x4_t riceParamPart128;
3942 zeroCountPart128 = vld1q_u32(zeroCountParts);
3943 riceParamPart128 = vld1q_u32(riceParamParts);
3945 riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
3946 riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
3947 riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
3950 for (i = 0; i < 4; i += 1) {
3951 prediction128 = vmulq_s32(coefficients128_0, samples128_0);
3954 prediction64 = drflac__vhaddq_s32(prediction128);
3955 prediction64 = vshl_s32(prediction64, shift64);
3956 prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
3958 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
3959 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
3961 }
else if (order <= 8) {
3962 for (i = 0; i < 4; i += 1) {
3963 prediction128 = vmulq_s32(coefficients128_4, samples128_4);
3964 prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
3967 prediction64 = drflac__vhaddq_s32(prediction128);
3968 prediction64 = vshl_s32(prediction64, shift64);
3969 prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
3971 samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
3972 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
3973 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
3976 for (i = 0; i < 4; i += 1) {
3977 prediction128 = vmulq_s32(coefficients128_8, samples128_8);
3978 prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4);
3979 prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
3982 prediction64 = drflac__vhaddq_s32(prediction128);
3983 prediction64 = vshl_s32(prediction64, shift64);
3984 prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
3986 samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
3987 samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
3988 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
3989 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
3994 vst1q_s32(pDecodedSamples, samples128_0);
3995 pDecodedSamples += 4;
4000 while (i < (
int)
count) {
4007 riceParamParts[0] &= riceParamMask;
4008 riceParamParts[0] |= (zeroCountParts[0] << riceParam);
4009 riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
4015 pDecodedSamples += 1;
4029 int32x4_t coefficients128_0;
4030 int32x4_t coefficients128_4;
4031 int32x4_t coefficients128_8;
4032 int32x4_t samples128_0;
4033 int32x4_t samples128_4;
4034 int32x4_t samples128_8;
4035 uint32x4_t riceParamMask128;
4036 int32x4_t riceParam128;
4042 riceParamMask = ~((~0UL) << riceParam);
4043 riceParamMask128 = vdupq_n_u32(riceParamMask);
4045 riceParam128 = vdupq_n_s32(riceParam);
4046 shift64 = vdup_n_s64(-shift);
4047 one128 = vdupq_n_u32(1);
4056 int runningOrder = order;
4061 if (runningOrder >= 4) {
4062 coefficients128_0 = vld1q_s32(coefficients + 0);
4063 samples128_0 = vld1q_s32(pSamplesOut - 4);
4066 switch (runningOrder) {
4067 case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3];
4068 case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2];
4069 case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1];
4072 coefficients128_0 = vld1q_s32(tempC);
4073 samples128_0 = vld1q_s32(tempS);
4078 if (runningOrder >= 4) {
4079 coefficients128_4 = vld1q_s32(coefficients + 4);
4080 samples128_4 = vld1q_s32(pSamplesOut - 8);
4083 switch (runningOrder) {
4084 case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7];
4085 case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6];
4086 case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5];
4089 coefficients128_4 = vld1q_s32(tempC);
4090 samples128_4 = vld1q_s32(tempS);
4095 if (runningOrder == 4) {
4096 coefficients128_8 = vld1q_s32(coefficients + 8);
4097 samples128_8 = vld1q_s32(pSamplesOut - 12);
4100 switch (runningOrder) {
4101 case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11];
4102 case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10];
4103 case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9];
4106 coefficients128_8 = vld1q_s32(tempC);
4107 samples128_8 = vld1q_s32(tempS);
4112 coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
4113 coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
4114 coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
4118 while (pDecodedSamples < pDecodedSamplesEnd) {
4119 int64x2_t prediction128;
4120 uint32x4_t zeroCountPart128;
4121 uint32x4_t riceParamPart128;
4130 zeroCountPart128 = vld1q_u32(zeroCountParts);
4131 riceParamPart128 = vld1q_u32(riceParamParts);
4133 riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
4134 riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
4135 riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
4137 for (i = 0; i < 4; i += 1) {
4138 int64x1_t prediction64;
4140 prediction128 = veorq_s64(prediction128, prediction128);
4144 case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8)));
4146 case 9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8)));
4148 case 7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4)));
4150 case 5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4)));
4152 case 3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0)));
4154 case 1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0)));
4158 prediction64 = drflac__vhaddq_s64(prediction128);
4159 prediction64 = vshl_s64(prediction64, shift64);
4160 prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0)));
4163 samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
4164 samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
4165 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0);
4168 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
4172 vst1q_s32(pDecodedSamples, samples128_0);
4173 pDecodedSamples += 4;
4178 while (i < (
int)
count) {
4185 riceParamParts[0] &= riceParamMask;
4186 riceParamParts[0] |= (zeroCountParts[0] << riceParam);
4187 riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
4193 pDecodedSamples += 1;
4206 if (order > 0 && order <= 12) {
4207 if (bitsPerSample+shift > 32) {
4208 return drflac__decode_samples_with_residual__rice__neon_64(bs,
count, riceParam, order, shift, coefficients, pSamplesOut);
4210 return drflac__decode_samples_with_residual__rice__neon_32(bs,
count, riceParam, order, shift, coefficients, pSamplesOut);
4220 #if defined(DRFLAC_SUPPORT_SSE41)
4221 if (drflac__gIsSSE41Supported) {
4222 return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample,
count, riceParam, order, shift, coefficients, pSamplesOut);
4224 #elif defined(DRFLAC_SUPPORT_NEON)
4226 return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample,
count, riceParam, order, shift, coefficients, pSamplesOut);
4232 return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample,
count, riceParam, order, shift, coefficients, pSamplesOut);
4247 for (i = 0; i <
count; ++i) {
4265 for (i = 0; i <
count; ++i) {
4266 if (unencodedBitsPerSample > 0) {
4274 if (bitsPerSample >= 24) {
4310 pDecodedSamples += order;
4320 if (partitionOrder > 8) {
4325 if ((blockSize / (1 << partitionOrder)) <= order) {
4329 samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
4330 partitionsRemaining = (1 << partitionOrder);
4337 if (riceParam == 15) {
4344 if (riceParam == 31) {
4349 if (riceParam != 0xFF) {
4354 unsigned char unencodedBitsPerSample = 0;
4364 pDecodedSamples += samplesInPartition;
4366 if (partitionsRemaining == 1) {
4370 partitionsRemaining -= 1;
4372 if (partitionOrder != 0) {
4373 samplesInPartition = blockSize / (1 << partitionOrder);
4411 if (partitionOrder > 8) {
4416 if ((blockSize / (1 << partitionOrder)) <= order) {
4420 samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
4421 partitionsRemaining = (1 << partitionOrder);
4429 if (riceParam == 15) {
4436 if (riceParam == 31) {
4441 if (riceParam != 0xFF) {
4446 unsigned char unencodedBitsPerSample = 0;
4457 if (partitionsRemaining == 1) {
4461 partitionsRemaining -= 1;
4462 samplesInPartition = blockSize / (1 << partitionOrder);
4483 for (i = 0; i < blockSize; ++i) {
4484 pDecodedSamples[i] = sample;
4494 for (i = 0; i < blockSize; ++i) {
4500 pDecodedSamples[i] = sample;
4519 for (i = 0; i < lpcOrder; ++i) {
4525 pDecodedSamples[i] = sample;
4543 for (i = 0; i < lpcOrder; ++i) {
4549 pDecodedSamples[i] = sample;
4555 if (lpcPrecision == 15) {
4565 for (i = 0; i < lpcOrder; ++i) {
4581 const drflac_uint32 sampleRateTable[12] = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
4605 if (reserved == 1) {
4618 if (blockSize == 0) {
4631 if (channelAssignment > 10) {
4639 if (bitsPerSample == 3 || bitsPerSample == 7) {
4648 if (reserved == 1) {
4654 isVariableBlockSize = blockingStrategy == 1;
4655 if (isVariableBlockSize) {
4659 if (result == DRFLAC_END_OF_STREAM) {
4665 header->flacFrameNumber = 0;
4666 header->pcmFrameNumber = pcmFrameNumber;
4671 if (result == DRFLAC_END_OF_STREAM) {
4678 header->pcmFrameNumber = 0;
4683 if (blockSize == 1) {
4684 header->blockSizeInPCMFrames = 192;
4685 }
else if (blockSize >= 2 && blockSize <= 5) {
4686 header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2));
4687 }
else if (blockSize == 6) {
4692 header->blockSizeInPCMFrames += 1;
4693 }
else if (blockSize == 7) {
4698 header->blockSizeInPCMFrames += 1;
4701 header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8));
4705 if (sampleRate <= 11) {
4706 header->sampleRate = sampleRateTable[sampleRate];
4707 }
else if (sampleRate == 12) {
4712 header->sampleRate *= 1000;
4713 }
else if (sampleRate == 13) {
4718 }
else if (sampleRate == 14) {
4723 header->sampleRate *= 10;
4729 header->channelAssignment = channelAssignment;
4731 header->bitsPerSample = bitsPerSampleTable[bitsPerSample];
4732 if (
header->bitsPerSample == 0) {
4733 header->bitsPerSample = streaminfoBitsPerSample;
4740 #ifndef DR_FLAC_NO_CRC
4741 if (
header->crc8 != crc8) {
4759 if ((header & 0x80) != 0) {
4763 type = (
header & 0x7E) >> 1;
4766 }
else if (type == 1) {
4769 if ((type & 0x20) != 0) {
4771 pSubframe->
lpcOrder = (type & 0x1F) + 1;
4772 }
else if ((type & 0x08) != 0) {
4774 pSubframe->
lpcOrder = (type & 0x07);
4790 if ((header & 0x01) == 1) {
4791 unsigned int wastedBitsPerSample;
4809 pSubframe = frame->
subframes + subframeIndex;
4817 subframeBitsPerSample += 1;
4819 subframeBitsPerSample += 1;
4866 pSubframe = frame->
subframes + subframeIndex;
4874 subframeBitsPerSample += 1;
4876 subframeBitsPerSample += 1;
4906 unsigned int bitsToSeek = pSubframe->
lpcOrder * subframeBitsPerSample;
4918 unsigned char lpcPrecision;
4920 unsigned int bitsToSeek = pSubframe->
lpcOrder * subframeBitsPerSample;
4928 if (lpcPrecision == 15) {
4934 bitsToSeek = (pSubframe->
lpcOrder * lpcPrecision) + 5;
4953 drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};
4956 return lookup[channelAssignment];
4965 #ifndef DR_FLAC_NO_CRC
4979 if (channelCount != (
int)pFlac->
channels) {
4983 for (i = 0; i < channelCount; ++i) {
4990 if (paddingSizeInBits > 0) {
4993 return DRFLAC_END_OF_STREAM;
4997 #ifndef DR_FLAC_NO_CRC
5001 return DRFLAC_END_OF_STREAM;
5004 #ifndef DR_FLAC_NO_CRC
5005 if (actualCRC16 != desiredCRC16) {
5020 #ifndef DR_FLAC_NO_CRC
5025 for (i = 0; i < channelCount; ++i) {
5037 #ifndef DR_FLAC_NO_CRC
5041 return DRFLAC_END_OF_STREAM;
5044 #ifndef DR_FLAC_NO_CRC
5045 if (actualCRC16 != desiredCRC16) {
5085 if (firstPCMFrame == 0) {
5090 if (lastPCMFrame > 0) {
5094 if (pFirstPCMFrame) {
5095 *pFirstPCMFrame = firstPCMFrame;
5097 if (pLastPCMFrame) {
5098 *pLastPCMFrame = lastPCMFrame;
5127 while (pcmFramesToSeek > 0) {
5134 pcmFramesRead += pcmFramesToSeek;
5136 pcmFramesToSeek = 0;
5146 return pcmFramesRead;
5172 runningPCMFrameCount = 0;
5196 pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
5197 if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
5202 drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
5211 goto next_iteration;
5228 runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
5231 goto next_iteration;
5261 #if !defined(DR_FLAC_NO_CRC)
5267 #define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f
5282 if (targetByte == 0) {
5288 targetByte = rangeLo + ((rangeHi - rangeLo)/2);
5289 rangeHi = targetByte;
5304 targetByte = rangeLo + ((rangeHi - rangeLo)/2);
5305 rangeHi = targetByte;
5312 targetByte = rangeLo + ((rangeHi - rangeLo)/2);
5313 rangeHi = targetByte;
5326 *pLastSuccessfulSeekOffset = targetByte;
5354 drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo;
5358 if (targetByte > byteRangeHi) {
5359 targetByte = byteRangeHi;
5370 if (pcmRangeLo == newPCMRangeLo) {
5382 pcmRangeLo = newPCMRangeLo;
5383 pcmRangeHi = newPCMRangeHi;
5385 if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) {
5395 if (pcmRangeLo > pcmFrameIndex) {
5397 byteRangeHi = lastSuccessfulSeekOffset;
5398 if (byteRangeLo > byteRangeHi) {
5399 byteRangeLo = byteRangeHi;
5402 targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2);
5403 if (targetByte < byteRangeLo) {
5404 targetByte = byteRangeLo;
5410 if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) {
5417 byteRangeLo = lastSuccessfulSeekOffset;
5418 if (byteRangeHi < byteRangeLo) {
5419 byteRangeHi = byteRangeLo;
5423 if (targetByte > byteRangeHi) {
5424 targetByte = byteRangeHi;
5427 if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) {
5428 closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset;
5455 if (pcmFrameIndex < seekForwardThreshold) {
5484 for (iSeekpoint = 0; iSeekpoint < pFlac->
seekpointCount; ++iSeekpoint) {
5489 iClosestSeekpoint = iSeekpoint;
5492 #if !defined(DR_FLAC_NO_CRC)
5501 if (iClosestSeekpoint < pFlac->seekpointCount-1) {
5558 pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
5559 if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
5564 drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
5573 goto next_iteration;
5590 runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
5593 goto next_iteration;
5623 #ifndef DR_FLAC_NO_OGG
5657 #ifndef DR_FLAC_NO_OGG
5667 *isLastBlock = (blockHeader & 0x80000000UL) >> 31;
5668 *blockType = (blockHeader & 0x7F000000UL) >> 24;
5669 *blockSize = (blockHeader & 0x00FFFFFFUL);
5677 if (onRead(pUserData, &blockHeader, 4) != 4) {
5693 if (onRead(pUserData, &blockSizes, 4) != 4) {
5698 if (onRead(pUserData, &frameSizes, 6) != 6) {
5703 if (onRead(pUserData, &importantProps, 8) != 8) {
5708 if (onRead(pUserData, md5,
sizeof(md5)) !=
sizeof(md5)) {
5751 if (pAllocationCallbacks ==
NULL) {
5769 if (pAllocationCallbacks ==
NULL) {
5799 if (p ==
NULL || pAllocationCallbacks ==
NULL) {
5827 runningFilePos += 4;
5829 metadata.
type = blockType;
5837 if (blockSize < 4) {
5843 if (pRawData ==
NULL) {
5847 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
5857 onMeta(pUserDataMD, &metadata);
5865 seektablePos = runningFilePos;
5866 seektableSize = blockSize;
5873 if (pRawData ==
NULL) {
5877 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
5888 for (iSeekpoint = 0; iSeekpoint < metadata.
data.
seektable.seekpointCount; ++iSeekpoint) {
5895 onMeta(pUserDataMD, &metadata);
5903 if (blockSize < 8) {
5909 const char* pRunningData;
5910 const char* pRunningDataEnd;
5914 if (pRawData ==
NULL) {
5918 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
5926 pRunningData = (
const char*)pRawData;
5927 pRunningDataEnd = (
const char*)pRawData + blockSize;
5950 if (pRunningDataEnd - pRunningData < 4) {
5956 if (pRunningDataEnd - pRunningData < (
drflac_int64)commentLength) {
5960 pRunningData += commentLength;
5963 onMeta(pUserDataMD, &metadata);
5971 if (blockSize < 396) {
5977 const char* pRunningData;
5978 const char* pRunningDataEnd;
5983 if (pRawData ==
NULL) {
5987 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
5995 pRunningData = (
const char*)pRawData;
5996 pRunningDataEnd = (
const char*)pRawData + blockSize;
6000 metadata.
data.
cuesheet.isCD = (pRunningData[0] & 0x80) != 0; pRunningData += 259;
6001 metadata.
data.
cuesheet.trackCount = pRunningData[0]; pRunningData += 1;
6005 for (iTrack = 0; iTrack < metadata.
data.
cuesheet.trackCount; ++iTrack) {
6009 if (pRunningDataEnd - pRunningData < 36) {
6016 indexCount = pRunningData[0]; pRunningData += 1;
6018 if (pRunningDataEnd - pRunningData < (
drflac_int64)indexPointSize) {
6024 for (iIndex = 0; iIndex < indexCount; ++iIndex) {
6031 onMeta(pUserDataMD, &metadata);
6039 if (blockSize < 32) {
6045 const char* pRunningData;
6046 const char* pRunningDataEnd;
6049 if (pRawData ==
NULL) {
6053 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
6061 pRunningData = (
const char*)pRawData;
6062 pRunningDataEnd = (
const char*)pRawData + blockSize;
6080 metadata.
data.
picture.description = pRunningData; pRunningData += metadata.
data.
picture.descriptionLength;
6094 onMeta(pUserDataMD, &metadata);
6109 onMeta(pUserDataMD, &metadata);
6132 if (pRawData ==
NULL) {
6136 if (onRead(pUserData, pRawData, blockSize) != blockSize) {
6143 onMeta(pUserDataMD, &metadata);
6151 if (onMeta ==
NULL && blockSize > 0) {
6157 runningFilePos += blockSize;
6163 *pSeektablePos = seektablePos;
6164 *pSeektableSize = seektableSize;
6165 *pFirstFramePos = runningFilePos;
6233 onMeta(pUserDataMD, &metadata);
6240 #ifndef DR_FLAC_NO_OGG
6241 #define DRFLAC_OGG_MAX_PAGE_SIZE 65307
6242 #define DRFLAC_OGG_CAPTURE_PATTERN_CRC32 1605413199
6250 #ifndef DR_FLAC_NO_CRC
6252 0x00000000
L, 0x04C11DB7
L, 0x09823B6E
L, 0x0D4326D9
L,
6253 0x130476DC
L, 0x17C56B6B
L, 0x1A864DB2
L, 0x1E475005
L,
6254 0x2608EDB8
L, 0x22C9F00F
L, 0x2F8AD6D6
L, 0x2B4BCB61
L,
6255 0x350C9B64
L, 0x31CD86D3
L, 0x3C8EA00A
L, 0x384FBDBD
L,
6256 0x4C11DB70
L, 0x48D0C6C7
L, 0x4593E01E
L, 0x4152FDA9
L,
6257 0x5F15ADAC
L, 0x5BD4B01B
L, 0x569796C2
L, 0x52568B75
L,
6258 0x6A1936C8
L, 0x6ED82B7F
L, 0x639B0DA6
L, 0x675A1011
L,
6259 0x791D4014
L, 0x7DDC5DA3
L, 0x709F7B7A
L, 0x745E66CD
L,
6260 0x9823B6E0
L, 0x9CE2AB57
L, 0x91A18D8E
L, 0x95609039
L,
6261 0x8B27C03C
L, 0x8FE6DD8B
L, 0x82A5FB52
L, 0x8664E6E5
L,
6262 0xBE2B5B58
L, 0xBAEA46EF
L, 0xB7A96036
L, 0xB3687D81
L,
6263 0xAD2F2D84
L, 0xA9EE3033
L, 0xA4AD16EA
L, 0xA06C0B5D
L,
6264 0xD4326D90
L, 0xD0F37027
L, 0xDDB056FE
L, 0xD9714B49
L,
6265 0xC7361B4C
L, 0xC3F706FB
L, 0xCEB42022
L, 0xCA753D95
L,
6266 0xF23A8028
L, 0xF6FB9D9F
L, 0xFBB8BB46
L, 0xFF79A6F1
L,
6267 0xE13EF6F4
L, 0xE5FFEB43
L, 0xE8BCCD9A
L, 0xEC7DD02D
L,
6268 0x34867077
L, 0x30476DC0
L, 0x3D044B19
L, 0x39C556AE
L,
6269 0x278206AB
L, 0x23431B1C
L, 0x2E003DC5
L, 0x2AC12072
L,
6270 0x128E9DCF
L, 0x164F8078
L, 0x1B0CA6A1
L, 0x1FCDBB16
L,
6271 0x018AEB13
L, 0x054BF6A4
L, 0x0808D07D
L, 0x0CC9CDCA
L,
6272 0x7897AB07
L, 0x7C56B6B0
L, 0x71159069
L, 0x75D48DDE
L,
6273 0x6B93DDDB
L, 0x6F52C06C
L, 0x6211E6B5
L, 0x66D0FB02
L,
6274 0x5E9F46BF
L, 0x5A5E5B08
L, 0x571D7DD1
L, 0x53DC6066
L,
6275 0x4D9B3063
L, 0x495A2DD4
L, 0x44190B0D
L, 0x40D816BA
L,
6276 0xACA5C697
L, 0xA864DB20
L, 0xA527FDF9
L, 0xA1E6E04E
L,
6277 0xBFA1B04B
L, 0xBB60ADFC
L, 0xB6238B25
L, 0xB2E29692
L,
6278 0x8AAD2B2F
L, 0x8E6C3698
L, 0x832F1041
L, 0x87EE0DF6
L,
6279 0x99A95DF3
L, 0x9D684044
L, 0x902B669D
L, 0x94EA7B2A
L,
6280 0xE0B41DE7
L, 0xE4750050
L, 0xE9362689
L, 0xEDF73B3E
L,
6281 0xF3B06B3B
L, 0xF771768C
L, 0xFA325055
L, 0xFEF34DE2
L,
6282 0xC6BCF05F
L, 0xC27DEDE8
L, 0xCF3ECB31
L, 0xCBFFD686
L,
6283 0xD5B88683
L, 0xD1799B34
L, 0xDC3ABDED
L, 0xD8FBA05A
L,
6284 0x690CE0EE
L, 0x6DCDFD59
L, 0x608EDB80
L, 0x644FC637
L,
6285 0x7A089632
L, 0x7EC98B85
L, 0x738AAD5C
L, 0x774BB0EB
L,
6286 0x4F040D56
L, 0x4BC510E1
L, 0x46863638
L, 0x42472B8F
L,
6287 0x5C007B8A
L, 0x58C1663D
L, 0x558240E4
L, 0x51435D53
L,
6288 0x251D3B9E
L, 0x21DC2629
L, 0x2C9F00F0
L, 0x285E1D47
L,
6289 0x36194D42
L, 0x32D850F5
L, 0x3F9B762C
L, 0x3B5A6B9B
L,
6290 0x0315D626
L, 0x07D4CB91
L, 0x0A97ED48
L, 0x0E56F0FF
L,
6291 0x1011A0FA
L, 0x14D0BD4D
L, 0x19939B94
L, 0x1D528623
L,
6292 0xF12F560E
L, 0xF5EE4BB9
L, 0xF8AD6D60
L, 0xFC6C70D7
L,
6293 0xE22B20D2
L, 0xE6EA3D65
L, 0xEBA91BBC
L, 0xEF68060B
L,
6294 0xD727BBB6
L, 0xD3E6A601
L, 0xDEA580D8
L, 0xDA649D6F
L,
6295 0xC423CD6A
L, 0xC0E2D0DD
L, 0xCDA1F604
L, 0xC960EBB3
L,
6296 0xBD3E8D7E
L, 0xB9FF90C9
L, 0xB4BCB610
L, 0xB07DABA7
L,
6297 0xAE3AFBA2
L, 0xAAFBE615
L, 0xA7B8C0CC
L, 0xA379DD7B
L,
6298 0x9B3660C6
L, 0x9FF77D71
L, 0x92B45BA8
L, 0x9675461F
L,
6299 0x8832161A
L, 0x8CF30BAD
L, 0x81B02D74
L, 0x857130C3
L,
6300 0x5D8A9099
L, 0x594B8D2E
L, 0x5408ABF7
L, 0x50C9B640
L,
6301 0x4E8EE645
L, 0x4A4FFBF2
L, 0x470CDD2B
L, 0x43CDC09C
L,
6302 0x7B827D21
L, 0x7F436096
L, 0x7200464F
L, 0x76C15BF8
L,
6303 0x68860BFD
L, 0x6C47164A
L, 0x61043093
L, 0x65C52D24
L,
6304 0x119B4BE9
L, 0x155A565E
L, 0x18197087
L, 0x1CD86D30
L,
6305 0x029F3D35
L, 0x065E2082
L, 0x0B1D065B
L, 0x0FDC1BEC
L,
6306 0x3793A651
L, 0x3352BBE6
L, 0x3E119D3F
L, 0x3AD08088
L,
6307 0x2497D08D
L, 0x2056CD3A
L, 0x2D15EBE3
L, 0x29D4F654
L,
6308 0xC5A92679
L, 0xC1683BCE
L, 0xCC2B1D17
L, 0xC8EA00A0
L,
6309 0xD6AD50A5
L, 0xD26C4D12
L, 0xDF2F6BCB
L, 0xDBEE767C
L,
6310 0xE3A1CBC1
L, 0xE760D676
L, 0xEA23F0AF
L, 0xEEE2ED18
L,
6311 0xF0A5BD1D
L, 0xF464A0AA
L, 0xF9278673
L, 0xFDE69BC4
L,
6312 0x89B8FD09
L, 0x8D79E0BE
L, 0x803AC667
L, 0x84FBDBD0
L,
6313 0x9ABC8BD5
L, 0x9E7D9662
L, 0x933EB0BB
L, 0x97FFAD0C
L,
6314 0xAFB010B1
L, 0xAB710D06
L, 0xA6322BDF
L, 0xA2F33668
L,
6315 0xBCB4666D
L, 0xB8757BDA
L, 0xB5365D03
L, 0xB1F740B4
L
6321 #ifndef DR_FLAC_NO_CRC
6341 crc32 = drflac_crc32_uint32(crc32, (
drflac_uint32)((data >> 32) & 0xFFFFFFFF));
6342 crc32 = drflac_crc32_uint32(crc32, (
drflac_uint32)((data >> 0) & 0xFFFFFFFF));
6351 for (i = 0; i < dataSize; ++i) {
6360 return pattern[0] ==
'O' && pattern[1] ==
'g' && pattern[2] ==
'g' && pattern[3] ==
'S';
6377 return pageBodySize;
6387 if (onRead(pUserData, data, 23) != 23) {
6388 return DRFLAC_END_OF_STREAM;
6416 for (i = 0; i < 23; ++i) {
6422 return DRFLAC_END_OF_STREAM;
6439 if (onRead(pUserData,
id, 4) != 4) {
6440 return DRFLAC_END_OF_STREAM;
6466 if (onRead(pUserData, &
id[3], 1) != 1) {
6467 return DRFLAC_END_OF_STREAM;
6499 size_t bytesActuallyRead = oggbs->
onRead(oggbs->
pUserData, bufferOut, bytesToRead);
6502 return bytesActuallyRead;
6508 if (offset <= 0x7FFFFFFF) {
6524 while (offset > 0x7FFFFFFF) {
6529 offset -= 0x7FFFFFFF;
6548 #ifndef DR_FLAC_NO_CRC
6577 #ifndef DR_FLAC_NO_CRC
6579 if (actualCRC32 !=
header.checksum) {
6593 (void)recoveryMethod;
6609 while (iByte < bytesConsumedInPage) {
6611 if (iByte + segmentSize > bytesConsumedInPage) {
6615 iByte += segmentSize;
6630 drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg);
6632 drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg;
6635 if (segmentSize < 255) {
6643 bytesToEndOfPacketOrPage += segmentSize;
6678 return drflac_oggbs__seek_to_next_packet(oggbs);
6686 size_t bytesRead = 0;
6692 while (bytesRead < bytesToRead) {
6693 size_t bytesRemainingToRead = bytesToRead - bytesRead;
6697 bytesRead += bytesRemainingToRead;
6722 int bytesSeeked = 0;
6742 while (bytesSeeked < offset) {
6743 int bytesRemainingToSeek = offset - bytesSeeked;
6747 bytesSeeked += bytesRemainingToSeek;
6788 runningGranulePosition = 0;
6807 firstBytesInPage[0] = oggbs->
pageData[0];
6808 firstBytesInPage[1] = oggbs->
pageData[1];
6810 if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {
6836 runningPCMFrameCount = runningGranulePosition;
6869 pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
6883 if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) {
6891 drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount);
6892 if (pcmFramesToDecode == 0) {
6913 runningPCMFrameCount += pcmFrameCountInThisFrame;
6953 if ((
header.headerType & 0x02) == 0) {
6959 if (pageBodySize == 51) {
6964 if (onRead(pUserData, &packetType, 1) != 1) {
6968 bytesRemainingInPage -= 1;
6969 if (packetType == 0x7F) {
6972 if (onRead(pUserData, sig, 4) != 4) {
6976 bytesRemainingInPage -= 4;
6977 if (sig[0] ==
'F' && sig[1] ==
'L' && sig[2] ==
'A' && sig[3] ==
'C') {
6980 if (onRead(pUserData, mappingVersion, 2) != 2) {
6984 if (mappingVersion[0] != 1) {
6997 if (onRead(pUserData, sig, 4) != 4) {
7001 if (sig[0] ==
'f' && sig[1] ==
'L' && sig[2] ==
'a' && sig[3] ==
'C') {
7031 onMeta(pUserDataMD, &metadata);
7090 if (pInit ==
NULL || onRead ==
NULL || onSeek ==
NULL) {
7113 if (onRead(pUserData,
id, 4) != 4) {
7118 if (
id[0] ==
'I' &&
id[1] ==
'D' &&
id[2] ==
'3') {
7123 if (onRead(pUserData, header, 6) != 6) {
7145 if (
id[0] ==
'f' &&
id[1] ==
'L' &&
id[2] ==
'a' &&
id[3] ==
'C') {
7148 #ifndef DR_FLAC_NO_OGG
7149 if (
id[0] ==
'O' &&
id[1] ==
'g' &&
id[2] ==
'g' &&
id[3] ==
'S') {
7159 #ifndef DR_FLAC_NO_OGG
7176 pFlac->
bs = pInit->
bs;
7194 #ifndef DR_FLAC_NO_OGG
7210 if (pAllocationCallbacks !=
NULL) {
7211 allocationCallbacks = *pAllocationCallbacks;
7232 allocationSize =
sizeof(
drflac);
7246 allocationSize += decodedSamplesAllocationSize;
7249 #ifndef DR_FLAC_NO_OGG
7276 if (
init.hasMetadataBlocks) {
7279 void* pUserDataOverride = pUserData;
7281 #ifndef DR_FLAC_NO_OGG
7285 pUserDataOverride = (
void*)&oggbs;
7289 if (!
drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) {
7293 allocationSize += seektableSize;
7298 if (pFlac ==
NULL) {
7306 #ifndef DR_FLAC_NO_OGG
7309 *pInternalOggbs = oggbs;
7315 pFlac->
_oggbs = (
void*)pInternalOggbs;
7322 #ifndef DR_FLAC_NO_OGG
7332 if (seektablePos != 0) {
7344 for (iSeekpoint = 0; iSeekpoint < pFlac->
seekpointCount; ++iSeekpoint) {
7373 if (!
init.hasStreamInfoBlock) {
7400 #ifndef DR_FLAC_NO_STDIO
7405 return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData);
7415 static FILE* drflac__fopen(
const char* filename)
7418 #if defined(_MSC_VER) && _MSC_VER >= 1400
7419 if (fopen_s(&pFile, filename,
"rb") != 0) {
7423 pFile = fopen(filename,
"rb");
7424 if (pFile ==
NULL) {
7438 pFile = drflac__fopen(filename);
7439 if (pFile ==
NULL) {
7444 if (pFlac ==
NULL) {
7457 pFile = drflac__fopen(filename);
7458 if (pFile ==
NULL) {
7463 if (pFlac ==
NULL) {
7475 size_t bytesRemaining;
7481 if (bytesToRead > bytesRemaining) {
7482 bytesToRead = bytesRemaining;
7485 if (bytesToRead > 0) {
7505 if (memoryStream->
currentReadPos + offset <= memoryStream->dataSize) {
7526 memoryStream.
data = (
const unsigned char*)data;
7530 if (pFlac ==
NULL) {
7537 #ifndef DR_FLAC_NO_OGG
7557 memoryStream.
data = (
const unsigned char*)data;
7561 if (pFlac ==
NULL) {
7568 #ifndef DR_FLAC_NO_OGG
7605 if (pFlac ==
NULL) {
7609 #ifndef DR_FLAC_NO_STDIO
7618 #ifndef DR_FLAC_NO_OGG
7639 for (i = 0; i < frameCount; ++i) {
7644 pOutputSamples[i*2+0] = left;
7645 pOutputSamples[i*2+1] = right;
7657 for (i = 0; i < frameCount4; ++i) {
7673 pOutputSamples[i*8+0] = left0;
7674 pOutputSamples[i*8+1] = right0;
7675 pOutputSamples[i*8+2] = left1;
7676 pOutputSamples[i*8+3] = right1;
7677 pOutputSamples[i*8+4] = left2;
7678 pOutputSamples[i*8+5] = right2;
7679 pOutputSamples[i*8+6] = left3;
7680 pOutputSamples[i*8+7] = right3;
7683 for (i = (frameCount4 << 2); i < frameCount; ++i) {
7684 int left = pInputSamples0[i] << shift0;
7685 int side = pInputSamples1[i] << shift1;
7686 int right = left - side;
7688 pOutputSamples[i*2+0] = left;
7689 pOutputSamples[i*2+1] = right;
7693 #if defined(DRFLAC_SUPPORT_SSE2)
7703 frameCount4 = frameCount >> 2;
7708 for (i = 0; i < frameCount4; ++i) {
7709 __m128i left = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
7710 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
7711 __m128i right = _mm_sub_epi32(left, side);
7713 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
7714 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
7717 for (i = (frameCount4 << 2); i < frameCount; ++i) {
7722 pOutputSamples[i*2+0] = left;
7723 pOutputSamples[i*2+1] = right;
7728 #if defined(DRFLAC_SUPPORT_NEON)
7740 frameCount4 = frameCount >> 2;
7745 shift0_4 = vdupq_n_s32(shift0);
7746 shift1_4 = vdupq_n_s32(shift1);
7748 for (i = 0; i < frameCount4; ++i) {
7753 left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
7754 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
7755 right = vsubq_s32(left, side);
7757 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
7760 for (i = (frameCount4 << 2); i < frameCount; ++i) {
7765 pOutputSamples[i*2+0] = left;
7766 pOutputSamples[i*2+1] = right;
7773 #if defined(DRFLAC_SUPPORT_SSE2)
7774 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
7775 drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
7777 #elif defined(DRFLAC_SUPPORT_NEON)
7779 drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
7785 drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
7797 for (i = 0; i < frameCount; ++i) {
7802 pOutputSamples[i*2+0] = left;
7803 pOutputSamples[i*2+1] = right;
7815 for (i = 0; i < frameCount4; ++i) {
7831 pOutputSamples[i*8+0] = left0;
7832 pOutputSamples[i*8+1] = right0;
7833 pOutputSamples[i*8+2] = left1;
7834 pOutputSamples[i*8+3] = right1;
7835 pOutputSamples[i*8+4] = left2;
7836 pOutputSamples[i*8+5] = right2;
7837 pOutputSamples[i*8+6] = left3;
7838 pOutputSamples[i*8+7] = right3;
7841 for (i = (frameCount4 << 2); i < frameCount; ++i) {
7846 pOutputSamples[i*2+0] = left;
7847 pOutputSamples[i*2+1] = right;
7851 #if defined(DRFLAC_SUPPORT_SSE2)
7861 frameCount4 = frameCount >> 2;
7866 for (i = 0; i < frameCount4; ++i) {
7867 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
7868 __m128i right = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
7869 __m128i left = _mm_add_epi32(right, side);
7871 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
7872 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
7875 for (i = (frameCount4 << 2); i < frameCount; ++i) {
7880 pOutputSamples[i*2+0] = left;
7881 pOutputSamples[i*2+1] = right;
7886 #if defined(DRFLAC_SUPPORT_NEON)
7898 frameCount4 = frameCount >> 2;
7903 shift0_4 = vdupq_n_s32(shift0);
7904 shift1_4 = vdupq_n_s32(shift1);
7906 for (i = 0; i < frameCount4; ++i) {
7911 side = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
7912 right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
7913 left = vaddq_s32(right, side);
7915 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
7918 for (i = (frameCount4 << 2); i < frameCount; ++i) {
7923 pOutputSamples[i*2+0] = left;
7924 pOutputSamples[i*2+1] = right;
7931 #if defined(DRFLAC_SUPPORT_SSE2)
7932 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
7933 drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
7935 #elif defined(DRFLAC_SUPPORT_NEON)
7937 drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
7943 drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
7960 pOutputSamples[i*2+0] = ((mid + side) >> 1) << unusedBitsPerSample;
7961 pOutputSamples[i*2+1] = ((mid - side) >> 1) << unusedBitsPerSample;
7974 for (i = 0; i < frameCount4; ++i) {
7999 temp0L = ((mid0 + side0) << shift);
8000 temp1L = ((mid1 + side1) << shift);
8001 temp2L = ((mid2 + side2) << shift);
8002 temp3L = ((mid3 + side3) << shift);
8004 temp0R = ((mid0 - side0) << shift);
8005 temp1R = ((mid1 - side1) << shift);
8006 temp2R = ((mid2 - side2) << shift);
8007 temp3R = ((mid3 - side3) << shift);
8009 pOutputSamples[i*8+0] = temp0L;
8010 pOutputSamples[i*8+1] = temp0R;
8011 pOutputSamples[i*8+2] = temp1L;
8012 pOutputSamples[i*8+3] = temp1R;
8013 pOutputSamples[i*8+4] = temp2L;
8014 pOutputSamples[i*8+5] = temp2R;
8015 pOutputSamples[i*8+6] = temp3L;
8016 pOutputSamples[i*8+7] = temp3R;
8019 for (i = 0; i < frameCount4; ++i) {
8044 temp0L = ((mid0 + side0) >> 1);
8045 temp1L = ((mid1 + side1) >> 1);
8046 temp2L = ((mid2 + side2) >> 1);
8047 temp3L = ((mid3 + side3) >> 1);
8049 temp0R = ((mid0 - side0) >> 1);
8050 temp1R = ((mid1 - side1) >> 1);
8051 temp2R = ((mid2 - side2) >> 1);
8052 temp3R = ((mid3 - side3) >> 1);
8054 pOutputSamples[i*8+0] = temp0L;
8055 pOutputSamples[i*8+1] = temp0R;
8056 pOutputSamples[i*8+2] = temp1L;
8057 pOutputSamples[i*8+3] = temp1R;
8058 pOutputSamples[i*8+4] = temp2L;
8059 pOutputSamples[i*8+5] = temp2R;
8060 pOutputSamples[i*8+6] = temp3L;
8061 pOutputSamples[i*8+7] = temp3R;
8065 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8071 pOutputSamples[i*2+0] = ((mid + side) >> 1) << unusedBitsPerSample;
8072 pOutputSamples[i*2+1] = ((mid - side) >> 1) << unusedBitsPerSample;
8076 #if defined(DRFLAC_SUPPORT_SSE2)
8085 frameCount4 = frameCount >> 2;
8087 shift = unusedBitsPerSample;
8089 for (i = 0; i < frameCount4; ++i) {
8098 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
8100 left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
8101 right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
8103 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
8104 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
8107 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8113 pOutputSamples[i*2+0] = ((mid + side) >> 1);
8114 pOutputSamples[i*2+1] = ((mid - side) >> 1);
8118 for (i = 0; i < frameCount4; ++i) {
8127 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
8129 left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
8130 right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
8132 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
8133 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
8136 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8142 pOutputSamples[i*2+0] = ((mid + side) << shift);
8143 pOutputSamples[i*2+1] = ((mid - side) << shift);
8149 #if defined(DRFLAC_SUPPORT_NEON)
8155 int32x4_t wbpsShift0_4;
8156 int32x4_t wbpsShift1_4;
8161 frameCount4 = frameCount >> 2;
8165 one4 = vdupq_n_s32(1);
8167 shift = unusedBitsPerSample;
8169 for (i = 0; i < frameCount4; ++i) {
8175 mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4);
8176 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4);
8178 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, one4));
8180 left = vshrq_n_s32(vaddq_s32(mid, side), 1);
8181 right = vshrq_n_s32(vsubq_s32(mid, side), 1);
8183 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
8186 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8192 pOutputSamples[i*2+0] = ((mid + side) >> 1);
8193 pOutputSamples[i*2+1] = ((mid - side) >> 1);
8199 shift4 = vdupq_n_s32(shift);
8201 for (i = 0; i < frameCount4; ++i) {
8207 mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4);
8208 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4);
8210 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, one4));
8212 left = vshlq_s32(vaddq_s32(mid, side), shift4);
8213 right = vshlq_s32(vsubq_s32(mid, side), shift4);
8215 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
8218 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8224 pOutputSamples[i*2+0] = ((mid + side) << shift);
8225 pOutputSamples[i*2+1] = ((mid - side) << shift);
8233 #if defined(DRFLAC_SUPPORT_SSE2)
8234 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
8235 drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
8237 #elif defined(DRFLAC_SUPPORT_NEON)
8239 drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
8245 drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
8271 for (i = 0; i < frameCount4; ++i) {
8282 pOutputSamples[i*8+0] = tempL0;
8283 pOutputSamples[i*8+1] = tempR0;
8284 pOutputSamples[i*8+2] = tempL1;
8285 pOutputSamples[i*8+3] = tempR1;
8286 pOutputSamples[i*8+4] = tempL2;
8287 pOutputSamples[i*8+5] = tempR2;
8288 pOutputSamples[i*8+6] = tempL3;
8289 pOutputSamples[i*8+7] = tempR3;
8292 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8293 pOutputSamples[i*2+0] = (pInputSamples0[i] << shift0);
8294 pOutputSamples[i*2+1] = (pInputSamples1[i] << shift1);
/*
Excerpt compiled only when DRFLAC_SUPPORT_SSE2 is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_s32__decode_independent_stereo__sse2 - confirm.
*/
8298 #if defined(DRFLAC_SUPPORT_SSE2)
/* Vector loop: four frames per iteration. */
8307 for (i = 0; i < frameCount4; ++i) {
/* Unaligned 128-bit load of four 32-bit samples per channel, each lane shifted left by shift0 / shift1. */
8308 __m128i left = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
8309 __m128i right = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
/* Interleave as L0,R0,L1,R1 (low halves) and L2,R2,L3,R3 (high halves); two unaligned stores cover eight outputs. */
8311 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
8312 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
/* Scalar remainder for the frames the vector loop did not cover. */
8315 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8316 pOutputSamples[i*2+0] = (pInputSamples0[i] << shift0);
8317 pOutputSamples[i*2+1] = (pInputSamples1[i] << shift1);
/*
Excerpt compiled only when DRFLAC_SUPPORT_NEON is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_s32__decode_independent_stereo__neon - confirm.
*/
8322 #if defined(DRFLAC_SUPPORT_NEON)
/* Broadcast the per-channel shift counts to all four lanes; vshlq_s32 takes its shift amounts as a vector. */
8331 int32x4_t shift4_0 = vdupq_n_s32(shift0);
8332 int32x4_t shift4_1 = vdupq_n_s32(shift1);
/* Vector loop: four frames per iteration. */
8334 for (i = 0; i < frameCount4; ++i) {
/* Load four 32-bit samples per channel and shift each lane left by the broadcast count. */
8338 left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift4_0);
8339 right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift4_1);
/* Zip left/right together and hand the pair to drflac__vst2q_s32 (helper defined elsewhere) to write eight outputs. */
8341 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
/* Scalar remainder for the frames the vector loop did not cover. */
8344 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8345 pOutputSamples[i*2+0] = (pInputSamples0[i] << shift0);
8346 pOutputSamples[i*2+1] = (pInputSamples1[i] << shift1);
/*
Excerpt from the s32 independent-stereo dispatch. Only a sample of the statements is visible; the
function header and the lines between the numbered statements lie outside this view.
*/
8353 #if defined(DRFLAC_SUPPORT_SSE2)
/* SSE2 variant: used only when drflac__gIsSSE2Supported is non-zero and the stream has at most 24 bits per sample. */
8354 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
8355 drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
8357 #elif defined(DRFLAC_SUPPORT_NEON)
/* NEON variant. NOTE(review): the run-time condition guarding this call is not visible here. */
8359 drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* Reference variant, called with the same six arguments. NOTE(review): how this call is selected is not visible here. */
8365 drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/*
Excerpt from a frame-reading loop; the function header is not visible (presumably the public s32
reader - confirm). Bodies of the guards and branches below are outside this view.
*/
/* Guard: NULL decoder or a request for zero frames. */
8378 if (pFlac ==
NULL || framesToRead == 0) {
/* A NULL output buffer is treated as a separate case; what is done for it is not visible here. */
8382 if (pBufferOut ==
NULL) {
/* Keep going until the requested number of frames has been consumed. */
8389 while (framesToRead > 0) {
/* Two-channel streams take a dedicated branch. */
8404 if (channelCount == 2) {
/* Other branch: walk every frame of this iteration, and every channel within each frame. */
8434 for (i = 0; i < frameCountThisIteration; ++i) {
8436 for (j = 0; j < channelCount; ++j) {
/* Bookkeeping after each iteration: count the frames, advance the output pointer by frames*channels, shrink the remaining request. */
8442 framesRead += frameCountThisIteration;
8443 pBufferOut += frameCountThisIteration * channelCount;
8444 framesToRead -= frameCountThisIteration;
/*
Only loop headers are visible here; the loop bodies and enclosing function headers are outside this view.
NOTE(review): the first loop covers every frame while the next two form a four-at-a-time loop plus
remainder, so these presumably belong to two separate s16 left/side variants - confirm.
*/
/* One frame per iteration over the whole range. */
8458 for (i = 0; i < frameCount; ++i) {
/* One iteration per group of four frames. */
8479 for (i = 0; i < frameCount4; ++i) {
/* Remainder: frames from frameCount4*4 up to frameCount. */
8515 for (i = (frameCount4 << 2); i < frameCount; ++i) {
/*
Excerpt compiled only when DRFLAC_SUPPORT_SSE2 is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_s16__decode_left_side__sse2 - confirm.
*/
8528 #if defined(DRFLAC_SUPPORT_SSE2)
/* Number of complete groups of four frames. */
8538 frameCount4 = frameCount >> 2;
8543 for (i = 0; i < frameCount4; ++i) {
/* Channel 0 carries left, channel 1 carries side; each lane is shifted left by shift0 / shift1 after an unaligned load. */
8544 __m128i left = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
8545 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
/* Reconstruct right = left - side. */
8546 __m128i right = _mm_sub_epi32(left, side);
/* Arithmetic shift right by 16 leaves the upper 16 bits of each 32-bit lane, sign-extended. */
8548 left = _mm_srai_epi32(left, 16);
8549 right = _mm_srai_epi32(right, 16);
/* Eight 16-bit outputs per iteration; drflac__mm_packs_interleaved_epi32 is defined elsewhere (presumably packs and interleaves L/R - confirm). */
8551 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
/* Scalar remainder; its body is not visible here. */
8554 for (i = (frameCount4 << 2); i < frameCount; ++i) {
/*
Excerpt compiled only when DRFLAC_SUPPORT_NEON is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_s16__decode_left_side__neon - confirm.
*/
8568 #if defined(DRFLAC_SUPPORT_NEON)
/* Number of complete groups of four frames. */
8580 frameCount4 = frameCount >> 2;
/* Broadcast the per-channel shift counts to all four lanes for vshlq_s32. */
8585 shift0_4 = vdupq_n_s32(shift0);
8586 shift1_4 = vdupq_n_s32(shift1);
8588 for (i = 0; i < frameCount4; ++i) {
/* Channel 0 carries left, channel 1 carries side. */
8593 left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
8594 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
/* Reconstruct right = left - side. */
8595 right = vsubq_s32(left, side);
/* Keep the upper 16 bits of each lane (arithmetic shift). */
8597 left = vshrq_n_s32(left, 16);
8598 right = vshrq_n_s32(right, 16);
/* Narrow each vector to four 16-bit lanes, zip L/R, and write eight outputs through drflac__vst2q_s16 (helper defined elsewhere). */
8600 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
/* Scalar remainder; its body is not visible here. */
8603 for (i = (frameCount4 << 2); i < frameCount; ++i) {
/*
Excerpt from the s16 left/side dispatch. Only a sample of the statements is visible; the function
header and the lines between the numbered statements lie outside this view.
*/
8619 #if defined(DRFLAC_SUPPORT_SSE2)
/* SSE2 variant: used only when drflac__gIsSSE2Supported is non-zero and the stream has at most 24 bits per sample. */
8620 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
8621 drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
8623 #elif defined(DRFLAC_SUPPORT_NEON)
/* NEON variant. NOTE(review): the run-time condition guarding this call is not visible here. */
8625 drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* Reference variant, called with the same six arguments. NOTE(review): how this call is selected is not visible here. */
8631 drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/*
Only loop headers are visible here; the loop bodies and enclosing function headers are outside this view.
NOTE(review): the first loop covers every frame while the next two form a four-at-a-time loop plus
remainder, so these presumably belong to two separate s16 right/side variants - confirm.
*/
/* One frame per iteration over the whole range. */
8643 for (i = 0; i < frameCount; ++i) {
/* One iteration per group of four frames. */
8664 for (i = 0; i < frameCount4; ++i) {
/* Remainder: frames from frameCount4*4 up to frameCount. */
8700 for (i = (frameCount4 << 2); i < frameCount; ++i) {
/*
Excerpt compiled only when DRFLAC_SUPPORT_SSE2 is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_s16__decode_right_side__sse2 - confirm.
*/
8713 #if defined(DRFLAC_SUPPORT_SSE2)
/* Number of complete groups of four frames. */
8723 frameCount4 = frameCount >> 2;
8728 for (i = 0; i < frameCount4; ++i) {
/* Channel 0 carries side, channel 1 carries right; each lane is shifted left by shift0 / shift1 after an unaligned load. */
8729 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
8730 __m128i right = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
/* Reconstruct left = right + side. */
8731 __m128i left = _mm_add_epi32(right, side);
/* Arithmetic shift right by 16 leaves the upper 16 bits of each 32-bit lane, sign-extended. */
8733 left = _mm_srai_epi32(left, 16);
8734 right = _mm_srai_epi32(right, 16);
/* Eight 16-bit outputs per iteration; drflac__mm_packs_interleaved_epi32 is defined elsewhere (presumably packs and interleaves L/R - confirm). */
8736 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
/* Scalar remainder; its body is not visible here. */
8739 for (i = (frameCount4 << 2); i < frameCount; ++i) {
/*
Excerpt compiled only when DRFLAC_SUPPORT_NEON is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_s16__decode_right_side__neon - confirm.
*/
8753 #if defined(DRFLAC_SUPPORT_NEON)
/* Number of complete groups of four frames. */
8765 frameCount4 = frameCount >> 2;
/* Broadcast the per-channel shift counts to all four lanes for vshlq_s32. */
8770 shift0_4 = vdupq_n_s32(shift0);
8771 shift1_4 = vdupq_n_s32(shift1);
8773 for (i = 0; i < frameCount4; ++i) {
/* Channel 0 carries side, channel 1 carries right. */
8778 side = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
8779 right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
/* Reconstruct left = right + side. */
8780 left = vaddq_s32(right, side);
/* Keep the upper 16 bits of each lane (arithmetic shift). */
8782 left = vshrq_n_s32(left, 16);
8783 right = vshrq_n_s32(right, 16);
/* Narrow each vector to four 16-bit lanes, zip L/R, and write eight outputs through drflac__vst2q_s16 (helper defined elsewhere). */
8785 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
/* Scalar remainder; its body is not visible here. */
8788 for (i = (frameCount4 << 2); i < frameCount; ++i) {
/*
Excerpt from the s16 right/side dispatch. Only a sample of the statements is visible; the function
header and the lines between the numbered statements lie outside this view.
*/
8804 #if defined(DRFLAC_SUPPORT_SSE2)
/* SSE2 variant: used only when drflac__gIsSSE2Supported is non-zero and the stream has at most 24 bits per sample. */
8805 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
8806 drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
8808 #elif defined(DRFLAC_SUPPORT_NEON)
/* NEON variant. NOTE(review): the run-time condition guarding this call is not visible here. */
8810 drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* Reference variant, called with the same six arguments. NOTE(review): how this call is selected is not visible here. */
8816 drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/*
Excerpt: the two output stores of a per-frame s16 mid/side conversion. The loop and the computation of
mid and side are outside this view (presumably the reference variant - confirm).
Left is (mid + side) >> 1 and right is (mid - side) >> 1; each is shifted left by unusedBitsPerSample
and then right by 16 before the cast to drflac_int16.
*/
8833 pOutputSamples[i*2+0] = (
drflac_int16)((((mid + side) >> 1) << unusedBitsPerSample) >> 16);
8834 pOutputSamples[i*2+1] = (
drflac_int16)((((mid - side) >> 1) << unusedBitsPerSample) >> 16);
/*
Excerpt from a four-frames-per-iteration s16 mid/side converter (header not visible; presumably the
unrolled scalar variant - confirm). Two alternative unrolled loops are visible; the condition that
selects between them, and any adjustment made to shift before use, are on lines not shown.
*/
8844 int shift = unusedBitsPerSample;
/* First unrolled loop: mid +/- side is shifted left by shift. */
8847 for (i = 0; i < frameCount4; ++i) {
/* Left channel of frames 0..3. */
8872 temp0L = ((mid0 + side0) << shift);
8873 temp1L = ((mid1 + side1) << shift);
8874 temp2L = ((mid2 + side2) << shift);
8875 temp3L = ((mid3 + side3) << shift);
/* Right channel of frames 0..3. */
8877 temp0R = ((mid0 - side0) << shift);
8878 temp1R = ((mid1 - side1) << shift);
8879 temp2R = ((mid2 - side2) << shift);
8880 temp3R = ((mid3 - side3) << shift);
/* Second unrolled loop: mid +/- side is halved with an arithmetic shift instead. */
8902 for (i = 0; i < frameCount4; ++i) {
/* Left channel of frames 0..3. */
8927 temp0L = ((mid0 + side0) >> 1);
8928 temp1L = ((mid1 + side1) >> 1);
8929 temp2L = ((mid2 + side2) >> 1);
8930 temp3L = ((mid3 + side3) >> 1);
/* Right channel of frames 0..3. */
8932 temp0R = ((mid0 - side0) >> 1);
8933 temp1R = ((mid1 - side1) >> 1);
8934 temp2R = ((mid2 - side2) >> 1);
8935 temp3R = ((mid3 - side3) >> 1);
/* Remainder: frames from frameCount4*4 up to frameCount, using the full halve / shift-left / shift-right-16 expression. */
8958 for (i = (frameCount4 << 2); i < frameCount; ++i) {
8964 pOutputSamples[i*2+0] = (
drflac_int16)((((mid + side) >> 1) << unusedBitsPerSample) >> 16);
8965 pOutputSamples[i*2+1] = (
drflac_int16)((((mid - side) >> 1) << unusedBitsPerSample) >> 16);
/*
Excerpt compiled only when DRFLAC_SUPPORT_SSE2 is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_s16__decode_mid_side__sse2 - confirm. Two vector-loop/remainder pairs are
visible; the condition that selects between them, and any adjustment of shift, are on lines not shown.
*/
8969 #if defined(DRFLAC_SUPPORT_SSE2)
/* Number of complete groups of four frames. */
8978 frameCount4 = frameCount >> 2;
8980 shift = unusedBitsPerSample;
/* First pair: results are halved with >> 1. */
8982 for (i = 0; i < frameCount4; ++i) {
/* mid = (mid << 1) | (side & 1): double mid and copy the low bit of side into bit 0. */
8991 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
/* left = (mid + side) >> 1, right = (mid - side) >> 1 (arithmetic shifts). */
8993 left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
8994 right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
/* Keep the upper 16 bits of each 32-bit lane. */
8996 left = _mm_srai_epi32(left, 16);
8997 right = _mm_srai_epi32(right, 16);
/* Eight 16-bit outputs per iteration via drflac__mm_packs_interleaved_epi32 (helper defined elsewhere). */
8999 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
/* Scalar remainder matching the loop above. */
9002 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9008 pOutputSamples[i*2+0] = (
drflac_int16)(((mid + side) >> 1) >> 16);
9009 pOutputSamples[i*2+1] = (
drflac_int16)(((mid - side) >> 1) >> 16);
/* Second pair: results are shifted left by shift instead of being halved. */
9013 for (i = 0; i < frameCount4; ++i) {
/* Same mid reconstruction as in the first loop. */
9022 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
9024 left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
9025 right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
/* Keep the upper 16 bits of each 32-bit lane. */
9027 left = _mm_srai_epi32(left, 16);
9028 right = _mm_srai_epi32(right, 16);
9030 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
/* Scalar remainder matching the loop above. */
9033 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9039 pOutputSamples[i*2+0] = (
drflac_int16)(((mid + side) << shift) >> 16);
9040 pOutputSamples[i*2+1] = (
drflac_int16)(((mid - side) << shift) >> 16);
/*
Excerpt compiled only when DRFLAC_SUPPORT_NEON is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_s16__decode_mid_side__neon - confirm. Two vector-loop/remainder pairs are
visible; the condition that selects between them, and any adjustment of shift, are on lines not shown.
*/
9046 #if defined(DRFLAC_SUPPORT_NEON)
/* Per-channel shift vectors applied to the raw input; their initialisation is not visible here. */
9052 int32x4_t wbpsShift0_4;
9053 int32x4_t wbpsShift1_4;
/* Number of complete groups of four frames. */
9057 frameCount4 = frameCount >> 2;
9062 shift = unusedBitsPerSample;
/* First pair: results are halved with >> 1. */
9064 for (i = 0; i < frameCount4; ++i) {
/* Load four samples per channel and apply the per-channel shift vector. */
9070 mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4);
9071 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4);
/* mid = (mid << 1) | (side & 1): double mid and copy the low bit of side into bit 0. */
9073 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1)));
/* left = (mid + side) >> 1, right = (mid - side) >> 1. */
9075 left = vshrq_n_s32(vaddq_s32(mid, side), 1);
9076 right = vshrq_n_s32(vsubq_s32(mid, side), 1);
/* Keep the upper 16 bits of each lane. */
9078 left = vshrq_n_s32(left, 16);
9079 right = vshrq_n_s32(right, 16);
/* Narrow to 16-bit lanes, zip L/R, and write eight outputs through drflac__vst2q_s16 (helper defined elsewhere). */
9081 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
/* Scalar remainder matching the loop above. */
9084 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9090 pOutputSamples[i*2+0] = (
drflac_int16)(((mid + side) >> 1) >> 16);
9091 pOutputSamples[i*2+1] = (
drflac_int16)(((mid - side) >> 1) >> 16);
/* Second pair: results are shifted left by shift, broadcast to a vector for vshlq_s32. */
9097 shift4 = vdupq_n_s32(shift);
9099 for (i = 0; i < frameCount4; ++i) {
9105 mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4);
9106 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4);
/* Same mid reconstruction as in the first loop. */
9108 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1)));
9110 left = vshlq_s32(vaddq_s32(mid, side), shift4);
9111 right = vshlq_s32(vsubq_s32(mid, side), shift4);
/* Keep the upper 16 bits of each lane. */
9113 left = vshrq_n_s32(left, 16);
9114 right = vshrq_n_s32(right, 16);
9116 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
/* Scalar remainder matching the loop above. */
9119 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9125 pOutputSamples[i*2+0] = (
drflac_int16)(((mid + side) << shift) >> 16);
9126 pOutputSamples[i*2+1] = (
drflac_int16)(((mid - side) << shift) >> 16);
/*
Excerpt from the s16 mid/side dispatch. Only a sample of the statements is visible; the function
header and the lines between the numbered statements lie outside this view.
*/
9134 #if defined(DRFLAC_SUPPORT_SSE2)
/* SSE2 variant: used only when drflac__gIsSSE2Supported is non-zero and the stream has at most 24 bits per sample. */
9135 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
9136 drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9138 #elif defined(DRFLAC_SUPPORT_NEON)
/* NEON variant. NOTE(review): the run-time condition guarding this call is not visible here. */
9140 drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* Reference variant, called with the same six arguments. NOTE(review): how this call is selected is not visible here. */
9146 drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/*
Excerpt from a four-frames-per-iteration stereo converter writing 16-bit output (header not visible;
presumably the unrolled scalar form of the s16 independent-stereo path - confirm).
*/
/* Main loop: one iteration per group of four frames; its body is not visible here. */
9172 for (i = 0; i < frameCount4; ++i) {
/* Remainder: frames from frameCount4*4 up to frameCount, one frame per iteration. */
9203 for (i = (frameCount4 << 2); i < frameCount; ++i) {
/* Each channel is shifted left by its own count, then the upper 16 bits are kept. */
9204 pOutputSamples[i*2+0] = (
drflac_int16)((pInputSamples0[i] << shift0) >> 16);
9205 pOutputSamples[i*2+1] = (
drflac_int16)((pInputSamples1[i] << shift1) >> 16);
/*
Excerpt compiled only when DRFLAC_SUPPORT_SSE2 is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_s16__decode_independent_stereo__sse2 - confirm.
*/
9209 #if defined(DRFLAC_SUPPORT_SSE2)
/* Vector loop: four frames per iteration. */
9218 for (i = 0; i < frameCount4; ++i) {
/* Unaligned 128-bit load of four 32-bit samples per channel, each lane shifted left by shift0 / shift1. */
9219 __m128i left = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
9220 __m128i right = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
/* Keep the upper 16 bits of each 32-bit lane (arithmetic shift). */
9222 left = _mm_srai_epi32(left, 16);
9223 right = _mm_srai_epi32(right, 16);
/* Eight 16-bit outputs per iteration via drflac__mm_packs_interleaved_epi32 (helper defined elsewhere). */
9226 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
/* Scalar remainder for the frames the vector loop did not cover. */
9229 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9230 pOutputSamples[i*2+0] = (
drflac_int16)((pInputSamples0[i] << shift0) >> 16);
9231 pOutputSamples[i*2+1] = (
drflac_int16)((pInputSamples1[i] << shift1) >> 16);
/*
Excerpt compiled only when DRFLAC_SUPPORT_NEON is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_s16__decode_independent_stereo__neon - confirm.
*/
9236 #if defined(DRFLAC_SUPPORT_NEON)
/* Broadcast the per-channel shift counts to all four lanes for vshlq_s32. */
9245 int32x4_t shift0_4 = vdupq_n_s32(shift0);
9246 int32x4_t shift1_4 = vdupq_n_s32(shift1);
/* Vector loop: four frames per iteration. */
9248 for (i = 0; i < frameCount4; ++i) {
9252 left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
9253 right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
/* Keep the upper 16 bits of each lane. */
9255 left = vshrq_n_s32(left, 16);
9256 right = vshrq_n_s32(right, 16);
/* Narrow to 16-bit lanes, zip L/R, and write eight outputs through drflac__vst2q_s16 (helper defined elsewhere). */
9258 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
/* Scalar remainder for the frames the vector loop did not cover. */
9261 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9262 pOutputSamples[i*2+0] = (
drflac_int16)((pInputSamples0[i] << shift0) >> 16);
9263 pOutputSamples[i*2+1] = (
drflac_int16)((pInputSamples1[i] << shift1) >> 16);
/*
Excerpt from the s16 independent-stereo dispatch. Only a sample of the statements is visible; the
function header and the lines between the numbered statements lie outside this view.
*/
9270 #if defined(DRFLAC_SUPPORT_SSE2)
/* SSE2 variant: used only when drflac__gIsSSE2Supported is non-zero and the stream has at most 24 bits per sample. */
9271 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
9272 drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9274 #elif defined(DRFLAC_SUPPORT_NEON)
/* NEON variant. NOTE(review): the run-time condition guarding this call is not visible here. */
9276 drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* Reference variant, called with the same six arguments. NOTE(review): how this call is selected is not visible here. */
9282 drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/*
Excerpt from a frame-reading loop producing 16-bit output; the function header is not visible
(presumably the public s16 reader - confirm). Bodies of the guards and branches are mostly outside this view.
*/
/* Guard: NULL decoder or a request for zero frames. */
9294 if (pFlac ==
NULL || framesToRead == 0) {
/* A NULL output buffer is treated as a separate case; what is done for it is not visible here. */
9298 if (pBufferOut ==
NULL) {
/* Keep going until the requested number of frames has been consumed. */
9305 while (framesToRead > 0) {
/* Two-channel streams take a dedicated branch. */
9320 if (channelCount == 2) {
/* Other branch: walk every frame of this iteration, and every channel within each frame. */
9350 for (i = 0; i < frameCountThisIteration; ++i) {
9352 for (j = 0; j < channelCount; ++j) {
/* Store the upper 16 bits of the 32-bit sample at the interleaved position frame*channelCount + channel. */
9354 pBufferOut[(i*channelCount)+j] = (
drflac_int16)(sampleS32 >> 16);
/* Bookkeeping after each iteration: count the frames, advance the output pointer by frames*channels, shrink the remaining request. */
9359 framesRead += frameCountThisIteration;
9360 pBufferOut += frameCountThisIteration * channelCount;
9361 framesToRead -= frameCountThisIteration;
/*
Excerpt from a per-frame float conversion (header not visible; presumably the reference f32 left/side
variant - confirm). left and right are computed on lines not shown.
*/
9375 for (i = 0; i < frameCount; ++i) {
/* Divide by 2147483648.0 (2^31) in double precision, then narrow to float. */
9380 pOutputSamples[i*2+0] = (float)(left / 2147483648.0);
9381 pOutputSamples[i*2+1] = (float)(right / 2147483648.0);
/*
Excerpt from a four-frames-per-iteration float converter (header not visible; presumably the unrolled
scalar f32 left/side variant - confirm). The leftN/rightN values are computed on lines not shown.
*/
/* Scale factor 1/2^31; the division is done in double and the result stored as float. */
9391 float factor = 1 / 2147483648.0;
/* Main loop: one iteration per group of four frames. */
9395 for (i = 0; i < frameCount4; ++i) {
/* Eight scaled outputs per iteration, interleaved L0,R0,L1,R1,L2,R2,L3,R3. */
9411 pOutputSamples[i*8+0] = left0 * factor;
9412 pOutputSamples[i*8+1] = right0 * factor;
9413 pOutputSamples[i*8+2] = left1 * factor;
9414 pOutputSamples[i*8+3] = right1 * factor;
9415 pOutputSamples[i*8+4] = left2 * factor;
9416 pOutputSamples[i*8+5] = right2 * factor;
9417 pOutputSamples[i*8+6] = left3 * factor;
9418 pOutputSamples[i*8+7] = right3 * factor;
/* Remainder: frames from frameCount4*4 up to frameCount, one frame per iteration. */
9421 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9426 pOutputSamples[i*2+0] = (float)(left * factor);
9427 pOutputSamples[i*2+1] = (float)(right * factor);
/*
Excerpt compiled only when DRFLAC_SUPPORT_SSE2 is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_f32__decode_left_side__sse2 - confirm.
*/
9431 #if defined(DRFLAC_SUPPORT_SSE2)
/* Number of complete groups of four frames. */
9442 frameCount4 = frameCount >> 2;
/* Scale factor 1/8388608 (1/2^23) broadcast to all four lanes. */
9444 factor = _mm_set1_ps(1.0f / 8388608.0f);
9448 for (i = 0; i < frameCount4; ++i) {
/* Channel 0 carries left, channel 1 carries side; each lane is shifted left by shift0 / shift1 after an unaligned load. */
9449 __m128i left = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
9450 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
/* Reconstruct right = left - side. */
9451 __m128i right = _mm_sub_epi32(left, side);
/* Convert each integer lane to float and scale. */
9452 __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor);
9453 __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
/* Interleave as L0,R0,L1,R1 and L2,R2,L3,R3; two unaligned stores cover eight floats. */
9455 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
9456 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
/* Scalar remainder; left and right are computed on lines not shown and scaled by the same 1/2^23. */
9459 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9464 pOutputSamples[i*2+0] = (float)(left / 8388608.0f);
9465 pOutputSamples[i*2+1] = (float)(right / 8388608.0f);
/*
Excerpt compiled only when DRFLAC_SUPPORT_NEON is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_f32__decode_left_side__neon - confirm.
*/
9470 #if defined(DRFLAC_SUPPORT_NEON)
9477 float32x4_t factor4;
/* Number of complete groups of four frames. */
9483 frameCount4 = frameCount >> 2;
/* Scale factor 1/8388608 (1/2^23) broadcast to all four lanes. */
9485 factor4 = vdupq_n_f32(1.0f / 8388608.0f);
/* Broadcast the per-channel shift counts for vshlq_s32. */
9490 shift0_4 = vdupq_n_s32(shift0);
9491 shift1_4 = vdupq_n_s32(shift1);
9493 for (i = 0; i < frameCount4; ++i) {
/* Channel 0 carries left, channel 1 carries side. */
9500 left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
9501 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
/* Reconstruct right = left - side. */
9502 right = vsubq_s32(left, side);
/* Convert each integer lane to float and scale. */
9503 leftf = vmulq_f32(vcvtq_f32_s32(left), factor4);
9504 rightf = vmulq_f32(vcvtq_f32_s32(right), factor4);
/* Zip L/R and write eight floats through drflac__vst2q_f32 (helper defined elsewhere). */
9506 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
/* Scalar remainder; left and right are computed on lines not shown and scaled by the same 1/2^23. */
9509 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9514 pOutputSamples[i*2+0] = (float)(left / 8388608.0f);
9515 pOutputSamples[i*2+1] = (float)(right / 8388608.0f);
/*
Excerpt from the f32 left/side dispatch. Only a sample of the statements is visible; the function
header and the lines between the numbered statements lie outside this view.
*/
9522 #if defined(DRFLAC_SUPPORT_SSE2)
/* SSE2 variant: used only when drflac__gIsSSE2Supported is non-zero and the stream has at most 24 bits per sample. */
9523 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
9524 drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9526 #elif defined(DRFLAC_SUPPORT_NEON)
/* NEON variant. NOTE(review): the run-time condition guarding this call is not visible here. */
9528 drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* Reference variant, called with the same six arguments. NOTE(review): how this call is selected is not visible here. */
9534 drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/*
Excerpt from a per-frame float conversion (header not visible; presumably the reference f32 right/side
variant - confirm). left and right are computed on lines not shown.
*/
9546 for (i = 0; i < frameCount; ++i) {
/* Divide by 2147483648.0 (2^31) in double precision, then narrow to float. */
9551 pOutputSamples[i*2+0] = (float)(left / 2147483648.0);
9552 pOutputSamples[i*2+1] = (float)(right / 2147483648.0);
/*
Excerpt from a four-frames-per-iteration float converter (header not visible; presumably the unrolled
scalar f32 right/side variant - confirm). The leftN/rightN values are computed on lines not shown.
*/
/* Scale factor 1/2^31; the division is done in double and the result stored as float. */
9562 float factor = 1 / 2147483648.0;
/* Main loop: one iteration per group of four frames. */
9566 for (i = 0; i < frameCount4; ++i) {
/* Eight scaled outputs per iteration, interleaved L0,R0,L1,R1,L2,R2,L3,R3. */
9582 pOutputSamples[i*8+0] = left0 * factor;
9583 pOutputSamples[i*8+1] = right0 * factor;
9584 pOutputSamples[i*8+2] = left1 * factor;
9585 pOutputSamples[i*8+3] = right1 * factor;
9586 pOutputSamples[i*8+4] = left2 * factor;
9587 pOutputSamples[i*8+5] = right2 * factor;
9588 pOutputSamples[i*8+6] = left3 * factor;
9589 pOutputSamples[i*8+7] = right3 * factor;
/* Remainder: frames from frameCount4*4 up to frameCount, one frame per iteration. */
9592 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9597 pOutputSamples[i*2+0] = (float)(left * factor);
9598 pOutputSamples[i*2+1] = (float)(right * factor);
/*
Excerpt compiled only when DRFLAC_SUPPORT_SSE2 is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_f32__decode_right_side__sse2 - confirm.
*/
9602 #if defined(DRFLAC_SUPPORT_SSE2)
/* Number of complete groups of four frames. */
9613 frameCount4 = frameCount >> 2;
/* Scale factor 1/8388608 (1/2^23) broadcast to all four lanes. */
9615 factor = _mm_set1_ps(1.0f / 8388608.0f);
9619 for (i = 0; i < frameCount4; ++i) {
/* Channel 0 carries side, channel 1 carries right; each lane is shifted left by shift0 / shift1 after an unaligned load. */
9620 __m128i side = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
9621 __m128i right = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
/* Reconstruct left = right + side. */
9622 __m128i left = _mm_add_epi32(right, side);
/* Convert each integer lane to float and scale. */
9623 __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor);
9624 __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
/* Interleave as L0,R0,L1,R1 and L2,R2,L3,R3; two unaligned stores cover eight floats. */
9626 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
9627 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
/* Scalar remainder; left and right are computed on lines not shown and scaled by the same 1/2^23. */
9630 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9635 pOutputSamples[i*2+0] = (float)(left / 8388608.0f);
9636 pOutputSamples[i*2+1] = (float)(right / 8388608.0f);
/*
Excerpt compiled only when DRFLAC_SUPPORT_NEON is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_f32__decode_right_side__neon - confirm.
*/
9641 #if defined(DRFLAC_SUPPORT_NEON)
9648 float32x4_t factor4;
/* Number of complete groups of four frames. */
9654 frameCount4 = frameCount >> 2;
/* Scale factor 1/8388608 (1/2^23) broadcast to all four lanes. */
9656 factor4 = vdupq_n_f32(1.0f / 8388608.0f);
/* Broadcast the per-channel shift counts for vshlq_s32. */
9661 shift0_4 = vdupq_n_s32(shift0);
9662 shift1_4 = vdupq_n_s32(shift1);
9664 for (i = 0; i < frameCount4; ++i) {
/* Channel 0 carries side, channel 1 carries right. */
9671 side = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
9672 right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
/* Reconstruct left = right + side. */
9673 left = vaddq_s32(right, side);
/* Convert each integer lane to float and scale. */
9674 leftf = vmulq_f32(vcvtq_f32_s32(left), factor4);
9675 rightf = vmulq_f32(vcvtq_f32_s32(right), factor4);
/* Zip L/R and write eight floats through drflac__vst2q_f32 (helper defined elsewhere). */
9677 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
/* Scalar remainder; left and right are computed on lines not shown and scaled by the same 1/2^23. */
9680 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9685 pOutputSamples[i*2+0] = (float)(left / 8388608.0f);
9686 pOutputSamples[i*2+1] = (float)(right / 8388608.0f);
/*
Excerpt from the f32 right/side dispatch. Only a sample of the statements is visible; the function
header and the lines between the numbered statements lie outside this view.
*/
9693 #if defined(DRFLAC_SUPPORT_SSE2)
/* SSE2 variant: used only when drflac__gIsSSE2Supported is non-zero and the stream has at most 24 bits per sample. */
9694 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
9695 drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
9697 #elif defined(DRFLAC_SUPPORT_NEON)
/* NEON variant. NOTE(review): the run-time condition guarding this call is not visible here. */
9699 drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* Reference variant, called with the same six arguments. NOTE(review): how this call is selected is not visible here. */
9705 drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/*
Excerpt: the two output stores of a per-frame f32 mid/side conversion. The loop and the computation of
mid and side are outside this view (presumably the reference variant - confirm).
Left is (mid + side) >> 1 and right is (mid - side) >> 1; each is shifted left by unusedBitsPerSample,
divided by 2147483648.0 (2^31) in double precision, and narrowed to float.
*/
9722 pOutputSamples[i*2+0] = (float)((((mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
9723 pOutputSamples[i*2+1] = (float)((((mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
/*
Excerpt from a four-frames-per-iteration f32 mid/side converter (header not visible; presumably the
unrolled scalar variant - confirm). Two alternative unrolled loops are visible; the condition that
selects between them, and any adjustment made to shift before use, are on lines not shown.
*/
/* Scale factor 1/2^31; the division is done in double and the result stored as float. */
9733 float factor = 1 / 2147483648.0;
9735 int shift = unusedBitsPerSample;
/* First unrolled loop: mid +/- side is shifted left by shift. */
9738 for (i = 0; i < frameCount4; ++i) {
/* Left channel of frames 0..3. */
9763 temp0L = ((mid0 + side0) << shift);
9764 temp1L = ((mid1 + side1) << shift);
9765 temp2L = ((mid2 + side2) << shift);
9766 temp3L = ((mid3 + side3) << shift);
/* Right channel of frames 0..3. */
9768 temp0R = ((mid0 - side0) << shift);
9769 temp1R = ((mid1 - side1) << shift);
9770 temp2R = ((mid2 - side2) << shift);
9771 temp3R = ((mid3 - side3) << shift);
/* Scale and store interleaved: L0,R0,L1,R1,L2,R2,L3,R3. */
9773 pOutputSamples[i*8+0] = (float)(temp0L * factor);
9774 pOutputSamples[i*8+1] = (float)(temp0R * factor);
9775 pOutputSamples[i*8+2] = (float)(temp1L * factor);
9776 pOutputSamples[i*8+3] = (float)(temp1R * factor);
9777 pOutputSamples[i*8+4] = (float)(temp2L * factor);
9778 pOutputSamples[i*8+5] = (float)(temp2R * factor);
9779 pOutputSamples[i*8+6] = (float)(temp3L * factor);
9780 pOutputSamples[i*8+7] = (float)(temp3R * factor);
/* Second unrolled loop: mid +/- side is halved with an arithmetic shift instead. */
9783 for (i = 0; i < frameCount4; ++i) {
/* Left channel of frames 0..3. */
9808 temp0L = ((mid0 + side0) >> 1);
9809 temp1L = ((mid1 + side1) >> 1);
9810 temp2L = ((mid2 + side2) >> 1);
9811 temp3L = ((mid3 + side3) >> 1);
/* Right channel of frames 0..3. */
9813 temp0R = ((mid0 - side0) >> 1);
9814 temp1R = ((mid1 - side1) >> 1);
9815 temp2R = ((mid2 - side2) >> 1);
9816 temp3R = ((mid3 - side3) >> 1);
/* Scale and store interleaved: L0,R0,L1,R1,L2,R2,L3,R3. */
9818 pOutputSamples[i*8+0] = (float)(temp0L * factor);
9819 pOutputSamples[i*8+1] = (float)(temp0R * factor);
9820 pOutputSamples[i*8+2] = (float)(temp1L * factor);
9821 pOutputSamples[i*8+3] = (float)(temp1R * factor);
9822 pOutputSamples[i*8+4] = (float)(temp2L * factor);
9823 pOutputSamples[i*8+5] = (float)(temp2R * factor);
9824 pOutputSamples[i*8+6] = (float)(temp3L * factor);
9825 pOutputSamples[i*8+7] = (float)(temp3R * factor);
/* Remainder: frames from frameCount4*4 up to frameCount, using the full halve / shift-left / scale expression. */
9829 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9835 pOutputSamples[i*2+0] = (float)((((mid + side) >> 1) << unusedBitsPerSample) * factor);
9836 pOutputSamples[i*2+1] = (float)((((mid - side) >> 1) << unusedBitsPerSample) * factor);
/*
Excerpt compiled only when DRFLAC_SUPPORT_SSE2 is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_f32__decode_mid_side__sse2 - confirm. Two vector-loop/remainder pairs are
visible; the condition that selects between them, and any further adjustment of shift, are on lines not shown.
*/
9840 #if defined(DRFLAC_SUPPORT_SSE2)
/* Number of complete groups of four frames. */
9851 frameCount4 = frameCount >> 2;
/* Scale factor 1/8388608 (1/2^23), kept both as a scalar for the remainder loops and as a four-lane vector. */
9853 factor = 1.0f / 8388608.0f;
9854 factor128 = _mm_set1_ps(1.0f / 8388608.0f);
/* NOTE(review): the shift is reduced by 8 here, presumably to pair with the 2^23 scale instead of 2^31 - confirm. */
9856 shift = unusedBitsPerSample - 8;
/* First pair: results are halved with >> 1. */
9858 for (i = 0; i < frameCount4; ++i) {
/* mid = (mid << 1) | (side & 1): double mid and copy the low bit of side into bit 0. */
9869 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
/* tempL = (mid + side) >> 1, tempR = (mid - side) >> 1 (arithmetic shifts). */
9871 tempL = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
9872 tempR = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
/* Convert each integer lane to float and scale. */
9874 leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
9875 rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
/* Interleave as L0,R0,L1,R1 and L2,R2,L3,R3; two unaligned stores cover eight floats. */
9877 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
9878 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
/* Scalar remainder matching the loop above. */
9881 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9887 pOutputSamples[i*2+0] = (float)(((mid + side) >> 1) * factor);
9888 pOutputSamples[i*2+1] = (float)(((mid - side) >> 1) * factor);
/* Second pair: results are shifted left by shift instead of being halved. */
9892 for (i = 0; i < frameCount4; ++i) {
/* Same mid reconstruction as in the first loop. */
9903 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
9905 tempL = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
9906 tempR = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
/* Convert each integer lane to float and scale. */
9908 leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
9909 rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
9911 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
9912 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
/* Scalar remainder matching the loop above. */
9915 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9921 pOutputSamples[i*2+0] = (float)(((mid + side) << shift) * factor);
9922 pOutputSamples[i*2+1] = (float)(((mid - side) << shift) * factor);
/*
Excerpt compiled only when DRFLAC_SUPPORT_NEON is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_f32__decode_mid_side__neon - confirm. Two vector-loop/remainder pairs are
visible; the condition that selects between them, and any further adjustment of shift, are on lines not shown.
*/
9928 #if defined(DRFLAC_SUPPORT_NEON)
9935 float32x4_t factor4;
/* Number of complete groups of four frames. */
9942 frameCount4 = frameCount >> 2;
/* Scale factor 1/8388608 (1/2^23), kept both as a scalar for the remainder loops and as a four-lane vector. */
9944 factor = 1.0f / 8388608.0f;
9945 factor4 = vdupq_n_f32(factor);
/* NOTE(review): the shift is reduced by 8 here, presumably to pair with the 2^23 scale instead of 2^31 - confirm. */
9950 shift = unusedBitsPerSample - 8;
/* First pair: results are halved with >> 1. */
9952 for (i = 0; i < frameCount4; ++i) {
/* Load four samples per channel and apply the per-channel shift vectors wbps0_4 / wbps1_4 (initialised on lines not shown). */
9958 int32x4_t mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbps0_4);
9959 int32x4_t side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbps1_4);
/* mid = (mid << 1) | (side & 1): double mid and copy the low bit of side into bit 0. */
9961 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1)));
/* lefti = (mid + side) >> 1, righti = (mid - side) >> 1. */
9963 lefti = vshrq_n_s32(vaddq_s32(mid, side), 1);
9964 righti = vshrq_n_s32(vsubq_s32(mid, side), 1);
/* Convert each integer lane to float and scale. */
9966 leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4);
9967 rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
/* Zip L/R and write eight floats through drflac__vst2q_f32 (helper defined elsewhere). */
9969 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
/* Scalar remainder matching the loop above. */
9972 for (i = (frameCount4 << 2); i < frameCount; ++i) {
9978 pOutputSamples[i*2+0] = (float)(((mid + side) >> 1) * factor);
9979 pOutputSamples[i*2+1] = (float)(((mid - side) >> 1) * factor);
/* Second pair: results are shifted left by shift, broadcast to a vector for vshlq_s32. */
9983 shift4 = vdupq_n_s32(shift);
9984 for (i = 0; i < frameCount4; ++i) {
9992 mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbps0_4);
9993 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbps1_4);
/* Same mid reconstruction as in the first loop. */
9995 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1)));
9997 lefti = vshlq_s32(vaddq_s32(mid, side), shift4);
9998 righti = vshlq_s32(vsubq_s32(mid, side), shift4);
/* Convert each integer lane to float and scale. */
10000 leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4);
10001 rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
10003 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
/* Scalar remainder matching the loop above. */
10006 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10012 pOutputSamples[i*2+0] = (float)(((mid + side) << shift) * factor);
10013 pOutputSamples[i*2+1] = (float)(((mid - side) << shift) * factor);
/*
Excerpt from the f32 mid/side dispatch. Only a sample of the statements is visible; the function
header and the lines between the numbered statements lie outside this view.
*/
10021 #if defined(DRFLAC_SUPPORT_SSE2)
/* SSE2 variant: used only when drflac__gIsSSE2Supported is non-zero and the stream has at most 24 bits per sample. */
10022 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
10023 drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10025 #elif defined(DRFLAC_SUPPORT_NEON)
/* NEON variant. NOTE(review): the run-time condition guarding this call is not visible here. */
10027 drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/* Reference variant, called with the same six arguments. NOTE(review): how this call is selected is not visible here. */
10033 drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/*
Excerpt from a four-frames-per-iteration stereo-to-float converter (header not visible; presumably the
unrolled scalar form of the f32 independent-stereo path - confirm).
*/
/* Scale factor 1/2^31; the division is done in double and the result stored as float. */
10055 float factor = 1 / 2147483648.0;
/* Main loop: one iteration per group of four frames. */
10060 for (i = 0; i < frameCount4; ++i) {
/* Channel 0 samples of frames 0..3, shifted left by shift0. */
/* NOTE(review): `<<` on a negative signed sample is undefined in ISO C - confirm the portability assumption. */
10061 drflac_int32 tempL0 = pInputSamples0[i*4+0] << shift0;
10062 drflac_int32 tempL1 = pInputSamples0[i*4+1] << shift0;
10063 drflac_int32 tempL2 = pInputSamples0[i*4+2] << shift0;
10064 drflac_int32 tempL3 = pInputSamples0[i*4+3] << shift0;
/* Channel 1 samples of frames 0..3, shifted left by shift1. */
10066 drflac_int32 tempR0 = pInputSamples1[i*4+0] << shift1;
10067 drflac_int32 tempR1 = pInputSamples1[i*4+1] << shift1;
10068 drflac_int32 tempR2 = pInputSamples1[i*4+2] << shift1;
10069 drflac_int32 tempR3 = pInputSamples1[i*4+3] << shift1;
/* Scale and store interleaved: L0,R0,L1,R1,L2,R2,L3,R3. */
10071 pOutputSamples[i*8+0] = (float)(tempL0 * factor);
10072 pOutputSamples[i*8+1] = (float)(tempR0 * factor);
10073 pOutputSamples[i*8+2] = (float)(tempL1 * factor);
10074 pOutputSamples[i*8+3] = (float)(tempR1 * factor);
10075 pOutputSamples[i*8+4] = (float)(tempL2 * factor);
10076 pOutputSamples[i*8+5] = (float)(tempR2 * factor);
10077 pOutputSamples[i*8+6] = (float)(tempL3 * factor);
10078 pOutputSamples[i*8+7] = (float)(tempR3 * factor);
/* Remainder: frames from frameCount4*4 up to frameCount, one frame per iteration. */
10081 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10082 pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor);
10083 pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor);
/*
Excerpt compiled only when DRFLAC_SUPPORT_SSE2 is defined. Header not visible; presumably the body of
drflac_read_pcm_frames_f32__decode_independent_stereo__sse2 - confirm.
*/
10087 #if defined(DRFLAC_SUPPORT_SSE2)
/* Scale factor 1/8388608 (1/2^23), kept both as a scalar for the remainder loop and as a four-lane vector. */
10093 float factor = 1.0f / 8388608.0f;
10094 __m128 factor128 = _mm_set1_ps(1.0f / 8388608.0f);
/* Vector loop: four frames per iteration. */
10099 for (i = 0; i < frameCount4; ++i) {
/* Unaligned 128-bit load of four 32-bit samples per channel, each lane shifted left by shift0 / shift1. */
10105 lefti = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples0 + i), shift0);
10106 righti = _mm_slli_epi32(_mm_loadu_si128((
const __m128i*)pInputSamples1 + i), shift1);
/* Convert each integer lane to float and scale. */
10108 leftf = _mm_mul_ps(_mm_cvtepi32_ps(lefti), factor128);
10109 rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128);
/* Interleave as L0,R0,L1,R1 and L2,R2,L3,R3; two unaligned stores cover eight floats. */
10111 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
10112 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
/* Scalar remainder for the frames the vector loop did not cover. */
10115 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10116 pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor);
10117 pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor);
/*
 * NEON flavour of the two-channel -> interleaved f32 conversion; only compiled when
 * DRFLAC_SUPPORT_NEON is defined.
 * NOTE(review): the function header, most local declarations and any adjustment made to
 * shift0/shift1 are not visible in this excerpt. Like the SSE2 path this one scales by 1/2^23.
 */
10122 #if defined(DRFLAC_SUPPORT_NEON)
/* 1 / 2^23, kept as a scalar for the tail loop and broadcast to four lanes for the vector loop. */
10128 float factor = 1.0f / 8388608.0f;
10129 float32x4_t factor4 = vdupq_n_f32(factor);
/* vshlq_s32 takes a per-lane shift vector, so each shift amount is broadcast once up front. */
10134 int32x4_t shift0_4 = vdupq_n_s32(shift0);
10135 int32x4_t shift1_4 = vdupq_n_s32(shift1);
/* Vector loop: four frames per iteration. */
10137 for (i = 0; i < frameCount4; ++i) {
10141 float32x4_t rightf;
10143 lefti = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
10144 righti = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
/* Convert to float and scale. */
10146 leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4);
10147 rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
/* vzipq_f32 interleaves the two vectors; drflac__vst2q_f32 is a helper defined elsewhere in */
/* the file that presumably writes the resulting pair to eight consecutive floats - confirm. */
10149 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
/* Scalar tail loop for the frames left over after the 4-wide loop. */
10152 for (i = (frameCount4 << 2); i < frameCount; ++i) {
10153 pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor);
10154 pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor);
/*
 * Dispatch fragment for the independent-stereo f32 decoders. All three candidates receive the
 * same argument list.
 * The SSE2 routine is chosen only when the runtime flag drflac__gIsSSE2Supported is set and the
 * stream has at most 24 bits per sample.
 * NOTE(review): the guard in front of the NEON call, the else branches and whatever selects the
 * __reference call are on lines that are not visible in this excerpt.
 */
10161 #if defined(DRFLAC_SUPPORT_SSE2)
10162 if (drflac__gIsSSE2Supported && pFlac->
bitsPerSample <= 24) {
10163 drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10165 #elif defined(DRFLAC_SUPPORT_NEON)
10167 drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
10173 drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
/*
 * Fragment of a PCM read loop (presumably drflac_read_pcm_frames_f32, given the f32 decoders
 * that precede it - the function header is not visible in this excerpt).
 * NOTE(review): the bodies of the guard branches and of the per-channel loop are not shown.
 */
/* Guard: a NULL decoder or a zero-frame request is handled before any decoding. */
10185 if (pFlac ==
NULL || framesToRead == 0) {
/* A NULL output buffer is handled as its own case, separately from the guard above. */
10189 if (pBufferOut ==
NULL) {
/* Keep going until the whole request has been consumed. */
10196 while (framesToRead > 0) {
/* Two-channel streams get their own branch. */
10211 if (channelCount == 2) {
/* Generic path: iterate over every frame of this iteration and every channel within it. */
10241 for (i = 0; i < frameCountThisIteration; ++i) {
10243 for (j = 0; j < channelCount; ++j) {
/* Bookkeeping: pBufferOut advances by frames * channels because the output is interleaved. */
10249 framesRead += frameCountThisIteration;
10250 pBufferOut += frameCountThisIteration * channelCount;
10251 framesToRead -= frameCountThisIteration;
/*
 * Fragment of a seek routine (presumably drflac_seek_to_pcm_frame - the function header and
 * the bodies of every branch are not visible in this excerpt).
 * The result is reported through wasSuccessful.
 */
/* Guard against a NULL decoder. */
10263 if (pFlac ==
NULL) {
/* Seeking to frame 0 is treated as a special case. */
10280 if (pcmFrameIndex == 0) {
/* Compares how far the current FLAC frame has been consumed against offsetAbs. */
10305 if (currentFLACFramePCMFramesConsumed > offsetAbs) {
/* Ogg-specific handling is compiled out when DR_FLAC_NO_OGG is defined. */
10316 #ifndef DR_FLAC_NO_OGG
/* This part is compiled only when DR_FLAC_NO_CRC is not defined. */
10329 #if !defined(DR_FLAC_NO_CRC)
10343 return wasSuccessful;
/*
 * DRFLAC_SIZE_MAX: upper bound used when a 64-bit byte count has to fit in a size_t.
 * Prefers the standard SIZE_MAX when the toolchain provides it; otherwise falls back to an
 * all-ones 64-bit constant when DRFLAC_64BIT is defined, or an all-ones 32-bit constant.
 * NOTE(review): the #else / #endif lines that separate the alternatives are not part of this
 * excerpt.
 */
10351 #if defined(SIZE_MAX)
10352 #define DRFLAC_SIZE_MAX SIZE_MAX
10354 #if defined(DRFLAC_64BIT)
10355 #define DRFLAC_SIZE_MAX ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
10357 #define DRFLAC_SIZE_MAX 0xFFFFFFFF
/*
 * DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type)
 *
 * Generates a static function drflac__full_read_and_close_<extension>() that reads PCM frames
 * from pFlac with drflac_read_pcm_frames_<extension>() into a single buffer obtained from
 * pFlac->allocationCallbacks, closes pFlac and returns the buffer.
 *
 * Two strategies, chosen by pFlac->totalPCMFrameCount:
 *   - 0: frames are pulled through a 4096-element stack buffer until the read call returns 0.
 *     The output buffer starts at sizeof(buffer) bytes and is doubled with
 *     drflac__realloc_from_callbacks() whenever the next chunk would not fit. After the loop
 *     the unused tail of the output buffer is zeroed.
 *   - otherwise: the byte size is computed as a 64-bit value, compared against
 *     DRFLAC_SIZE_MAX, and a single allocation is filled with one read call. The frame count
 *     reported to the caller is what that read call returned.
 *
 * channelsOut, sampleRateOut and totalPCMFrameCountOut are each written only when non-NULL.
 *
 * NOTE(review): the failure paths (what happens after a failed allocation or when the size
 * check trips) are on lines not visible in this excerpt; the second drflac_close() call
 * presumably belongs to them - confirm.
 * NOTE(review): sampleDataBufferSize * 2 is not checked for size_t overflow.
 */
10363 #define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \
10364 static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\
10366 type* pSampleData = NULL; \
10367 drflac_uint64 totalPCMFrameCount; \
10369 DRFLAC_ASSERT(pFlac != NULL); \
10371 totalPCMFrameCount = pFlac->totalPCMFrameCount; \
10373 if (totalPCMFrameCount == 0) { \
10374 type buffer[4096]; \
10375 drflac_uint64 pcmFramesRead; \
10376 size_t sampleDataBufferSize = sizeof(buffer); \
10378 pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks); \
10379 if (pSampleData == NULL) { \
10383 while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) { \
10384 if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) { \
10385 type* pNewSampleData; \
10386 size_t newSampleDataBufferSize; \
10388 newSampleDataBufferSize = sampleDataBufferSize * 2; \
10389 pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks); \
10390 if (pNewSampleData == NULL) { \
10391 drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks); \
10395 sampleDataBufferSize = newSampleDataBufferSize; \
10396 pSampleData = pNewSampleData; \
10399 DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type))); \
10400 totalPCMFrameCount += pcmFramesRead; \
10405 DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type))); \
10407 drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type); \
10408 if (dataSize > DRFLAC_SIZE_MAX) { \
10412 pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks); \
10413 if (pSampleData == NULL) { \
10417 totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData); \
10420 if (sampleRateOut) *sampleRateOut = pFlac->sampleRate; \
10421 if (channelsOut) *channelsOut = pFlac->channels; \
10422 if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount; \
10424 drflac_close(pFlac); \
10425 return pSampleData; \
10428 drflac_close(pFlac); \
/*
 * Fragment of a one-shot "open and decode everything" entry point for the s32 format, driven
 * by the caller's onRead/onSeek callbacks.
 * NOTE(review): the function header and the body of the NULL branch are not visible here.
 */
/* The optional output counters are reset before the decoder is opened, presumably so they */
/* hold a defined value if the function bails out early. */
10443 if (sampleRateOut) {
10444 *sampleRateOut = 0;
10446 if (totalPCMFrameCountOut) {
10447 *totalPCMFrameCountOut = 0;
10450 pFlac =
drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
10451 if (pFlac ==
NULL) {
/* The helper generated by DRFLAC_DEFINE_FULL_READ_AND_CLOSE closes pFlac before returning. */
10455 return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
/*
 * Fragment of a one-shot "open and decode everything" entry point for the s16 format, driven
 * by the caller's onRead/onSeek callbacks.
 * NOTE(review): the function header and the body of the NULL branch are not visible here.
 */
/* The optional output counters are reset before the decoder is opened, presumably so they */
/* hold a defined value if the function bails out early. */
10465 if (sampleRateOut) {
10466 *sampleRateOut = 0;
10468 if (totalPCMFrameCountOut) {
10469 *totalPCMFrameCountOut = 0;
10472 pFlac =
drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
10473 if (pFlac ==
NULL) {
/* The helper generated by DRFLAC_DEFINE_FULL_READ_AND_CLOSE closes pFlac before returning. */
10477 return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
/*
 * Fragment of a one-shot "open and decode everything" entry point for the f32 format, driven
 * by the caller's onRead/onSeek callbacks.
 * NOTE(review): the function header and the body of the NULL branch are not visible here.
 */
/* The optional output counters are reset before the decoder is opened, presumably so they */
/* hold a defined value if the function bails out early. */
10487 if (sampleRateOut) {
10488 *sampleRateOut = 0;
10490 if (totalPCMFrameCountOut) {
10491 *totalPCMFrameCountOut = 0;
10494 pFlac =
drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
10495 if (pFlac ==
NULL) {
/* The helper generated by DRFLAC_DEFINE_FULL_READ_AND_CLOSE closes pFlac before returning. */
10499 return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
10502 #ifndef DR_FLAC_NO_STDIO
/*
 * Fragment of a one-shot s32 decode entry point that follows the DR_FLAC_NO_STDIO guard.
 * NOTE(review): the function header and the call that produces pFlac are not visible in this
 * excerpt; presumably this is the file-based variant - confirm.
 */
/* Reset the optional frame-count output before attempting to open the decoder. */
10513 if (totalPCMFrameCount) {
10514 *totalPCMFrameCount = 0;
10518 if (pFlac ==
NULL) {
/* The helper generated by DRFLAC_DEFINE_FULL_READ_AND_CLOSE closes pFlac before returning. */
10522 return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
/*
 * Fragment of a one-shot s16 decode entry point.
 * NOTE(review): the function header and the call that produces pFlac are not visible in this
 * excerpt; presumably this is the file-based variant - confirm.
 */
/* Reset the optional frame-count output before attempting to open the decoder. */
10535 if (totalPCMFrameCount) {
10536 *totalPCMFrameCount = 0;
10540 if (pFlac ==
NULL) {
/* The helper generated by DRFLAC_DEFINE_FULL_READ_AND_CLOSE closes pFlac before returning. */
10544 return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
/*
 * Fragment of a one-shot f32 decode entry point.
 * NOTE(review): the function header and the call that produces pFlac are not visible in this
 * excerpt; presumably this is the file-based variant - confirm.
 */
/* Reset the optional frame-count output before attempting to open the decoder. */
10557 if (totalPCMFrameCount) {
10558 *totalPCMFrameCount = 0;
10562 if (pFlac ==
NULL) {
/* The helper generated by DRFLAC_DEFINE_FULL_READ_AND_CLOSE closes pFlac before returning. */
10566 return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
/*
 * Fragment of a one-shot s32 decode entry point.
 * NOTE(review): the function header and the call that produces pFlac are not visible in this
 * excerpt; presumably this is the in-memory variant - confirm.
 */
/* Reset the optional frame-count output before attempting to open the decoder. */
10580 if (totalPCMFrameCount) {
10581 *totalPCMFrameCount = 0;
10585 if (pFlac ==
NULL) {
/* The helper generated by DRFLAC_DEFINE_FULL_READ_AND_CLOSE closes pFlac before returning. */
10589 return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
/*
 * Fragment of a one-shot s16 decode entry point.
 * NOTE(review): the function header and the call that produces pFlac are not visible in this
 * excerpt; presumably this is the in-memory variant - confirm.
 */
/* Reset the optional frame-count output before attempting to open the decoder. */
10602 if (totalPCMFrameCount) {
10603 *totalPCMFrameCount = 0;
10607 if (pFlac ==
NULL) {
/* The helper generated by DRFLAC_DEFINE_FULL_READ_AND_CLOSE closes pFlac before returning. */
10611 return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
/*
 * Fragment of a one-shot f32 decode entry point.
 * NOTE(review): the function header and the call that produces pFlac are not visible in this
 * excerpt; presumably this is the in-memory variant - confirm.
 */
/* Reset the optional frame-count output before attempting to open the decoder. */
10624 if (totalPCMFrameCount) {
10625 *totalPCMFrameCount = 0;
10629 if (pFlac ==
NULL) {
/* The helper generated by DRFLAC_DEFINE_FULL_READ_AND_CLOSE closes pFlac before returning. */
10633 return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
10639 if (pAllocationCallbacks !=
NULL) {
10651 if (pIter ==
NULL) {
/*
 * Fragment of a Vorbis-comment iterator step (presumably drflac_next_vorbis_comment - the
 * function header, the parsing of the comment and the return statements are not visible).
 * pCommentLengthOut is optional: it is only written when non-NULL.
 */
10662 const char* pComment;
/* Start from a length of 0; the real value is stored further down once `length` is known. */
10665 if (pCommentLengthOut) {
10666 *pCommentLengthOut = 0;
10680 if (pCommentLengthOut) {
10681 *pCommentLengthOut = length;
10692 if (pIter ==
NULL) {
/*
 * Fragment of a cuesheet-track iterator step (presumably drflac_next_cuesheet_track - the
 * function header, the reads that produce offsetHi/offsetLo and the return statements are not
 * visible in this excerpt).
 * Fields are pulled from the byte cursor pRunningData, which is advanced after each field.
 */
10703 const char* pRunningData;
/* Rebuild the 64-bit offset from its two halves. */
/* NOTE(review): this is only correct if offsetHi is a 64-bit type; with a 32-bit type the */
/* shift by 32 would be undefined. Its declaration is not visible here - confirm. */
10715 cuesheetTrack.
offset = offsetLo | (offsetHi << 32);
10716 cuesheetTrack.
trackNumber = pRunningData[0]; pRunningData += 1;
/* Bit 7 and bit 6 of the same byte carry the two flags; the cursor is moved only after both */
/* have been read. */
10718 cuesheetTrack.
isAudio = (pRunningData[0] & 0x80) != 0;
/* The 14-byte skip presumably covers the flags byte plus 13 reserved bytes of the FLAC */
/* CUESHEET_TRACK layout - confirm against the format specification. */
10719 cuesheetTrack.
preEmphasis = (pRunningData[0] & 0x40) != 0; pRunningData += 14;
10720 cuesheetTrack.
indexCount = pRunningData[0]; pRunningData += 1;
/* The output structure is optional; it is only copied out when the caller passed a pointer. */
10726 if (pCuesheetTrack) {
10727 *pCuesheetTrack = cuesheetTrack;
10733 #if defined(__GNUC__)
10734 #pragma GCC diagnostic pop