#define XXH_NAMESPACE ZSTD_
#undef XXH_PRIVATE_API
#define XXH_PRIVATE_API
#define XXH_INLINE_ALL
#define ZSTD_LEGACY_SUPPORT 0
#ifndef __EMSCRIPTEN__
#define ZSTD_MULTITHREAD
#endif
#define ZSTD_DISABLE_ASM 1
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
#ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON

#if defined(__GNUC__) && __GNUC__ >= 4
# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l))
# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l))
# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l))
#else
# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l))
# define ZSTD_memmove(d,s,l) memmove((d),(s),(l))
# define ZSTD_memset(p,v,l) memset((p),(v),(l))
#endif
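
/* Routing all copies through ZSTD_memcpy()/ZSTD_memmove()/ZSTD_memset() keeps the
 * library's libc dependencies swappable in one place; the __builtin_* forms give the
 * compiler full visibility, so small fixed-size copies are inlined to plain loads/stores. */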
#ifdef ZSTD_DEPS_NEED_MALLOC
#ifndef ZSTD_DEPS_MALLOC
#define ZSTD_DEPS_MALLOC

#include <stdlib.h>   /* malloc, calloc, free */

#define ZSTD_malloc(s) malloc(s)
#define ZSTD_calloc(n,s) calloc((n), (s))
#define ZSTD_free(p) free((p))
#ifdef ZSTD_DEPS_NEED_MATH64
#ifndef ZSTD_DEPS_MATH64
#define ZSTD_DEPS_MATH64

#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
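
/* ZSTD_div64() defaults to the native 64-bit division operator; the indirection
 * leaves room for targets without hardware 64-bit division to substitute their own routine. */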
#ifdef ZSTD_DEPS_NEED_ASSERT
#ifndef ZSTD_DEPS_ASSERT
#define ZSTD_DEPS_ASSERT

#include <assert.h>

#ifdef ZSTD_DEPS_NEED_IO
#include <stdio.h>   /* fprintf */

#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)

#ifdef ZSTD_DEPS_NEED_STDINT
#ifndef ZSTD_DEPS_STDINT
#define ZSTD_DEPS_STDINT

#include <stdint.h>   /* intptr_t */
#ifndef DEBUG_H_12987983217
#define DEBUG_H_12987983217

#if defined (__cplusplus)
/* ... */
#endif

#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])

#ifndef DEBUGLEVEL
#  define DEBUGLEVEL 0
#endif

#if (DEBUGLEVEL>=1)
#  define ZSTD_DEPS_NEED_ASSERT
#else
#  define assert(condition) ((void)0)   /* asserts disabled (default) */
#endif

#if (DEBUGLEVEL>=2)
#  define ZSTD_DEPS_NEED_IO
#  define RAWLOG(l, ...)   {                        \
        if (l<=g_debuglevel) {                      \
            ZSTD_DEBUG_PRINT(__VA_ARGS__);          \
    }   }
#  define DEBUGLOG(l, ...) {                                 \
        if (l<=g_debuglevel) {                               \
            ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__);     \
            ZSTD_DEBUG_PRINT(" \n");                         \
    }   }
#else
#  define RAWLOG(l, ...) {}     /* disabled */
#  define DEBUGLOG(l, ...) {}   /* disabled */
#endif
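
/* Illustrative call site: DEBUGLOG(2, "table log = %u", tableLog);
 * prints "<file>: table log = NN \n" on stderr when the runtime debug level is >= 2,
 * and compiles to an empty block when logging is disabled. */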
#if defined (__cplusplus)
/* ... */
#endif

#if defined (__cplusplus)
/* ... */
#endif

#ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H

#ifndef ZSTD_PORTABILITY_MACROS_H
#define ZSTD_PORTABILITY_MACROS_H

#ifndef __has_attribute
  #define __has_attribute(x) 0
#endif

#ifndef __has_builtin
#  define __has_builtin(x) 0
#endif

#ifndef __has_feature
#  define __has_feature(x) 0
#endif
#ifndef ZSTD_MEMORY_SANITIZER
#  if __has_feature(memory_sanitizer)
#    define ZSTD_MEMORY_SANITIZER 1
#  else
#    define ZSTD_MEMORY_SANITIZER 0
#  endif
#endif

#ifndef ZSTD_ADDRESS_SANITIZER
#  if __has_feature(address_sanitizer)
#    define ZSTD_ADDRESS_SANITIZER 1
#  elif defined(__SANITIZE_ADDRESS__)
#    define ZSTD_ADDRESS_SANITIZER 1
#  else
#    define ZSTD_ADDRESS_SANITIZER 0
#  endif
#endif

#ifndef ZSTD_DATAFLOW_SANITIZER
#  if __has_feature(dataflow_sanitizer)
#    define ZSTD_DATAFLOW_SANITIZER 1
#  else
#    define ZSTD_DATAFLOW_SANITIZER 0
#  endif
#endif
#if defined(__ELF__) && defined(__GNUC__)
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
#else
# define ZSTD_HIDE_ASM_FUNCTION(func)
#endif

#if ((defined(__clang__) && __has_attribute(__target__)) \
  || (defined(__GNUC__) \
      && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
  && (defined(__x86_64__) || defined(_M_X64)) \
  && !defined(__BMI2__)
#  define DYNAMIC_BMI2 1
#else
#  define DYNAMIC_BMI2 0
#endif
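
/* DYNAMIC_BMI2 == 1 means: the target is x86-64, the compiler can emit BMI2 code
 * per-function (via __attribute__((target))), but BMI2 was not enabled globally.
 * BMI2 code paths are then compiled anyway and selected at run time from cpuid. */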
#if defined(__GNUC__)
#  if defined(__linux__) || defined(__linux) || defined(__APPLE__)
#    if ZSTD_MEMORY_SANITIZER
#      define ZSTD_ASM_SUPPORTED 0
#    elif ZSTD_DATAFLOW_SANITIZER
#      define ZSTD_ASM_SUPPORTED 0
#    else
#      define ZSTD_ASM_SUPPORTED 1
#    endif
#  else
#    define ZSTD_ASM_SUPPORTED 0
#  endif
#else
#  define ZSTD_ASM_SUPPORTED 0
#endif
#if !defined(ZSTD_DISABLE_ASM) &&   \
    ZSTD_ASM_SUPPORTED &&           \
    defined(__x86_64__) &&          \
    (DYNAMIC_BMI2 || defined(__BMI2__))
# define ZSTD_ENABLE_ASM_X86_64_BMI2 1
#else
# define ZSTD_ENABLE_ASM_X86_64_BMI2 0
#endif

#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
    && defined(__has_include)
# if __has_include(<cet.h>)
#   include <cet.h>
#   define ZSTD_CET_ENDBRANCH _CET_ENDBR
# endif
#endif

#ifndef ZSTD_CET_ENDBRANCH
# define ZSTD_CET_ENDBRANCH
#endif
#if !defined(ZSTD_NO_INLINE)
#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#  define INLINE_KEYWORD inline
#else
#  define INLINE_KEYWORD
#endif

#if defined(__GNUC__) || defined(__ICCARM__)
#  define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define FORCE_INLINE_ATTR __forceinline
#else
#  define FORCE_INLINE_ATTR
#endif

#else
#define INLINE_KEYWORD
#define FORCE_INLINE_ATTR
#endif

#if defined(_MSC_VER)
#  define WIN_CDECL __cdecl
#else
#  define WIN_CDECL
#endif

#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
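
/* FORCE_INLINE_TEMPLATE marks functions that act as C "templates": they take a
 * mode/flag parameter that is constant at each call site, and forced inlining lets
 * the compiler specialize the body and fold the constant, at some code-size cost. */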
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
#  define HINT_INLINE static INLINE_KEYWORD
#else
#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
#endif

#if defined(__GNUC__)
#  define UNUSED_ATTR __attribute__((unused))
#else
#  define UNUSED_ATTR
#endif

#ifdef _MSC_VER
#  define FORCE_NOINLINE static __declspec(noinline)
#else
#  if defined(__GNUC__) || defined(__ICCARM__)
#    define FORCE_NOINLINE static __attribute__((__noinline__))
#  else
#    define FORCE_NOINLINE static
#  endif
#endif

#if defined(__GNUC__) || defined(__ICCARM__)
#  define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
#  define TARGET_ATTRIBUTE(target)
#endif

#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
#if defined(NO_PREFETCH)
#  define PREFETCH_L1(ptr) (void)(ptr)
#  define PREFETCH_L2(ptr) (void)(ptr)
#else
#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
#    include <mmintrin.h>
#    define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#    define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
#    define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#    define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
#  elif defined(__aarch64__)
#    define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
#    define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
#  else
#    define PREFETCH_L1(ptr) (void)(ptr)
#    define PREFETCH_L2(ptr) (void)(ptr)
#  endif
#endif

#define CACHELINE_SIZE 64

#define PREFETCH_AREA(p, s)  {                            \
    const char* const _ptr = (const char*)(p);            \
    size_t const _size = (size_t)(s);                     \
    size_t _pos;                                          \
    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {      \
        PREFETCH_L2(_ptr + _pos);                         \
    }                                                     \
}
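
/* PREFETCH_AREA walks the range one cache line (64 B) at a time, so every line of
 * the area is requested once; e.g. PREFETCH_AREA(table, 256) issues 4 L2 prefetches. */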
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#  else
#    define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
#  endif
#else
#  define DONT_VECTORIZE
#endif

#if defined(__GNUC__)
#define LIKELY(x) (__builtin_expect((x), 1))
#define UNLIKELY(x) (__builtin_expect((x), 0))
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif
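
/* Typical use (illustrative): if (UNLIKELY(srcSize < 4)) return ERROR(srcSize_wrong);
 * the hint lets the compiler lay out the error path out of the hot code stream. */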
#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
#  define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
#else
#  define ZSTD_UNREACHABLE { assert(0); }
#endif
#if defined(_MSC_VER)
#  pragma warning(disable : 4100)  /* disable: C4100: unreferenced formal parameter */
#  pragma warning(disable : 4127)  /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4204)  /* disable: C4204: non-constant aggregate initializer */
#  pragma warning(disable : 4214)  /* disable: C4214: non-int bitfields */
#  pragma warning(disable : 4324)  /* disable: C4324: structure was padded */
#endif

#ifndef STATIC_BMI2
#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
#    ifdef __AVX2__  /* MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 */
#      define STATIC_BMI2 1
#    endif
#  elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__)
#    define STATIC_BMI2 1
#  endif
#endif

#ifndef STATIC_BMI2
#define STATIC_BMI2 0
#endif
#if !defined(ZSTD_NO_INTRINSICS)
#  if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
#    define ZSTD_ARCH_X86_SSE2
#  endif
#  if defined(__ARM_NEON) || defined(_M_ARM64)
#    define ZSTD_ARCH_ARM_NEON
#  endif

#  if defined(ZSTD_ARCH_X86_SSE2)
#    include <emmintrin.h>
#  elif defined(ZSTD_ARCH_ARM_NEON)
#    include <arm_neon.h>
#  endif
#endif
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
#  define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
#else
#  define ZSTD_HAS_C_ATTRIBUTE(x) 0
#endif

#if defined(__cplusplus) && defined(__has_cpp_attribute)
#  define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
#  define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
#endif

#ifndef ZSTD_FALLTHROUGH
#  if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
#    define ZSTD_FALLTHROUGH [[fallthrough]]
#  elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
#    define ZSTD_FALLTHROUGH [[fallthrough]]
#  elif __has_attribute(__fallthrough__)
#    define ZSTD_FALLTHROUGH ; __attribute__((__fallthrough__))
#  else
#    define ZSTD_FALLTHROUGH
#  endif
#endif
#ifndef ZSTD_ALIGNOF
#  if defined(__GNUC__) || defined(_MSC_VER)
#    define ZSTD_ALIGNOF(T) __alignof(T)
#  elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
#    include <stdalign.h>
#    define ZSTD_ALIGNOF(T) alignof(T)
#  else
     /* No known _Alignof equivalent: estimate alignment as min(sizeof(void*), sizeof(T)). */
#    define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T))
#  endif
#endif
#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
#endif
#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
#endif

#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
#define ZSTD_DEPS_NEED_STDINT

/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);

/* Make memory region fully uninitialized (without changing its contents). */
void __msan_poison(const volatile void *a, size_t size);

/* Returns the offset of the first (at least partially) poisoned byte in the
 * memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);

/* Print shadow and origin for the memory range to stderr in a human-readable format. */
void __msan_print_shadow(const volatile void *x, size_t size);
#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)

/* Marks a memory region (<addr>, <addr>+<size>) as unaddressable. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);

/* Marks a memory region (<addr>, <addr>+<size>) as addressable. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#if defined(_MSC_VER)   /* Visual Studio */
#  include <stdlib.h>   /* _byteswap_ulong */
#  include <intrin.h>   /* _byteswap_* */
#endif

#if defined(__GNUC__)
#  define MEM_STATIC static __inline __attribute__((unused))
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) )
#  define MEM_STATIC static inline
#elif defined(_MSC_VER)
#  define MEM_STATIC static __inline
#else
#  define MEM_STATIC static  /* may generate "unused static function" warnings */
#endif

/* basic types */
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ) )
#  include <stdint.h>
#  include <inttypes.h>
   typedef uint8_t  BYTE;
   typedef uint16_t U16;
   typedef uint32_t U32;
   typedef uint64_t U64;
#else
#  include <limits.h>
#  if CHAR_BIT != 8
#    error "this implementation requires char to be exactly 8-bit type"
#  endif
   typedef unsigned char U8;
   typedef signed char S8;
#  if USHRT_MAX != 65535
#    error "this implementation requires short to be exactly 16-bit type"
#  endif
   typedef unsigned short U16;
#  if UINT_MAX != 4294967295
#    error "this implementation requires int to be exactly 32-bit type"
#  endif
   typedef unsigned long long U64;
   typedef signed long long S64;
#endif
#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
#  define MEM_FORCE_MEMORY_ACCESS 1
#endif

MEM_STATIC unsigned MEM_isLittleEndian(void)
{
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
    return 1;
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    return 0;
#elif defined(__clang__) && __LITTLE_ENDIAN__
    return 1;
#elif defined(__clang__) && __BIG_ENDIAN__
    return 0;
#elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86)
    return 1;
#elif defined(__DMC__) && defined(_M_IX86)
    return 1;
#else
    const union { U32 u; BYTE c[4]; } one = { 1 };
    return one.c[0];
#endif
}
#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
/* Method 2: direct cast through an unaligned pointer (requires hardware tolerant of unaligned reads) */
/* ... */

#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
/* Method 1: byte-aligned typedefs via __attribute__((aligned(1))) (gcc/clang) */
typedef __attribute__((aligned(1))) U16 unalign16;
typedef __attribute__((aligned(1))) U32 unalign32;
typedef __attribute__((aligned(1))) U64 unalign64;
typedef __attribute__((aligned(1))) size_t unalignArch;
MEM_STATIC size_t MEM_readST(const void* memPtr)
{
    size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}

MEM_STATIC U32 MEM_swap32_fallback(U32 in)
{
    return ((in << 24) & 0xff000000 ) |
           ((in <<  8) & 0x00ff0000 ) |
           ((in >>  8) & 0x0000ff00 ) |
           ((in >> 24) & 0x000000ff );
}
MEM_STATIC U32 MEM_swap32(U32 in)
{
#if defined(_MSC_VER)     /* Visual Studio */
    return _byteswap_ulong(in);
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
   || (defined(__clang__) && __has_builtin(__builtin_bswap32))
    return __builtin_bswap32(in);
#else
    return MEM_swap32_fallback(in);
#endif
}

MEM_STATIC U64 MEM_swap64_fallback(U64 in)
{
    return ((in << 56) & 0xff00000000000000ULL) |
           ((in << 40) & 0x00ff000000000000ULL) |
           ((in << 24) & 0x0000ff0000000000ULL) |
           ((in <<  8) & 0x000000ff00000000ULL) |
           ((in >>  8) & 0x00000000ff000000ULL) |
           ((in >> 24) & 0x0000000000ff0000ULL) |
           ((in >> 40) & 0x000000000000ff00ULL) |
           ((in >> 56) & 0x00000000000000ffULL);
}

MEM_STATIC U64 MEM_swap64(U64 in)
{
#if defined(_MSC_VER)     /* Visual Studio */
    return _byteswap_uint64(in);
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
   || (defined(__clang__) && __has_builtin(__builtin_bswap64))
    return __builtin_bswap64(in);
#else
    return MEM_swap64_fallback(in);
#endif
}
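
/* Example: MEM_swap32(0x11223344) == 0x44332211; the swap functions are used to
 * read/write values in the opposite endianness of the host. */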
    const BYTE* p = (const BYTE*)memPtr;
    return (U16)(p[0] + (p[1]<<8));
/* ... */
    p[1] = (BYTE)(val>>8);
/* ... */
    ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
#if defined (__cplusplus)
/* ... */
#endif

#ifndef ERROR_H_MODULE
#define ERROR_H_MODULE

#if defined (__cplusplus)
/* ... */
#endif

#ifndef ZSTD_ERRORS_H_398273423
#define ZSTD_ERRORS_H_398273423

#if defined (__cplusplus)
/* ... */
#endif

#ifndef ZSTDERRORLIB_VISIBLE
#  ifdef ZSTDERRORLIB_VISIBILITY
#    define ZSTDERRORLIB_VISIBLE ZSTDERRORLIB_VISIBILITY
#  elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
#    define ZSTDERRORLIB_VISIBLE __attribute__ ((visibility ("default")))
#  else
#    define ZSTDERRORLIB_VISIBLE
#  endif
#endif

#ifndef ZSTDERRORLIB_HIDDEN
#  if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
#    define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
#  else
#    define ZSTDERRORLIB_HIDDEN
#  endif
#endif

#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
#  define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBLE
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
#  define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBLE
#else
#  define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
#endif
#if defined (__cplusplus)
/* ... */
#endif

#if defined(__GNUC__)
#  define ERR_STATIC static __attribute__((unused))
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) )
#  define ERR_STATIC static inline
#elif defined(_MSC_VER)
#  define ERR_STATIC static __inline
#else
#  define ERR_STATIC static  /* may generate "unused static function" warnings */
#endif

#define PREFIX(name) ZSTD_error_##name

#define ERROR(name) ZSTD_ERROR(name)
#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
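
/* Error values are encoded as (size_t)-ZSTD_error_<name>, i.e. values very close to
 * SIZE_MAX, which can never be valid sizes; ERR_isError() detects them by checking
 * whether a returned value falls in that reserved top range. */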
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
#define _FORCE_HAS_FORMAT_STRING(...) \
    if (0) { \
        _force_has_format_string(__VA_ARGS__); \
    }

#define ERR_QUOTE(str) #str

#define RETURN_ERROR_IF(cond, err, ...) \
    if (cond) { \
        RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
               __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
        _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
        RAWLOG(3, ": " __VA_ARGS__); \
        RAWLOG(3, "\n"); \
        return ERROR(err); \
    }

#define RETURN_ERROR(err, ...) \
    do { \
        RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
               __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
        _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
        RAWLOG(3, ": " __VA_ARGS__); \
        RAWLOG(3, "\n"); \
        return ERROR(err); \
    } while(0)

#define FORWARD_IF_ERROR(err, ...) \
    do { \
        size_t const err_code = (err); \
        if (ERR_isError(err_code)) { \
            RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
                   __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
            _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
            RAWLOG(3, ": " __VA_ARGS__); \
            RAWLOG(3, "\n"); \
            return err_code; \
        } \
    } while(0)
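
/* Illustrative call site:
 *   RETURN_ERROR_IF(dstCapacity < srcSize, dstSize_tooSmall,
 *                   "need %u bytes", (unsigned)srcSize);
 * expands to a check that logs (at debug level >= 3) and returns
 * ERROR(dstSize_tooSmall) when the condition holds. */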
#if defined (__cplusplus)
/* ... */
#endif

#define FSE_STATIC_LINKING_ONLY

#if defined (__cplusplus)
/* ... */
#endif

#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
#  define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)
#  define FSE_PUBLIC_API __declspec(dllexport)
#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
#  define FSE_PUBLIC_API __declspec(dllimport)
#else
#  define FSE_PUBLIC_API
#endif

/*------   Version   ------*/
#define FSE_VERSION_MAJOR   0
#define FSE_VERSION_MINOR   9
#define FSE_VERSION_RELEASE 0

#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
#define FSE_QUOTE(str) #str
#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
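
/* Two-step stringification: FSE_QUOTE alone would yield the literal text
 * "FSE_LIB_VERSION", so FSE_EXPAND_AND_QUOTE first expands its argument,
 * making FSE_VERSION_STRING evaluate to "0.9.0". */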
#define FSE_VERSION_NUMBER  (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);

FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
                    const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);

FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
                    unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
                    const void* rBuffer, size_t rBuffSize);

FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
                    unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
                    const void* rBuffer, size_t rBuffSize, int bmi2);
#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
#define FSE_H_FSE_STATIC_LINKING_ONLY

#ifndef BITSTREAM_H_MODULE
#define BITSTREAM_H_MODULE

#if defined (__cplusplus)
/* ... */
#endif

MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
{
    static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
                                            30, 22, 20, 15, 25, 17, 4, 8,
                                            31, 27, 13, 23, 21, 19, 16, 7,
                                            26, 12, 18, 6, 11, 5, 10, 9};
    return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
}
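
/* How the fallback works: (val & -val) isolates the lowest set bit, so multiplying
 * by the De Bruijn constant 0x077CB531 places a 5-bit pattern unique to that bit
 * position in the top bits; the 32-entry table maps it back to the bit index. */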
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
# if defined(_MSC_VER)
#   if STATIC_BMI2 == 1
    return (unsigned)_tzcnt_u32(val);
#   else
    {   unsigned long r;
        _BitScanForward(&r, val);
        return (unsigned)r;
    }
#   endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)__builtin_ctz(val);
# else
    return ZSTD_countTrailingZeros32_fallback(val);
# endif
}
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
{
    static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
                                        11, 14, 16, 18, 22, 25, 3, 30,
                                        8, 12, 20, 28, 15, 17, 24, 7,
                                        19, 27, 23, 6, 26, 5, 4, 31};
    val |= val >> 1; val |= val >> 2; val |= val >> 4; val |= val >> 8; val |= val >> 16;
    return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
}
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
{
# if defined(_MSC_VER)
#   if STATIC_BMI2 == 1
    return (unsigned)_lzcnt_u32(val);
#   else
    {   unsigned long r;
        _BitScanReverse(&r, val);
        return (unsigned)(31 - r);
    }
#   endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)__builtin_clz(val);
# else
    return ZSTD_countLeadingZeros32_fallback(val);
# endif
}
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
# if defined(_MSC_VER) && defined(_WIN64)
#   if STATIC_BMI2 == 1
    return (unsigned)_tzcnt_u64(val);
#   else
    {   unsigned long r;
        _BitScanForward64(&r, val);
        return (unsigned)r;
    }
#   endif
# elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
    return (unsigned)__builtin_ctzll(val);
# else
    {   U32 mostSignificantWord = (U32)(val >> 32);
        U32 leastSignificantWord = (U32)val;
        if (leastSignificantWord == 0) {
            return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
        } else {
            return ZSTD_countTrailingZeros32(leastSignificantWord);
        }
    }
# endif
}
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
{
# if defined(_MSC_VER) && defined(_WIN64)
#   if STATIC_BMI2 == 1
    return (unsigned)_lzcnt_u64(val);
#   else
    {   unsigned long r;
        _BitScanReverse64(&r, val);
        return (unsigned)(63 - r);
    }
#   endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)(__builtin_clzll(val));
# else
    {   U32 mostSignificantWord = (U32)(val >> 32);
        U32 leastSignificantWord = (U32)val;
        if (mostSignificantWord == 0) {
            return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
        } else {
            return ZSTD_countLeadingZeros32(mostSignificantWord);
        }
    }
# endif
}
MEM_STATIC U64 ZSTD_rotateRight_U64(U64 const value, U32 count)
{
    assert(count < 64);
    count &= 0x3F;
    return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
}

MEM_STATIC U32 ZSTD_rotateRight_U32(U32 const value, U32 count)
{
    assert(count < 32);
    count &= 0x1F;
    return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
}

MEM_STATIC U16 ZSTD_rotateRight_U16(U16 const value, U32 count)
{
    assert(count < 16);
    count &= 0x0F;
    return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
}
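
/* The "(0U - count) & mask" form computes (width - count) mod width without a branch,
 * so count == 0 never becomes an undefined shift by the full word width; compilers
 * recognize this idiom and emit a single rotate instruction. */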
#ifndef ZSTD_NO_INTRINSICS
#  if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
#    include <immintrin.h>
#  elif defined(__ICCARM__)
#    include <intrinsics.h>
#  endif
#endif

#define STREAM_ACCUMULATOR_MIN_32  25
#define STREAM_ACCUMULATOR_MIN_64  57
#define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
static const unsigned BIT_mask[] = {
    0,          1,          3,          7,          0xF,        0x1F,
    0x3F,       0x7F,       0xFF,       0x1FF,      0x3FF,      0x7FF,
    0xFFF,      0x1FFF,     0x3FFF,     0x7FFF,     0xFFFF,     0x1FFFF,
    0x3FFFF,    0x7FFFF,    0xFFFFF,    0x1FFFFF,   0x3FFFFF,   0x7FFFFF,
    0xFFFFFF,   0x1FFFFFF,  0x3FFFFFF,  0x7FFFFFF,  0xFFFFFFF,  0x1FFFFFFF,
    0x3FFFFFFF, 0x7FFFFFFF};  /* BIT_mask[n] keeps the n lowest bits set; up to 31 bits */
#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
                                  void* startPtr, size_t dstCapacity)
/* ... */

MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
    return _bzhi_u64(bitContainer, nbBits);   /* clears all bits at positions >= nbBits */
#else
    return bitContainer & BIT_mask[nbBits];
#endif
}

/* BIT_addBits() */
/* ... */
                            size_t value, unsigned nbBits)
/* ... */

/* BIT_addBitsFast() : only usable when value is "clean", i.e. fits within nbBits */
/* ... */
                            size_t value, unsigned nbBits)
{
    assert((value>>nbBits) == 0);
    size_t const nbBytes = bitC->bitPos >> 3;
/* ... */
    bitC->ptr += nbBytes;
/* ... */
    size_t const nbBytes = bitC->bitPos >> 3;
/* ... */
    bitC->ptr += nbBytes;
/* ... */
    if (bitC->ptr >= bitC->endPtr) return 0;
    bitD->start = (const char*)srcBuffer;
/* ... */
    if (lastByte == 0) return ERROR(GENERIC); }
/* ... */
    if (lastByte == 0) return ERROR(corruption_detected);
/* ... */
    return bitContainer >> start;
/* ... */
    U32 const regMask = sizeof(bitContainer)*8 - 1;
#if defined(__x86_64__) || defined(_M_X86)
    /* on x86 the mask is computed inline; elsewhere the precomputed BIT_mask table is used */
    return (bitContainer >> (start & regMask)) & ( ( ( (U64)1 ) << nbBits) - 1);
#else
    return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
#endif
    if (bitD->ptr - nbBytes < bitD->start) {
/* ... */
    bitD->ptr -= nbBytes;
#if defined (__cplusplus)
/* ... */
#endif

#define FSE_NCOUNTBOUND 512
#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 + sizeof(size_t) )
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))

/* static allocation of FSE tables */
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))

#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
#define FSE_DTABLE_SIZE(maxTableLog)                   (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))

#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) )
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter,
                            unsigned maxSymbolValue, unsigned tableLog,
                            void* workSpace, size_t wkspSize);
#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))

#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity,
                                const void* cSrc, size_t cSrcSize,
                                unsigned maxLog, void* workSpace, size_t wkspSize,
                                int bmi2);
    const void* ptr = ct;
/* ... */
    statePtr->value = (ptrdiff_t)1<<tableLog;
/* ... */
    statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
/* ... */
    return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
/* ... */
    U32 const threshold = (minNbBits+1) << 16;
/* ... */
    assert(accuracyLog < 31-tableLog);
    {   U32 const tableSize = 1 << tableLog;
        U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
        U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog;
        U32 const bitMultiplier = 1 << accuracyLog;
        assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
        assert(normalizedDeltaFromThreshold <= bitMultiplier);
        return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
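
/* The bit-cost routine above returns a symbol's cost as a fixed-point value with
 * accuracyLog fractional bits: minNbBits is the integer part, and the linear
 * interpolation estimates the fraction from the symbol's distance to the threshold. */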
    const void* ptr = dt;
/* ... */
    DStatePtr->table = dt + 1;
/* ... */
    return DStatePtr->state == 0;
#ifndef FSE_COMMONDEFS_ONLY

#ifndef FSE_MAX_MEMORY_USAGE
#  define FSE_MAX_MEMORY_USAGE 14
#endif
#ifndef FSE_DEFAULT_MEMORY_USAGE
#  define FSE_DEFAULT_MEMORY_USAGE 13
#endif
#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
#  error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
#endif

#ifndef FSE_MAX_SYMBOL_VALUE
#  define FSE_MAX_SYMBOL_VALUE 255
#endif

#define FSE_FUNCTION_TYPE BYTE
#define FSE_FUNCTION_EXTENSION
#define FSE_DECODE_TYPE FSE_decode_t

#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
#define FSE_MIN_TABLELOG 5

#define FSE_TABLELOG_ABSOLUTE_MAX 15
#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
#  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
#endif

#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
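
/* FSE_TABLESTEP yields an odd step while tableSize is a power of two, so the two are
 * coprime: starting from 0 and repeatedly adding the step modulo tableSize visits
 * every cell exactly once. The table-building loops further below rely on this to
 * scatter each symbol's occurrences across the whole table. */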
#if defined (__cplusplus)
/* ... */
#endif

#if defined (__cplusplus)
/* ... */
#endif

#ifndef HUF_H_298734234
#define HUF_H_298734234

#define FSE_STATIC_LINKING_ONLY

#define HUF_BLOCKSIZE_MAX (128 * 1024)
size_t HUF_compressBound(size_t size);

#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))

#define HUF_TABLELOG_MAX      12
#define HUF_TABLELOG_DEFAULT  11
#define HUF_SYMBOLVALUE_MAX  255

#define HUF_TABLELOG_ABSOLUTEMAX  12
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
#  error "HUF_TABLELOG_MAX is too large !"
#endif

#define HUF_CTABLEBOUND 129
#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)
#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))

/* static allocation of HUF's Compression Table */
#define HUF_CTABLE_SIZE_ST(maxSymbolValue)   ((maxSymbolValue)+2)
#define HUF_CTABLE_SIZE(maxSymbolValue)      (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
    HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)]

/* static allocation of HUF's DTable */
#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra

/* ... */
                     const void* src, size_t srcSize,
                     unsigned maxSymbolValue, unsigned tableLog,
                     void* workSpace, size_t wkspSize,
/* ... */

#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
/* ... */
                     const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
                     void* workSpace, size_t wkspSize);
/* ... */
                     U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
                     const void* src, size_t srcSize);

#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
/* ... */
                     U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
                     const void* src, size_t srcSize,
                     void* workspace, size_t wkspSize,
/* ... */

#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
/* ... */
                     const void* src, size_t srcSize,
                     unsigned maxSymbolValue, unsigned tableLog,
                     void* workSpace, size_t wkspSize,
/* ... */

#ifndef HUF_FORCE_DECOMPRESS_X1
/* ... */
#endif
#ifndef HUF_FORCE_DECOMPRESS_X2
/* ... */
#endif
#ifndef HUF_FORCE_DECOMPRESS_X2
/* ... */
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
/* ... */
#endif

#if defined (__cplusplus)
/* ... */
#endif
static size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                                  const void* headerBuffer, size_t hbSize)
{
    const BYTE* const istart = (const BYTE*) headerBuffer;
    const BYTE* const iend = istart + hbSize;
/* ... */
    unsigned charnum = 0;
    unsigned const maxSV1 = *maxSVPtr + 1;
/* ... */
    /* input too small: copy into a large-enough local buffer and retry */
    {   char buffer[8] = {0};
/* ... */
        {   size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
                                                    buffer, sizeof(buffer));
/* ... */
            if (countSize > hbSize) return ERROR(corruption_detected);
/* ... */

    ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));
/* ... */
    *tableLogPtr = nbBits;
    remaining = (1<<nbBits)+1;
    threshold = 1<<nbBits;
/* ... */
            while (repeats >= 12) {
/* ... */
                    bitCount -= (int)(8 * (iend - 7 - ip));
/* ... */
            charnum += 3 * repeats;
            bitStream >>= 2 * repeats;
            bitCount += 2 * repeats;
/* ... */
            assert((bitStream & 3) < 3);
            charnum += bitStream & 3;
/* ... */
            if (charnum >= maxSV1) break;
/* ... */
            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
                assert((bitCount >> 3) <= 3);
/* ... */
                bitCount -= (int)(8 * (iend - 4 - ip));
/* ... */
        {   int const max = (2*threshold-1) - remaining;
/* ... */
            if ((bitStream & (threshold-1)) < (U32)max) {
                count = bitStream & (threshold-1);
                bitCount += nbBits-1;
            } else {
                count = bitStream & (2*threshold-1);
/* ... */
            normalizedCounter[charnum++] = (short)count;
/* ... */
            if (remaining < threshold) {
/* ... */
                if (remaining <= 1) break;
/* ... */
                threshold = 1 << (nbBits - 1);
/* ... */
            if (charnum >= maxSV1) break;
/* ... */
            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
/* ... */
                bitCount -= (int)(8 * (iend - 4 - ip));
/* ... */
    if (remaining != 1) return ERROR(corruption_detected);
/* ... */
    if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
    if (bitCount > 32) return ERROR(corruption_detected);
    *maxSVPtr = charnum-1;
/* ... */
    ip += (bitCount+7)>>3;
static size_t FSE_readNCount_body_default(
        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
        const void* headerBuffer, size_t hbSize)
{
    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}

#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
        const void* headerBuffer, size_t hbSize)
{
    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#endif

size_t FSE_readNCount_bmi2(
        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
        const void* headerBuffer, size_t hbSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
        return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
    }
#endif
    (void)bmi2;
    return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}

size_t FSE_readNCount(
        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
        const void* headerBuffer, size_t hbSize)
{
    return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
}
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                     U32* nbSymbolsPtr, U32* tableLogPtr,
                     const void* src, size_t srcSize)
{
    U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
}
static size_t HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                                 U32* nbSymbolsPtr, U32* tableLogPtr,
                                 const void* src, size_t srcSize,
                                 void* workSpace, size_t wkspSize,
                                 int bmi2)
/* ... */
        oSize = iSize - 127;
        iSize = ((oSize+1)/2);
/* ... */
        if (oSize >= hwSize) return ERROR(corruption_detected);
/* ... */
        for (n=0; n<oSize; n+=2) {
            huffWeight[n]   = ip[n/2] >> 4;
            huffWeight[n+1] = ip[n/2] & 15;
/* ... */
    {   U32 n; for (n=0; n<oSize; n++) {
/* ... */
            rankStats[huffWeight[n]]++;
            weightTotal += (1 << huffWeight[n]) >> 1;
/* ... */
    if (weightTotal == 0) return ERROR(corruption_detected);
/* ... */
    *tableLogPtr = tableLog;
/* ... */
    {   U32 const total = 1 << tableLog;
        U32 const rest = total - weightTotal;
/* ... */
        if (verif != rest) return ERROR(corruption_detected);
        huffWeight[oSize] = (BYTE)lastWeight;
        rankStats[lastWeight]++;
/* ... */
    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);
/* ... */
    *nbSymbolsPtr = (U32)(oSize+1);
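
/* Weight-encoding detail visible above: only the first oSize weights are stored
 * (two 4-bit weights per byte when direct-coded); the last symbol's weight is not
 * transmitted, but reconstructed so that the sum of 2^(w-1) reaches the next power
 * of two (the remainder must itself be a power of two, checked via verif != rest). */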
static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                     U32* nbSymbolsPtr, U32* tableLogPtr,
                     const void* src, size_t srcSize,
                     void* workSpace, size_t wkspSize)
{
    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
}

#if DYNAMIC_BMI2
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                     U32* nbSymbolsPtr, U32* tableLogPtr,
                     const void* src, size_t srcSize,
                     void* workSpace, size_t wkspSize)
{
    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
}
#endif

size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                     U32* nbSymbolsPtr, U32* tableLogPtr,
                     const void* src, size_t srcSize,
                     void* workSpace, size_t wkspSize,
                     int flags)
{
/* ... */
    return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
/* ... */
}
const char* ERR_getErrorString(ERR_enum code)
{
#ifdef ZSTD_STRIP_ERROR_STRINGS
    (void)code;
    return "Error strings stripped";
#else
    static const char* const notErrorCode = "Unspecified error code";
    switch( code )
    {
    case PREFIX(no_error): return "No error detected";
    case PREFIX(GENERIC):  return "Error (generic)";
    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
    case PREFIX(version_unsupported): return "Version not supported";
    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
    case PREFIX(corruption_detected): return "Data corruption detected";
    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
    case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
    case PREFIX(parameter_unsupported): return "Unsupported parameter";
    case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
    case PREFIX(init_missing): return "Context should be init first";
    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
    case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
    case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
    case PREFIX(srcSize_wrong): return "Src size is incorrect";
    case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
    case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
    case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
    /* following error codes are not stable and may be removed or changed in a future version */
    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
    case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
    case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
    case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
    case PREFIX(externalSequences_invalid): return "External sequences are not valid";
    default: return notErrorCode;
    }
#endif
}
#define FSE_STATIC_LINKING_ONLY
/* ... */
#define ZSTD_DEPS_NEED_MALLOC
/* ... */
#define FSE_isError ERR_isError
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)

#ifndef FSE_FUNCTION_EXTENSION
#  error "FSE_FUNCTION_EXTENSION must be defined"
#endif
#ifndef FSE_FUNCTION_TYPE
#  error "FSE_FUNCTION_TYPE must be defined"
#endif

#define FSE_CAT(X,Y) X##Y
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
    void* const tdPtr = dt+1;
/* ... */
    U16* symbolNext = (U16*)workSpace;
    BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
/* ... */
    U32 const maxSV1 = maxSymbolValue + 1;
    U32 const tableSize = 1 << tableLog;
    U32 highThreshold = tableSize-1;
/* ... */
    {   S16 const largeLimit= (S16)(1 << (tableLog-1));
/* ... */
        for (s=0; s<maxSV1; s++) {
            if (normalizedCounter[s]==-1) {
/* ... */
            if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
            symbolNext[s] = normalizedCounter[s];
/* ... */
    if (highThreshold == tableSize - 1) {
        size_t const tableMask = tableSize-1;
/* ... */
        U64 const add = 0x0101010101010101ull;
/* ... */
        for (s=0; s<maxSV1; ++s, sv += add) {
/* ... */
            int const n = normalizedCounter[s];
/* ... */
            for (i = 8; i < n; i += 8) {
/* ... */
        size_t position = 0;
/* ... */
            for (u = 0; u < unroll; ++u) {
                size_t const uPosition = (position + (u * step)) & tableMask;
                tableDecode[uPosition].symbol = spread[s + u];
/* ... */
            position = (position + (unroll * step)) & tableMask;
/* ... */
    } else {
        U32 const tableMask = tableSize-1;
/* ... */
        U32 s, position = 0;
        for (s=0; s<maxSV1; s++) {
/* ... */
            for (i=0; i<normalizedCounter[s]; i++) {
/* ... */
                position = (position + step) & tableMask;
                while (position > highThreshold) position = (position + step) & tableMask;
/* ... */
        if (position!=0) return ERROR(GENERIC);
/* ... */
    for (u=0; u<tableSize; u++) {
/* ... */
        U32 const nextState = symbolNext[symbol]++;
/* ... */
        tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
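
/* Decode-table invariant built above: each cell's newState is chosen so that
 * (newState + the next nbBits read from the stream) lands on the successor state.
 * Low-probability symbols (normalized count of -1) are parked at the high end of
 * the table (above highThreshold), where they always reload a full tableLog bits. */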
#ifndef FSE_COMMONDEFS_ONLY
/* ... */
                    const void* cSrc, size_t cSrcSize,
/* ... */
    BYTE* const olimit = omax-3;
/* ... */
#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
/* ... */
    if (op>(omax-2)) return ERROR(dstSize_tooSmall);
/* ... */
    if (op>(omax-2)) return ERROR(dstSize_tooSmall);
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
        void* dst, size_t dstCapacity,
        const void* cSrc, size_t cSrcSize,
        unsigned maxLog, void* workSpace, size_t wkspSize,
        int bmi2)
{
    const BYTE* const istart = (const BYTE*)cSrc;
/* ... */
    if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
/* ... */
    if (FSE_isError(NCountLength)) return NCountLength;
    if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
    assert(NCountLength <= cSrcSize);
/* ... */
    cSrcSize -= NCountLength;
#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(
        void* dst, size_t dstCapacity,
        const void* cSrc, size_t cSrcSize,
        unsigned maxLog, void* workSpace, size_t wkspSize)
{
    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
}
#endif

size_t FSE_decompress_wksp_bmi2(
        void* dst, size_t dstCapacity,
        const void* cSrc, size_t cSrcSize,
        unsigned maxLog, void* workSpace, size_t wkspSize,
        int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
        return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
    }
#endif
    (void)bmi2;
    return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
}
#ifndef THREADING_H_938743
#define THREADING_H_938743

#if defined (__cplusplus)
/* ... */
#endif

#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)

/* Windows minimum requirement: Vista (0x0600), for condition variables */
#undef  WINVER
#define WINVER 0x0600

#undef  _WIN32_WINNT
#define _WIN32_WINNT 0x0600

#ifndef WIN32_LEAN_AND_MEAN
#  define WIN32_LEAN_AND_MEAN
#endif

#include <windows.h>
#undef  ERROR   /* <windows.h> defines its own ERROR macro */
#define ERROR(name) ZSTD_ERROR(name)

/* mutex */
#define ZSTD_pthread_mutex_t           CRITICAL_SECTION
#define ZSTD_pthread_mutex_init(a, b)  ((void)(b), InitializeCriticalSection((a)), 0)
#define ZSTD_pthread_mutex_destroy(a)  DeleteCriticalSection((a))
#define ZSTD_pthread_mutex_lock(a)     EnterCriticalSection((a))
#define ZSTD_pthread_mutex_unlock(a)   LeaveCriticalSection((a))

/* condition variable */
#define ZSTD_pthread_cond_t            CONDITION_VARIABLE
#define ZSTD_pthread_cond_init(a, b)   ((void)(b), InitializeConditionVariable((a)), 0)
#define ZSTD_pthread_cond_destroy(a)   ((void)(a))
#define ZSTD_pthread_cond_wait(a, b)   SleepConditionVariableCS((a), (b), INFINITE)
#define ZSTD_pthread_cond_signal(a)    WakeConditionVariable((a))
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))

/* thread */
/* ... */
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
                        void* (*start_routine) (void*), void* arg);
#elif defined(ZSTD_MULTITHREAD)   /* posix assumed; relies on pthread */

#  include <pthread.h>

#if DEBUGLEVEL < 1

#define ZSTD_pthread_mutex_t            pthread_mutex_t
#define ZSTD_pthread_mutex_init(a, b)   pthread_mutex_init((a), (b))
#define ZSTD_pthread_mutex_destroy(a)   pthread_mutex_destroy((a))
#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock((a))
#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock((a))

#define ZSTD_pthread_cond_t             pthread_cond_t
#define ZSTD_pthread_cond_init(a, b)    pthread_cond_init((a), (b))
#define ZSTD_pthread_cond_destroy(a)    pthread_cond_destroy((a))
#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait((a), (b))
#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal((a))
#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast((a))

#define ZSTD_pthread_t                  pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a)            pthread_join((a),NULL)

#else /* DEBUGLEVEL >= 1 : debug wrappers behind pointers, to detect misuse */

#define ZSTD_pthread_mutex_t            pthread_mutex_t*
/* ... */
#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock(*(a))
#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock(*(a))

#define ZSTD_pthread_cond_t             pthread_cond_t*
/* ... */
#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait(*(a), *(b))
#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal(*(a))
#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast(*(a))

#define ZSTD_pthread_t                  pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a)            pthread_join((a),NULL)

#endif
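
/* Illustrative use of the wrappers (hypothetical call site):
 *   ZSTD_pthread_mutex_t lock;
 *   ZSTD_pthread_mutex_init(&lock, NULL);
 *   ZSTD_pthread_mutex_lock(&lock);
 *   ... critical section ...
 *   ZSTD_pthread_mutex_unlock(&lock);
 *   ZSTD_pthread_mutex_destroy(&lock);
 * The same call sites compile against CRITICAL_SECTION on Windows, pthreads on
 * POSIX, and no-ops in single-threaded builds. */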
#else  /* ZSTD_MULTITHREAD not defined */
/* No multithreading support: all operations are no-ops */

#define ZSTD_pthread_mutex_init(a, b)   ((void)(a), (void)(b), 0)
#define ZSTD_pthread_mutex_destroy(a)   ((void)(a))
#define ZSTD_pthread_mutex_lock(a)      ((void)(a))
#define ZSTD_pthread_mutex_unlock(a)    ((void)(a))

#define ZSTD_pthread_cond_init(a, b)    ((void)(a), (void)(b), 0)
#define ZSTD_pthread_cond_destroy(a)    ((void)(a))
#define ZSTD_pthread_cond_wait(a, b)    ((void)(a), (void)(b))
#define ZSTD_pthread_cond_signal(a)     ((void)(a))
#define ZSTD_pthread_cond_broadcast(a)  ((void)(a))
#if defined (__cplusplus)
/* ... */
#endif

#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)

#include <process.h>

typedef struct {
    void* (*start_routine)(void*);
    void* arg;
    int initialized;
    /* ... */
} ZSTD_thread_params_t;

static unsigned __stdcall worker(void *arg)
{
    void* (*start_routine)(void*);
    void* thread_arg;

    /* Initialized thread_param */
    ZSTD_thread_params_t* thread_param = (ZSTD_thread_params_t*)arg;
    thread_arg = thread_param->arg;
    start_routine = thread_param->start_routine;

    /* Signal main thread that we are running and do not depend on thread_param */
/* ... */
    thread_param->initialized = 1;
/* ... */

    start_routine(thread_arg);
/* ... */
}

int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
                        void* (*start_routine) (void*), void* arg)
{
    ZSTD_thread_params_t thread_param;
    (void)unused;

    thread_param.start_routine = start_routine;
    thread_param.arg = arg;
    thread_param.initialized = 0;
/* ... */
    *thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL);
/* ... */
    /* Wait for the worker to copy the parameters before returning */
    while(!thread_param.initialized) {
/* ... */

/* join */
/* ... */
    result = WaitForSingleObject(thread, INFINITE);
/* ... */
    case WAIT_ABANDONED:
/* ... */
        return GetLastError();
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)

#define ZSTD_DEPS_NEED_MALLOC
/* ... */
    *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
/* ... */
    return pthread_mutex_init(*mutex, attr);
/* ... */
    int const ret = pthread_mutex_destroy(*mutex);
/* ... */
    return pthread_cond_init(*cond, attr);
/* ... */
    int const ret = pthread_cond_destroy(*cond);
#define ZSTD_DEPS_NEED_MALLOC
/* ... */
#define ZSTD_STATIC_LINKING_ONLY
/* ... */
#include "../zstd.h"
/* ... */
#ifndef ZSTD_ALLOCATIONS_H
#define ZSTD_ALLOCATIONS_H

/* custom memory allocation functions */
/* ... */
    if (customMem.customAlloc)
        return customMem.customAlloc(customMem.opaque, size);
/* ... */
    if (customMem.customAlloc) {
        /* calloc implemented as malloc+memset:
         * not as efficient as calloc, but next best guess for a custom malloc */
        void* const ptr = customMem.customAlloc(customMem.opaque, size);
/* ... */
    if (customMem.customFree)
        customMem.customFree(customMem.opaque, ptr);
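
/* Illustrative caller-side setup, using zstd's public ZSTD_customMem interface
 * (myAlloc/myFree are hypothetical names):
 *
 *   static void* myAlloc(void* opaque, size_t size) { (void)opaque; return malloc(size); }
 *   static void  myFree (void* opaque, void* addr)  { (void)opaque; free(addr); }
 *   ZSTD_customMem const mem = { myAlloc, myFree, NULL };
 *
 * A zeroed ZSTD_customMem (customAlloc == customFree == NULL) selects the default
 * malloc/free path guarded above. */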
#if defined (__cplusplus)
/* ... */
#endif

#define ZSTD_STATIC_LINKING_ONLY
/* ... */
#if defined (__cplusplus)
/* ... */
#endif

#if defined(_MSC_VER)
#  pragma warning(disable : 4204)  /* disable: C4204: non-constant aggregate initializer */
#endif

#ifdef ZSTD_MULTITHREAD
/* ... */
    if (!ctx) { return NULL; }
/* ... */

POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
                               ZSTD_customMem customMem)
{
/* ... */
    if (!numThreads) { return NULL; }
/* ... */
    if (!ctx) { return NULL; }
/* ... */
    for (i = 0; i < numThreads; ++i) {
/* ... */
    if (!ctx) { return; }
/* ... */
    if (ctx==NULL) return 0;
/* ... */
    if (numThreads <= ctx->threadCapacity) {
        if (!numThreads) return 1;
/* ... */
    if (!threadPool) return 1;
/* ... */
    for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) {
/* ... */
    if (ctx==NULL) return 1;
/* ... */
    assert(!ctx || ctx == &g_poolCtx);
/* ... */
    assert(!ctx || ctx == &g_poolCtx);
/* ... */
    if (ctx==NULL) return 0;
    assert(ctx == &g_poolCtx);
    return sizeof(*ctx);
#define ZSTD_DEPS_NEED_MALLOC
/* ... */
#ifndef ZSTD_CCOMMON_H_MODULE
#define ZSTD_CCOMMON_H_MODULE
/* ... */
#ifndef ZSTD_COMMON_CPU_H
#define ZSTD_COMMON_CPU_H
/* ... */
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
    __cpuid((int*)reg, 0);
    {   int const n = reg[0];
/* ... */
        __cpuid((int*)reg, 1);
/* ... */
        __cpuidex((int*)reg, 7, 0);
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
    /* i386 PIC: gcc reserves ebx, so cpuid must save/restore it around the call */
/* ... */
        : "=a"(f1a), "=c"(f1c), "=d"(f1d)
/* ... */
        "movl %%ebx, %%eax\n\t"
/* ... */
        : "=a"(f7b), "=c"(f7c)
/* ... */
#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
/* ... */
    __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
/* ... */
    __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
/* ... */
        : "=a"(f7a), "=b"(f7b), "=c"(f7c)
#define X(name, r, bit)                                                \
    MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {       \
        return ((cpuid.r) & (1U << bit)) != 0;                         \
    }

/* cpuid(1): Processor Info and Feature Bits. */
#define C(name, bit) X(name, f1c, bit)
/* ... */
#undef C
#define D(name, bit) X(name, f1d, bit)
/* ... */
#undef D
#define B(name, bit) X(name, f7b, bit)
/* ... */
#undef B
#define C(name, bit) X(name, f7c, bit)
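
/* Expansion example: C(sse3, 0) defines
 *   MEM_STATIC int ZSTD_cpuid_sse3(ZSTD_cpuid_t const cpuid)
 *   { return ((cpuid.f1c) & (1U << 0)) != 0; }
 * i.e. one tiny predicate per feature bit, reading the registers captured above. */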
#define ZSTD_STATIC_LINKING_ONLY
/* ... */
#define FSE_STATIC_LINKING_ONLY
/* ... */
#ifndef XXH_STATIC_LINKING_ONLY
#  define XXH_STATIC_LINKING_ONLY
#endif
/* ... */
#  define XXH_NO_XXH3
/* ... */
#ifndef XXH_NAMESPACE
#  define XXH_NAMESPACE ZSTD_
#endif
#if defined (__cplusplus)
/* ... */
#endif

#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
    && !defined(XXH_INLINE_ALL_31684351384)
   /* this section should be traversed only once */
#  define XXH_INLINE_ALL_31684351384
   /* give access to the advanced API, required to compile implementations */
#  undef XXH_STATIC_LINKING_ONLY   /* avoid macro redef */
#  define XXH_STATIC_LINKING_ONLY
   /* make all functions private */
#  undef XXH_PUBLIC_API
#  if defined(__GNUC__)
#    define XXH_PUBLIC_API static __inline __attribute__((unused))
#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) )
#    define XXH_PUBLIC_API static inline
#  elif defined(_MSC_VER)
#    define XXH_PUBLIC_API static __inline
#  else
     /* note: this version may generate warnings for unused static functions */
#    define XXH_PUBLIC_API static
#  endif
#  undef XXH_versionNumber
   /* XXH32 */
#  undef XXH32_createState
#  undef XXH32_freeState
#  undef XXH32_update
#  undef XXH32_digest
#  undef XXH32_copyState
#  undef XXH32_canonicalFromHash
#  undef XXH32_hashFromCanonical
   /* XXH64 */
#  undef XXH64_createState
#  undef XXH64_freeState
#  undef XXH64_update
#  undef XXH64_digest
#  undef XXH64_copyState
#  undef XXH64_canonicalFromHash
#  undef XXH64_hashFromCanonical
   /* XXH3_64bits */
#  undef XXH3_64bits_withSecret
#  undef XXH3_64bits_withSeed
#  undef XXH3_64bits_withSecretandSeed
#  undef XXH3_createState
#  undef XXH3_freeState
#  undef XXH3_copyState
#  undef XXH3_64bits_reset
#  undef XXH3_64bits_reset_withSeed
#  undef XXH3_64bits_reset_withSecret
#  undef XXH3_64bits_update
#  undef XXH3_64bits_digest
#  undef XXH3_generateSecret
   /* XXH3_128bits */
#  undef XXH3_128bits
#  undef XXH3_128bits_withSeed
#  undef XXH3_128bits_withSecret
#  undef XXH3_128bits_reset
#  undef XXH3_128bits_reset_withSeed
#  undef XXH3_128bits_reset_withSecret
#  undef XXH3_128bits_reset_withSecretandSeed
#  undef XXH3_128bits_update
#  undef XXH3_128bits_digest
#  undef XXH128_isEqual
#  undef XXH128_canonicalFromHash
#  undef XXH128_hashFromCanonical
   /* finally, free the namespace itself */
#  undef XXH_NAMESPACE

   /* in some situations, XXH_INLINE_ALL can be defined in addition to XXH_NAMESPACE */
#  define XXH_NAMESPACE XXH_INLINE_
#  define XXH_IPREF(Id)   XXH_NAMESPACE ## Id
#  define XXH_OK XXH_IPREF(XXH_OK)
#  define XXH_ERROR XXH_IPREF(XXH_ERROR)
#  define XXH_errorcode XXH_IPREF(XXH_errorcode)
#  define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)
#  define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)
#  define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
#  define XXH32_state_s XXH_IPREF(XXH32_state_s)
#  define XXH32_state_t XXH_IPREF(XXH32_state_t)
#  define XXH64_state_s XXH_IPREF(XXH64_state_s)
#  define XXH64_state_t XXH_IPREF(XXH64_state_t)
#  define XXH3_state_s XXH_IPREF(XXH3_state_s)
#  define XXH3_state_t XXH_IPREF(XXH3_state_t)
#  define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
   /* Ensure the header is parsed again, even if it was previously included */
#  undef XXHASH_H_5627135585666179
#  undef XXHASH_H_STATIC_13879238742
#endif

#ifndef XXHASH_H_5627135585666179
#define XXHASH_H_5627135585666179 1
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
#    ifdef XXH_EXPORT
#      define XXH_PUBLIC_API __declspec(dllexport)
#    elif XXH_IMPORT
#      define XXH_PUBLIC_API __declspec(dllimport)
#    endif
#  else
#    define XXH_PUBLIC_API
#  endif
#endif

#ifdef XXH_DOXYGEN
#  define XXH_NAMESPACE /* YOUR NAME HERE */
#  undef XXH_NAMESPACE
#endif

#ifdef XXH_NAMESPACE
#  define XXH_CAT(A,B) A##B
#  define XXH_NAME2(A,B) XXH_CAT(A,B)
#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
/* XXH32 */
#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
/* XXH64 */
#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
/* XXH3_64bits */
#  define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
#  define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
#  define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
#  define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
#  define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
#  define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
#  define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
#  define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
#  define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
#  define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
#  define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
#  define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
#  define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
#  define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
#  define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
/* XXH128 */
#  define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
#  define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
#  define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
#  define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
#  define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
#  define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
#  define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
#  define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
#  define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
#  define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
#  define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
#  define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
#  define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
#  define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
#  define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
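
/* With XXH_NAMESPACE defined to ZSTD_ (see the top of this file), XXH_NAME2 pastes
 * the prefix onto every public symbol: XXH64 becomes ZSTD_XXH64, XXH32_digest
 * becomes ZSTD_XXH32_digest, and so on. This keeps zstd's embedded xxhash from
 * colliding with another copy of xxhash linked into the same binary. */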
#define XXH_VERSION_MAJOR    0
#define XXH_VERSION_MINOR    8
#define XXH_VERSION_RELEASE  1
#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)

#if defined(XXH_DOXYGEN)
/* ... */
#elif !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ) )
#   include <stdint.h>
    typedef uint32_t XXH32_hash_t;
#else
#   include <limits.h>
#   if UINT_MAX == 0xFFFFFFFFUL
      typedef unsigned int XXH32_hash_t;
#   else
#     if ULONG_MAX == 0xFFFFFFFFUL
        typedef unsigned long XXH32_hash_t;
#     else
#       error "unsupported platform: need a 32-bit type"
#     endif
#   endif
#endif
/* ... */
typedef struct {
    unsigned char digest[4];
} XXH32_canonical_t;
#ifdef __has_attribute
#  define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
#else
#  define XXH_HAS_ATTRIBUTE(x) 0
#endif

#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
#  define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
#else
#  define XXH_HAS_C_ATTRIBUTE(x) 0
#endif

#if defined(__cplusplus) && defined(__has_cpp_attribute)
#  define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
#  define XXH_HAS_CPP_ATTRIBUTE(x) 0
#endif

#if XXH_HAS_C_ATTRIBUTE(fallthrough)
#  define XXH_FALLTHROUGH [[fallthrough]]
#elif XXH_HAS_CPP_ATTRIBUTE(fallthrough)
#  define XXH_FALLTHROUGH [[fallthrough]]
#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
#  define XXH_FALLTHROUGH __attribute__ ((fallthrough))
#else
#  define XXH_FALLTHROUGH
#endif
#ifndef XXH_NO_LONG_LONG
/* 64-bit hash type */
#if defined(XXH_DOXYGEN)
/* ... */
#elif !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ) )
#  include <stdint.h>
   typedef uint64_t XXH64_hash_t;
#else
#  include <limits.h>
#  if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
     typedef unsigned long XXH64_hash_t;
#  endif
#endif
/* ... */
#define XXH3_SECRET_SIZE_MIN 136
#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
#define XXHASH_H_STATIC_13879238742
/* ... */
#ifndef XXH_NO_LONG_LONG
/* ... */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
#  include <stdalign.h>
#  define XXH_ALIGN(n)      alignas(n)
#elif defined(__cplusplus) && (__cplusplus >= 201103L)
   /* In C++ alignas() is a keyword */
#  define XXH_ALIGN(n)      alignas(n)
#elif defined(__GNUC__)
#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
#elif defined(_MSC_VER)
#  define XXH_ALIGN(n)      __declspec(align(n))
#else
#  define XXH_ALIGN(n)   /* disabled */
#endif

/* Old GCC versions only accept the attribute after the type in structures. */
#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \
    && ! (defined(__cplusplus) && (__cplusplus >= 201103L))         /* >= C++11 */ \
    && defined(__GNUC__)
#  define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
#else
#  define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
#endif

#define XXH3_INTERNALBUFFER_SIZE 256

#define XXH3_SECRET_DEFAULT_SIZE 192

struct XXH3_state_s {
/* ... */
    XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
/* ... */
    XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
/* ... */
    size_t nbStripesSoFar;
/* ... */
    size_t nbStripesPerBlock;
/* ... */
    const unsigned char* extSecret;
/* ... */
};

#undef XXH_ALIGN_MEMBER
#define XXH3_INITSTATE(XXH3_state_ptr) { (XXH3_state_ptr)->seed = 0; }

/* ... */
                    const void* secret, size_t secretSize,
/* ... */
                    const void* secret, size_t secretSize,
/* ... */
                    const void* secret, size_t secretSize,
/* ... */
                    const void* secret, size_t secretSize,
/* ... */
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
#  define XXH_IMPLEMENTATION
#endif
/* ... */

#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
   || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
#  define XXH_IMPLEM_13a8737387

/* Documentation-only defaults: each tunable is defined then immediately undefined,
 * leaving autodetection in charge (this follows xxhash's XXH_DOXYGEN convention). */
#  define XXH_NO_LONG_LONG
#  undef XXH_NO_LONG_LONG   /* autodetect 64-bit */
/* ... */
#  define XXH_FORCE_MEMORY_ACCESS 0
/* ... */
#  define XXH_FORCE_ALIGN_CHECK 0
/* ... */
#  define XXH_NO_INLINE_HINTS 0
/* ... */
#  define XXH32_ENDJMP 0
/* ... */
#  define XXH_OLD_NAMES
#  undef XXH_OLD_NAMES
#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
   /* prefer __packed__ structures (method 1) for GCC on ARMv7+ and MIPS */
#  if !defined(__clang__) && \
    ( \
        (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
        ( \
            defined(__GNUC__) && ( \
                (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
                ( \
                    defined(__mips__) && \
                    (__mips <= 5 || __mips_isa_rev < 6) && \
                    (!defined(__mips16) || defined(__mips_mips16e2)) \
                ) \
            ) \
        ) \
    )
#    define XXH_FORCE_MEMORY_ACCESS 1
#  endif
#endif

#ifndef XXH_FORCE_ALIGN_CHECK
#  if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) \
   || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64)
#    define XXH_FORCE_ALIGN_CHECK 0
#  else
#    define XXH_FORCE_ALIGN_CHECK 1
#  endif
#endif

#ifndef XXH_NO_INLINE_HINTS
#  if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
   || defined(__NO_INLINE__)     /* -O0, -fno-inline */
#    define XXH_NO_INLINE_HINTS 1
#  else
#    define XXH_NO_INLINE_HINTS 0
#  endif
#endif

#ifndef XXH32_ENDJMP
/* generally preferable for performance */
#  define XXH32_ENDJMP 0
#endif
6374 #define ZSTD_DEPS_NEED_MALLOC
6385 # pragma warning(disable : 4127)
6388 #if XXH_NO_INLINE_HINTS
6389 # if defined(__GNUC__) || defined(__clang__)
6390 # define XXH_FORCE_INLINE static __attribute__((unused))
6392 # define XXH_FORCE_INLINE static
6394 # define XXH_NO_INLINE static
6396 #elif defined(__GNUC__) || defined(__clang__)
6397 # define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
6398 # define XXH_NO_INLINE static __attribute__((noinline))
6399 #elif defined(_MSC_VER)
6400 # define XXH_FORCE_INLINE static __forceinline
6401 # define XXH_NO_INLINE static __declspec(noinline)
6402 #elif defined (__cplusplus) \
6403 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))
6404 # define XXH_FORCE_INLINE static inline
6405 # define XXH_NO_INLINE static
6407 # define XXH_FORCE_INLINE static
6408 # define XXH_NO_INLINE static
6424 #ifndef XXH_DEBUGLEVEL
6426 # define XXH_DEBUGLEVEL DEBUGLEVEL
6428 # define XXH_DEBUGLEVEL 0
6432 #if (XXH_DEBUGLEVEL>=1)
6434 # define XXH_ASSERT(c) assert(c)
6436 # define XXH_ASSERT(c) ((void)0)
6440 #ifndef XXH_STATIC_ASSERT
6441 # if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
6443 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
6444 # elif defined(__cplusplus) && (__cplusplus >= 201103L)
6445 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
6447 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
6449 # define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
6468 #if defined(__GNUC__) || defined(__clang__)
6469 # define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
6471 # define XXH_COMPILER_GUARD(var) ((void)0)
6477 #if !defined (__VMS) \
6478 && (defined (__cplusplus) \
6479 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ) )
6480 # include <stdint.h>
6487 #ifdef XXH_OLD_NAMES
6488 # define BYTE xxh_u8
6490 # define U32 xxh_u32
6545 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
6550 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
6558 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
6566 #ifdef XXH_OLD_NAMES
6567     typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
6571     typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
6572     return ((const xxh_unalign*)ptr)->u32;
6609 #ifndef XXH_CPU_LITTLE_ENDIAN
6614 # if defined(_WIN32) \
6615 || defined(__LITTLE_ENDIAN__) \
6616 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
6617 # define XXH_CPU_LITTLE_ENDIAN 1
6618 # elif defined(__BIG_ENDIAN__) \
6619 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
6620 # define XXH_CPU_LITTLE_ENDIAN 0
6637 # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
6647 #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
6649 #ifdef __has_builtin
6650 # define XXH_HAS_BUILTIN(x) __has_builtin(x)
6652 # define XXH_HAS_BUILTIN(x) 0
6668 #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
6669 && XXH_HAS_BUILTIN(__builtin_rotateleft64)
6670 # define XXH_rotl32 __builtin_rotateleft32
6671 # define XXH_rotl64 __builtin_rotateleft64
6673 #elif defined(_MSC_VER)
6674 # define XXH_rotl32(x,r) _rotl(x,r)
6675 # define XXH_rotl64(x,r) _rotl64(x,r)
6677 # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
6678 # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
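/* Note: the shift-based fallbacks assume a rotation count strictly between 0
 * and the operand width; a count of 0 would make the complementary shift equal
 * to the width, which is undefined behavior in C. All call sites in this file
 * use constant in-range rotations. */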
6689 #if defined(_MSC_VER)
6690 # define XXH_swap32 _byteswap_ulong
6691 #elif XXH_GCC_VERSION >= 403
6692 # define XXH_swap32 __builtin_bswap32
6696     return  ((x << 24) & 0xff000000 ) |
6697             ((x <<  8) & 0x00ff0000 ) |
6698             ((x >>  8) & 0x0000ff00 ) |
6699             ((x >> 24) & 0x000000ff );
6722 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
6730          | ((xxh_u32)bytePtr[3] << 24);
6739          | ((xxh_u32)bytePtr[0] << 24);
6782 #define XXH_PRIME32_1 0x9E3779B1U
6783 #define XXH_PRIME32_2 0x85EBCA77U
6784 #define XXH_PRIME32_3 0xC2B2AE3DU
6785 #define XXH_PRIME32_4 0x27D4EB2FU
6786 #define XXH_PRIME32_5 0x165667B1U
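/* The five 32-bit primes of XXH32. The first, 0x9E3779B1, is a prime near the
 * golden-ratio constant 2^32/phi; all five are odd with irregular bit
 * patterns, which is what the mixing steps rely on. */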
6788 #ifdef XXH_OLD_NAMES
6789 # define PRIME32_1 XXH_PRIME32_1
6790 # define PRIME32_2 XXH_PRIME32_2
6791 # define PRIME32_3 XXH_PRIME32_3
6792 # define PRIME32_4 XXH_PRIME32_4
6793 # define PRIME32_5 XXH_PRIME32_5
6812 #if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
6871 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
6890 #define XXH_PROCESS1 do { \
6891 h32 += (*ptr++) * XXH_PRIME32_5; \
6892 h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1; \
6895 #define XXH_PROCESS4 do { \
6896 h32 += XXH_get32bits(ptr) * XXH_PRIME32_3; \
6898 h32 = XXH_rotl32(h32, 17) * XXH_PRIME32_4; \
6960 #ifdef XXH_OLD_NAMES
6961 # define PROCESS1 XXH_PROCESS1
6962 # define PROCESS4 XXH_PROCESS4
6964 # undef XXH_PROCESS1
6965 # undef XXH_PROCESS4
6984     const xxh_u8* const bEnd = input + len;
6985     const xxh_u8* const limit = bEnd - 15;
6996     } while (input < limit);
7020     if ((((size_t)input) & 3) == 0) {
7048     XXH_memcpy(dstState, srcState, sizeof(*dstState));
7055     memset(statePtr, 0, sizeof(*statePtr));
7058     statePtr->v[2] = seed + 0;
7074     const xxh_u8* const bEnd = p + len;
7079     if (state->memsize + len < 16) {
7085     if (state->memsize) {
7093         p += 16 - state->memsize;
7098         const xxh_u8* const limit = bEnd - 16;
7111 state->memsize = (unsigned)(bEnd-p);
7124     if (state->large_len) {
7133     h32 += state->total_len_32;
7168 #ifndef XXH_NO_LONG_LONG
7182 #ifdef XXH_OLD_NAMES
7183 # define U64 xxh_u64
7186 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
7191 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
7196     return *(const xxh_u64*) memPtr;
7199 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
7207 #ifdef XXH_OLD_NAMES
7208     typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
7212     typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
7213     return ((const xxh_unalign64*)ptr)->u64;
7231 #if defined(_MSC_VER)
7232 # define XXH_swap64 _byteswap_uint64
7233 #elif XXH_GCC_VERSION >= 403
7234 # define XXH_swap64 __builtin_bswap64
7238     return  ((x << 56) & 0xff00000000000000ULL) |
7239             ((x << 40) & 0x00ff000000000000ULL) |
7240             ((x << 24) & 0x0000ff0000000000ULL) |
7241             ((x <<  8) & 0x000000ff00000000ULL) |
7242             ((x >>  8) & 0x00000000ff000000ULL) |
7243             ((x >> 24) & 0x0000000000ff0000ULL) |
7244             ((x >> 40) & 0x000000000000ff00ULL) |
7245             ((x >> 56) & 0x00000000000000ffULL);
7251 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
7263          | ((xxh_u64)bytePtr[7] << 56);
7276          | ((xxh_u64)bytePtr[0] << 56);
7309 #define XXH_PRIME64_1 0x9E3779B185EBCA87ULL
7310 #define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL
7311 #define XXH_PRIME64_3 0x165667B19E3779F9ULL
7312 #define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL
7313 #define XXH_PRIME64_5 0x27D4EB2F165667C5ULL
7315 #ifdef XXH_OLD_NAMES
7316 # define PRIME64_1 XXH_PRIME64_1
7317 # define PRIME64_2 XXH_PRIME64_2
7318 # define PRIME64_3 XXH_PRIME64_3
7319 # define PRIME64_4 XXH_PRIME64_4
7320 # define PRIME64_5 XXH_PRIME64_5
7350 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
7378 #ifdef XXH_OLD_NAMES
7379 # define PROCESS1_64 XXH_PROCESS1_64
7380 # define PROCESS4_64 XXH_PROCESS4_64
7381 # define PROCESS8_64 XXH_PROCESS8_64
7383 # undef XXH_PROCESS1_64
7384 # undef XXH_PROCESS4_64
7385 # undef XXH_PROCESS8_64
7395     const xxh_u8* const bEnd = input + len;
7396     const xxh_u8* const limit = bEnd - 31;
7407     } while (input < limit);
7436     if ((((size_t)input) & 7) == 0) {
7462     XXH_memcpy(dstState, srcState, sizeof(*dstState));
7469     memset(statePtr, 0, sizeof(*statePtr));
7472     statePtr->v[2] = seed + 0;
7487     const xxh_u8* const bEnd = p + len;
7489 state->total_len += len;
7491     if (state->memsize + len < 32) {
7497     if (state->memsize) {
7503         p += 32 - state->memsize;
7508         const xxh_u8* const limit = bEnd - 32;
7521 state->memsize = (unsigned)(bEnd-p);
7534     if (state->total_len >= 32) {
7581 #if ((defined(sun) || defined(__sun)) && __cplusplus)
7582 # define XXH_RESTRICT
7583 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
7584 # define XXH_RESTRICT restrict
7587 # define XXH_RESTRICT
7590 #if (defined(__GNUC__) && (__GNUC__ >= 3)) \
7591 || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
7592 || defined(__clang__)
7593 # define XXH_likely(x) __builtin_expect(x, 1)
7594 # define XXH_unlikely(x) __builtin_expect(x, 0)
7596 # define XXH_likely(x) (x)
7597 # define XXH_unlikely(x) (x)
7600 #if defined(__GNUC__) || defined(__clang__)
7601 # if defined(__ARM_NEON__) || defined(__ARM_NEON) \
7602 || defined(__aarch64__) || defined(_M_ARM) \
7603 || defined(_M_ARM64) || defined(_M_ARM64EC)
7604 # define inline __inline__
7605 # include <arm_neon.h>
7607 # elif defined(__AVX2__)
7608 # include <immintrin.h>
7609 # elif defined(__SSE2__)
7610 # include <emmintrin.h>
7614 #if defined(_MSC_VER)
7615 # include <intrin.h>
7687 #if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
7688 # warning "XXH3 is highly inefficient without ARM or Thumb-2."
7706 # define XXH_VECTOR XXH_SCALAR
7716 enum XXH_VECTOR_TYPE {
7738 # define XXH_ACC_ALIGN 8
7743 # define XXH_SCALAR 0
7746 # define XXH_AVX512 3
7753 defined(__ARM_NEON__) || defined(__ARM_NEON) \
7754 || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) \
7756 defined(_WIN32) || defined(__LITTLE_ENDIAN__) \
7757 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
7759 # define XXH_VECTOR XXH_NEON
7760 # elif defined(__AVX512F__)
7761 # define XXH_VECTOR XXH_AVX512
7762 # elif defined(__AVX2__)
7763 # define XXH_VECTOR XXH_AVX2
7764 # elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
7765 # define XXH_VECTOR XXH_SSE2
7766 # elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
7767 || (defined(__s390x__) && defined(__VEC__)) \
7768 && defined(__GNUC__)
7769 # define XXH_VECTOR XXH_VSX
7771 # define XXH_VECTOR XXH_SCALAR
7779 #ifndef XXH_ACC_ALIGN
7780 # if defined(XXH_X86DISPATCH)
7781 # define XXH_ACC_ALIGN 64
7782 # elif XXH_VECTOR == XXH_SCALAR
7783 # define XXH_ACC_ALIGN 8
7784 # elif XXH_VECTOR == XXH_SSE2
7785 # define XXH_ACC_ALIGN 16
7786 # elif XXH_VECTOR == XXH_AVX2
7787 # define XXH_ACC_ALIGN 32
7788 # elif XXH_VECTOR == XXH_NEON
7789 # define XXH_ACC_ALIGN 16
7790 # elif XXH_VECTOR == XXH_VSX
7791 # define XXH_ACC_ALIGN 16
7792 # elif XXH_VECTOR == XXH_AVX512
7793 # define XXH_ACC_ALIGN 64
7797 #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
7798 || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
7799 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
7801 # define XXH_SEC_ALIGN 8
7825 #if XXH_VECTOR == XXH_AVX2 \
7826 && defined(__GNUC__) && !defined(__clang__) \
7827 && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__)
7828 # pragma GCC push_options
7829 # pragma GCC optimize("-O2")
7833 #if XXH_VECTOR == XXH_NEON
7914 # if !defined(XXH_NO_VZIP_HACK) \
7915 && (defined(__GNUC__) || defined(__clang__)) \
7916 && (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
7917 # define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
7922 __asm__("vzip.32 %e0, %f0" : "+w" (in)); \
7923 (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in)); \
7924 (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \
7927 # define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
7929 (outLo) = vmovn_u64 (in); \
7930 (outHi) = vshrn_n_u64 ((in), 32); \
7971 # ifndef XXH3_NEON_LANES
7972 # if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
7973 && !defined(__OPTIMIZE_SIZE__)
7974 # define XXH3_NEON_LANES 6
7976 # define XXH3_NEON_LANES XXH_ACC_NB
7989 #if XXH_VECTOR == XXH_VSX
7990 # if defined(__s390x__)
7991 # include <s390intrin.h>
8001 # if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
8002 # define __APPLE_ALTIVEC__
8004 # include <altivec.h>
8007 typedef __vector unsigned long long xxh_u64x2;
8008 typedef __vector unsigned char xxh_u8x16;
8009 typedef __vector unsigned xxh_u32x4;
8012 # if defined(__BIG_ENDIAN__) \
8013 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
8014 # define XXH_VSX_BE 1
8015 # elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
8016 # warning "-maltivec=be is not recommended. Please use native endianness."
8017 # define XXH_VSX_BE 1
8019 # define XXH_VSX_BE 0
8024 # if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
8025 # define XXH_vec_revb vec_revb
8032     xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
8033 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
8034 return vec_perm(val, val, vByteSwap);
8058 # if defined(__s390x__)
8060 # define XXH_vec_mulo vec_mulo
8061 # define XXH_vec_mule vec_mule
8062 # elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
8064 # define XXH_vec_mulo __builtin_altivec_vmulouw
8065 # define XXH_vec_mule __builtin_altivec_vmuleuw
8072     __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
8078     __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
8087 #if defined(XXH_NO_PREFETCH)
8088 # define XXH_PREFETCH(ptr) (void)(ptr)
8090 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
8091 # include <mmintrin.h>
8092 # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
8093 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
8094 # define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 , 3 )
8096 # define XXH_PREFETCH(ptr) (void)(ptr)
8105 #define XXH_SECRET_DEFAULT_SIZE 192
8107 #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
8108 # error "default keyset is not large enough"
8112 XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
8113 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
8114 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
8115 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
8116 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
8117 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
8118 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
8119 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
8120 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
8121 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
8122 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
8123 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
8124 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
8128 #ifdef XXH_OLD_NAMES
8129 # define kSecret XXH3_kSecret
8152     return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
8154 #elif defined(_MSC_VER) && defined(_M_IX86)
8155 # define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
8164 # define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
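/* The generic fallback first truncates each operand to 32 bits, then widens to
 * 64, so the product cannot overflow; optimizing compilers lower this to a
 * single widening multiply where the target provides one. */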
8194 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
8195 && defined(__SIZEOF_INT128__) \
8196 || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
8198     __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
8200     r128.low64  = (xxh_u64)(product);
8201     r128.high64 = (xxh_u64)(product >> 64);
8211 #elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
8214 # pragma intrinsic(_umul128)
8217 xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
8219 r128.low64 = product_low;
8220 r128.high64 = product_high;
8228 #elif defined(_M_ARM64) || defined(_M_ARM64EC)
8231 # pragma intrinsic(__umulh)
8234 r128.low64 = lhs * rhs;
8235 r128.high64 = __umulh(lhs, rhs);
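/* Portable scalar fallback (sketch of the idea): split each 64-bit operand
 * into 32-bit halves and combine the four partial products. `cross` below
 * collects the two middle terms plus the carry out of lo_lo, so the full
 * 128-bit result is assembled without needing a 128-bit integer type. */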
8283 xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
8284 xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF);
8285 xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
8286 xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32);
8289 xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
8290 xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
8291 xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
8295 r128.high64 = upper;
8314 return product.low64 ^ product.high64;
8321 return v64 ^ (v64 >> shift);
8330 h64 = XXH_xorshift64(h64, 37);
8331 h64 *= 0x165667919E3779F9ULL;
8332 h64 = XXH_xorshift64(h64, 32);
8345 h64 *= 0x9FB21C651E98DF25ULL;
8346 h64 ^= (h64 >> 35) + len ;
8347 h64 *= 0x9FB21C651E98DF25ULL;
8348 return XXH_xorshift64(h64, 28);
8396 {   xxh_u8 const c1 = input[0];
8397 xxh_u8 const c2 = input[len >> 1];
8398 xxh_u8 const c3 = input[len - 1];
8418 xxh_u64 const keyed = input64 ^ bitflip;
8419 return XXH3_rrmxmx(keyed, len);
8435 + XXH3_mul128_fold64(input_lo, input_hi);
8436 return XXH3_avalanche(acc);
8444 {   if (XXH_likely(len >  8)) return XXH3_len_9to16_64b(input, len, secret, seed);
8445     if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
8446     if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
8480 #if defined(__GNUC__) && !defined(__clang__) \
8481 && defined(__i386__) && defined(__SSE2__) \
8482 && !defined(XXH_ENABLE_AUTOVECTORIZE)
8502 return XXH3_mul128_fold64(
8511 XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
8512                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
8515     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
8522 acc += XXH3_mix16B(input+48, secret+96, seed);
8523 acc += XXH3_mix16B(input+len-64, secret+112, seed);
8525 acc += XXH3_mix16B(input+32, secret+64, seed);
8526 acc += XXH3_mix16B(input+len-48, secret+80, seed);
8528 acc += XXH3_mix16B(input+16, secret+32, seed);
8529 acc += XXH3_mix16B(input+len-32, secret+48, seed);
8531 acc += XXH3_mix16B(input+0, secret+0, seed);
8532 acc += XXH3_mix16B(input+len-16, secret+16, seed);
8534 return XXH3_avalanche(acc);
8538 #define XXH3_MIDSIZE_MAX 240
8541 XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
8542                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
8545     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
8546 XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
8548 #define XXH3_MIDSIZE_STARTOFFSET 3
8549 #define XXH3_MIDSIZE_LASTOFFSET 17
8552     int const nbRounds = (int)len / 16;
8554 for (i=0; i<8; i++) {
8555 acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
8557 acc = XXH3_avalanche(acc);
8559 #if defined(__clang__) \
8560 && (defined(__ARM_NEON) || defined(__ARM_NEON__)) \
8561 && !defined(XXH_ENABLE_AUTOVECTORIZE)
8582 #pragma clang loop vectorize(disable)
8584 for (i=8 ; i < nbRounds; i++) {
8585 acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
8588 acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
8589 return XXH3_avalanche(acc);
8596 #define XXH_STRIPE_LEN 64
8597 #define XXH_SECRET_CONSUME_RATE 8
8598 #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
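/* A stripe is 64 bytes, consumed into XXH_ACC_NB = 8 lanes of 64-bit
 * accumulators, while the secret pointer advances by
 * XXH_SECRET_CONSUME_RATE = 8 bytes per stripe. */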
8600 #ifdef XXH_OLD_NAMES
8601 # define STRIPE_LEN XXH_STRIPE_LEN
8602 # define ACC_NB XXH_ACC_NB
8616 #if !defined (__VMS) \
8617 && (defined (__cplusplus) \
8618 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ) )
8619 typedef int64_t xxh_i64;
8622 typedef long long xxh_i64;
8649 #if (XXH_VECTOR == XXH_AVX512) \
8650 || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
8652 #ifndef XXH_TARGET_AVX512
8653 # define XXH_TARGET_AVX512
8657 XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
8658                            const void* XXH_RESTRICT input,
8659                            const void* XXH_RESTRICT secret)
8661     __m512i* const xacc = (__m512i *) acc;
8667     __m512i const data_vec    = _mm512_loadu_si512(input);
8669     __m512i const key_vec     = _mm512_loadu_si512(secret);
8671     __m512i const data_key    = _mm512_xor_si512(data_vec, key_vec);
8673     __m512i const data_key_lo = _mm512_shuffle_epi32(data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
8675     __m512i const product     = _mm512_mul_epu32(data_key, data_key_lo);
8677     __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
8678     __m512i const sum       = _mm512_add_epi64(*xacc, data_swap);
8680 *xacc = _mm512_add_epi64(product, sum);
8706 XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
8710 {   __m512i* const xacc = (__m512i*) acc;
8711     const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
8714     __m512i const acc_vec     = *xacc;
8715     __m512i const shifted     = _mm512_srli_epi64(acc_vec, 47);
8716     __m512i const data_vec    = _mm512_xor_si512(acc_vec, shifted);
8718     __m512i const key_vec     = _mm512_loadu_si512(secret);
8719     __m512i const data_key    = _mm512_xor_si512(data_vec, key_vec);
8722     __m512i const data_key_hi = _mm512_shuffle_epi32(data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
8723     __m512i const prod_lo     = _mm512_mul_epu32(data_key, prime32);
8724     __m512i const prod_hi     = _mm512_mul_epu32(data_key_hi, prime32);
8725     *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
8730 XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
8734     XXH_ASSERT(((size_t)customSecret & 63) == 0);
8735     (void)(&XXH_writeLE64);
8736     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
8737         __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64));
8739         const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
8740         __m512i* const dest = (__m512i*) customSecret;
8744 for (i=0; i < nbRounds; ++i) {
8750 } remote_const_void;
8751 remote_const_void.cp = src + i;
8752 dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
8758 #if (XXH_VECTOR == XXH_AVX2) \
8759 || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
8761 #ifndef XXH_TARGET_AVX2
8762 # define XXH_TARGET_AVX2
8766 XXH3_accumulate_512_avx2(void* XXH_RESTRICT acc,
8767                          const void* XXH_RESTRICT input,
8768                          const void* XXH_RESTRICT secret)
8771 {   __m256i* const xacc = (__m256i *) acc;
8774     const __m256i* const xinput = (const __m256i *) input;
8777     const __m256i* const xsecret = (const __m256i *) secret;
8780     for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
8782         __m256i const data_vec    = _mm256_loadu_si256(xinput+i);
8784         __m256i const key_vec     = _mm256_loadu_si256(xsecret+i);
8786         __m256i const data_key    = _mm256_xor_si256(data_vec, key_vec);
8788         __m256i const data_key_lo = _mm256_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1));
8790         __m256i const product     = _mm256_mul_epu32(data_key, data_key_lo);
8792         __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
8793         __m256i const sum       = _mm256_add_epi64(xacc[i], data_swap);
8795         xacc[i] = _mm256_add_epi64(product, sum);
8800 XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
8803 {   __m256i* const xacc = (__m256i*) acc;
8806     const __m256i* const xsecret = (const __m256i *) secret;
8807     const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
8810     for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
8812         __m256i const acc_vec     = xacc[i];
8813         __m256i const shifted     = _mm256_srli_epi64(acc_vec, 47);
8814         __m256i const data_vec    = _mm256_xor_si256(acc_vec, shifted);
8816         __m256i const key_vec     = _mm256_loadu_si256(xsecret+i);
8817         __m256i const data_key    = _mm256_xor_si256(data_vec, key_vec);
8820         __m256i const data_key_hi = _mm256_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1));
8821         __m256i const prod_lo     = _mm256_mul_epu32(data_key, prime32);
8822         __m256i const prod_hi     = _mm256_mul_epu32(data_key_hi, prime32);
8823         xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
8828 XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
8833     (void)(&XXH_writeLE64);
8834     XXH_PREFETCH(customSecret);
8835     {   __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
8837         const __m256i* const src  = (const __m256i*) ((const void*) XXH3_kSecret);
8838         __m256i* dest = (__m256i*) customSecret;
8840 # if defined(__GNUC__) || defined(__clang__)
8852 dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
8853 dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
8854 dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
8855 dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
8856 dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
8857 dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
8864 #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
8866 #ifndef XXH_TARGET_SSE2
8867 # define XXH_TARGET_SSE2
8871 XXH3_accumulate_512_sse2(void* XXH_RESTRICT acc,
8872                          const void* XXH_RESTRICT input,
8873                          const void* XXH_RESTRICT secret)
8877 {   __m128i* const xacc = (__m128i *) acc;
8880     const __m128i* const xinput = (const __m128i *) input;
8883     const __m128i* const xsecret = (const __m128i *) secret;
8886     for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
8888         __m128i const data_vec    = _mm_loadu_si128(xinput+i);
8890         __m128i const key_vec     = _mm_loadu_si128(xsecret+i);
8892         __m128i const data_key    = _mm_xor_si128(data_vec, key_vec);
8894         __m128i const data_key_lo = _mm_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1));
8896         __m128i const product     = _mm_mul_epu32(data_key, data_key_lo);
8898         __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
8899         __m128i const sum       = _mm_add_epi64(xacc[i], data_swap);
8901 xacc[i] = _mm_add_epi64(product, sum);
8906 XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
8909 {   __m128i* const xacc = (__m128i*) acc;
8912     const __m128i* const xsecret = (const __m128i *) secret;
8916     for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
8918         __m128i const acc_vec     = xacc[i];
8919         __m128i const shifted     = _mm_srli_epi64(acc_vec, 47);
8920         __m128i const data_vec    = _mm_xor_si128(acc_vec, shifted);
8922         __m128i const key_vec     = _mm_loadu_si128(xsecret+i);
8923         __m128i const data_key    = _mm_xor_si128(data_vec, key_vec);
8926         __m128i const data_key_hi = _mm_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1));
8927         __m128i const prod_lo     = _mm_mul_epu32(data_key, prime32);
8928         __m128i const prod_hi     = _mm_mul_epu32(data_key_hi, prime32);
8929 xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
8934 XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
8937     (void)(&XXH_writeLE64);
8938     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
8940 #       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
8942         XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
8943         __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
8945         __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
8949         const void* const src16 = XXH3_kSecret;
8950         __m128i* dst16 = (__m128i*) customSecret;
8951 #       if defined(__GNUC__) || defined(__clang__)
8962         for (i=0; i < nbRounds; ++i) {
8963             dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
8969 #if (XXH_VECTOR == XXH_NEON)
8973 XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
8974                  void const* XXH_RESTRICT secret, size_t lane);
8977 XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
8978                          void const* XXH_RESTRICT secret, size_t lane);
8991 XXH3_accumulate_512_neon(void* XXH_RESTRICT acc,
8992                          const void* XXH_RESTRICT input,
8993                          const void* XXH_RESTRICT secret)
8996     XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
8998     uint64x2_t* const xacc = (uint64x2_t *) acc;
9000     uint8_t const* const xinput = (const uint8_t *) input;
9001     uint8_t const* const xsecret = (const uint8_t *) secret;
9005     for (i=0; i < XXH3_NEON_LANES / 2; i++) {
9007         uint8x16_t data_vec = vld1q_u8(xinput + (i * 16));
9009         uint8x16_t key_vec  = vld1q_u8(xsecret + (i * 16));
9010         uint64x2_t data_key;
9011         uint32x2_t data_key_lo, data_key_hi;
9013         uint64x2_t const data64  = vreinterpretq_u64_u8(data_vec);
9014         uint64x2_t const swapped = vextq_u64(data64, data64, 1);
9015 xacc[i] = vaddq_u64 (xacc[i], swapped);
9017 data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
9021 XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
9023 xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
9027 for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
9028 XXH3_scalarRound(acc, input, secret, i);
9034 XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
9038 {   uint64x2_t* xacc = (uint64x2_t*) acc;
9039     uint8_t const* xsecret = (uint8_t const*) secret;
9044 for (i=0; i < XXH3_NEON_LANES / 2; i++) {
9046 uint64x2_t acc_vec = xacc[i];
9047 uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47);
9048 uint64x2_t data_vec = veorq_u64 (acc_vec, shifted);
9051 uint8x16_t key_vec = vld1q_u8 (xsecret + (i * 16));
9052 uint64x2_t data_key = veorq_u64 (data_vec, vreinterpretq_u64_u8(key_vec));
9055 uint32x2_t data_key_lo, data_key_hi;
9059 XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
9078 uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
9080 xacc[i] = vshlq_n_u64(prod_hi, 32);
9082 xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
9086 for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
9087 XXH3_scalarScrambleRound(acc, secret, i);
9094 #if (XXH_VECTOR == XXH_VSX)
9097 XXH3_accumulate_512_vsx(void* XXH_RESTRICT acc,
9098                         const void* XXH_RESTRICT input,
9099                         const void* XXH_RESTRICT secret)
9102     unsigned int* const xacc = (unsigned int*) acc;
9103     xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input;
9104     xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret;
9105     xxh_u64x2 const v32 = { 32, 32 };
9107     for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
9109         xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
9111         xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
9112         xxh_u64x2 const data_key = data_vec ^ key_vec;
9114         xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
9116         xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
9118 xxh_u64x2 acc_vec = (xxh_u64x2)vec_xl(0, xacc + 4 * i);
9123 acc_vec += vec_permi(data_vec, data_vec, 2);
9125 acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
9128 vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
9133 XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
9137 {   xxh_u64x2* const xacc = (xxh_u64x2*) acc;
9138     const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;
9140     xxh_u64x2 const v32 = { 32, 32 };
9141     xxh_u64x2 const v47 = { 47, 47 };
9144     for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
9146         xxh_u64x2 const acc_vec  = xacc[i];
9147         xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
9150         xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
9151         xxh_u64x2 const data_key = data_vec ^ key_vec;
9155         xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
9157         xxh_u64x2 const prod_odd  = XXH_vec_mulo((xxh_u32x4)data_key, prime);
9158 xacc[i] = prod_odd + (prod_even << v32);
9174 XXH3_scalarRound(void* XXH_RESTRICT acc,
9175                  void const* XXH_RESTRICT input,
9176                  void const* XXH_RESTRICT secret,
9183     XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
9187 xacc[lane ^ 1] += data_val;
9188 xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
9197 XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
9198 const void* XXH_RESTRICT input,
9199 const void* XXH_RESTRICT secret)
9202 for (i=0; i < XXH_ACC_NB; i++) {
9203 XXH3_scalarRound(acc, input, secret, i);
9215 XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
9216                          void const* XXH_RESTRICT secret,
9221     XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
9226 acc64 = XXH_xorshift64(acc64, 47);
9238 XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
9241 for (i=0; i < XXH_ACC_NB; i++) {
9242 XXH3_scalarScrambleRound(acc, secret, i);
9247 XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
9254 const xxh_u8* kSecretPtr = XXH3_kSecret;
9257 #if defined(__clang__) && defined(__aarch64__)
9298 {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
9300 for (i=0; i < nbRounds; i++) {
9309         XXH_writeLE64((xxh_u8*)customSecret + 16*i,     lo);
9310         XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
9315 typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);
9316 typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
9317 typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
9320 #if (XXH_VECTOR == XXH_AVX512)
9322 #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
9323 #define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
9324 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
9326 #elif (XXH_VECTOR == XXH_AVX2)
9328 #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
9329 #define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
9330 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
9332 #elif (XXH_VECTOR == XXH_SSE2)
9334 #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
9335 #define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
9336 #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
9338 #elif (XXH_VECTOR == XXH_NEON)
9340 #define XXH3_accumulate_512 XXH3_accumulate_512_neon
9341 #define XXH3_scrambleAcc XXH3_scrambleAcc_neon
9342 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
9344 #elif (XXH_VECTOR == XXH_VSX)
9346 #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
9347 #define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
9348 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
9352 #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
9353 #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
9354 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
9360 #ifndef XXH_PREFETCH_DIST
9362 # define XXH_PREFETCH_DIST 320
9364 # if (XXH_VECTOR == XXH_AVX512)
9365 # define XXH_PREFETCH_DIST 512
9367 # define XXH_PREFETCH_DIST 384
9378 XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,
9379                 const xxh_u8* XXH_RESTRICT input,
9380                 const xxh_u8* XXH_RESTRICT secret,
9382                 XXH3_f_accumulate_512 f_acc512)
9385     for (n = 0; n < nbStripes; n++ ) {
9386         const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
9387         XXH_PREFETCH(in + XXH_PREFETCH_DIST);
9390 secret + n*XXH_SECRET_CONSUME_RATE);
9395 XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
9396                             const xxh_u8* XXH_RESTRICT input, size_t len,
9397                             const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
9398 XXH3_f_accumulate_512 f_acc512,
9399 XXH3_f_scrambleAcc f_scramble)
9401 size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
9402 size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
9403 size_t const nb_blocks = (len - 1) / block_len;
9407 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
9409 for (n = 0; n < nb_blocks; n++) {
9410 XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);
9411 f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
9416     {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
9417         XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
9418         XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);
9421     {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
9422 #define XXH_SECRET_LASTACC_START 7
9423 f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
9428 XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
9430 return XXH3_mul128_fold64(
9436 XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
9441 for (i = 0; i < 4; i++) {
9442 result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
9443 #if defined(__clang__) \
9444 && (defined(__arm__) || defined(__thumb__)) \
9445 && (defined(__ARM_NEON) || defined(__ARM_NEON__)) \
9446 && !defined(XXH_ENABLE_AUTOVECTORIZE)
9459 return XXH3_avalanche(result64);
9462 #define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
9463 XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
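/* The 8 accumulator lanes start from a fixed mix of the 32- and 64-bit primes
 * above, so even an empty stripe sequence yields a non-trivial internal state. */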
9466 XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
9467                            const void* XXH_RESTRICT secret, size_t secretSize,
9468                            XXH3_f_accumulate_512 f_acc512,
9469                            XXH3_f_scrambleAcc f_scramble)
9471     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
9473     XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);
9478 #define XXH_SECRET_MERGEACCS_START 11
9479     XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
9489 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
9493     return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);
9503 XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
9507     return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);
9522 XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
9524                                     XXH3_f_accumulate_512 f_acc512,
9525                                     XXH3_f_scrambleAcc f_scramble,
9526                                     XXH3_f_initCustomSecret f_initSec)
9529         return XXH3_hashLong_64b_internal(input, len,
9530                                           XXH3_kSecret, sizeof(XXH3_kSecret),
9531                                           f_acc512, f_scramble);
9532     {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
9533         f_initSec(secret, seed);
9534         return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
9535 f_acc512, f_scramble);
9543 XXH3_hashLong_64b_withSeed(const void* input, size_t len,
9547 return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
9548 XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
9556 XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
9557                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
9558 XXH3_hashLong64_f f_hashLong)
9560 XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
9569         return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
9571         return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
9572     if (len <= XXH3_MIDSIZE_MAX)
9573         return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
9574     return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
9583     return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
9590     return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
9597     return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
9603     if (len <= XXH3_MIDSIZE_MAX)
9604         return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
9605     return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);
9634 static void* XXH_alignedMalloc(size_t s, size_t align)
9665 static void XXH_alignedFree(void* p)
9680     if (state==NULL) return NULL;
9681     XXH3_INITSTATE(state);
9688     XXH_alignedFree(statePtr);
9696     XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
9702                     const void* secret, size_t secretSize)
9704     size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
9705     size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
9709     memset((char*)statePtr + initStart, 0, initLength);
9718 statePtr->seed = seed;
9719 statePtr->useSeed = (seed != 0);
9720     statePtr->extSecret = (const unsigned char*)secret;
9721 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
9722 statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
9723 statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
9731 XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
9740 XXH3_reset_internal(statePtr, 0, secret, secretSize);
9742     if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
9752 if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
9753 XXH3_initCustomSecret(statePtr->customSecret, seed);
9754 XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
9764     if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
9765 XXH3_reset_internal(statePtr, seed64, secret, secretSize);
9766 statePtr->useSeed = 1;
9774 XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
9775                     size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
9776                     const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
9777                     const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
9778 XXH3_f_accumulate_512 f_acc512,
9779 XXH3_f_scrambleAcc f_scramble)
9782 XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
9783 if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
9785 size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
9786 size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
9787 XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512);
9788 f_scramble(acc, secret + secretLimit);
9789 XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512);
9790 *nbStripesSoFarPtr = nbStripesAfterBlock;
9792 XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512);
9793 *nbStripesSoFarPtr += nbStripes;
9797 #ifndef XXH3_STREAM_USE_STACK
9799 # define XXH3_STREAM_USE_STACK 1
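/* When enabled, XXH3_update() operates on a stack copy of the accumulators
 * (see the memcpy into acc[8] below) and writes them back at the end, which
 * lets compilers keep the lanes in registers instead of reloading them
 * through the state pointer on every stripe. */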
9807             const xxh_u8* XXH_RESTRICT input, size_t len,
9808 XXH3_f_accumulate_512 f_acc512,
9809 XXH3_f_scrambleAcc f_scramble)
9817 {   const xxh_u8* const bEnd = input + len;
9818     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
9819 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
9824     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
9828 state->totalLen += len;
9832     if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
9839 #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
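/* The 256-byte internal buffer holds exactly XXH3_INTERNALBUFFER_STRIPES = 4
 * stripes of 64 bytes; buffered data is always flushed in whole-buffer units. */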
9846     if (state->bufferedSize) {
9847         size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
9850         XXH3_consumeStripes(acc,
9851                             &state->nbStripesSoFar, state->nbStripesPerBlock,
9852                             state->buffer, XXH3_INTERNALBUFFER_STRIPES,
9853                             secret, state->secretLimit,
9854 f_acc512, f_scramble);
9855 state->bufferedSize = 0;
9860     if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
9861         size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
9864         {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
9866             XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
9867             f_scramble(acc, secret + state->secretLimit);
9868             state->nbStripesSoFar = 0;
9869             input += nbStripesToEnd * XXH_STRIPE_LEN;
9870             nbStripes -= nbStripesToEnd;
9873         while (nbStripes >= state->nbStripesPerBlock) {
9874             XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
9875             f_scramble(acc, secret + state->secretLimit);
9876             input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
9877             nbStripes -= state->nbStripesPerBlock;
9880         XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
9881         input += nbStripes * XXH_STRIPE_LEN;
9883         state->nbStripesSoFar = nbStripes;
9885         XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
9890 if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
9891         const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
9893             XXH3_consumeStripes(acc,
9894                                 &state->nbStripesSoFar, state->nbStripesPerBlock,
9895                                 input, XXH3_INTERNALBUFFER_STRIPES,
9896                                 secret, state->secretLimit,
9897                                 f_acc512, f_scramble);
9898             input += XXH3_INTERNALBUFFER_SIZE;
9899         } while (input < limit);
9901         XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
9907 XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
9911 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
9913     memcpy(state->acc, acc, sizeof(acc));
9924     return XXH3_update(state, (const xxh_u8*)input, len,
9925 XXH3_accumulate_512, XXH3_scrambleAcc);
9932 const unsigned char* secret)
9939     if (state->bufferedSize >= XXH_STRIPE_LEN) {
9940         size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
9941         size_t nbStripesSoFar = state->nbStripesSoFar;
9942         XXH3_consumeStripes(acc,
9943                             &nbStripesSoFar, state->nbStripesPerBlock,
9944                             state->buffer, nbStripes,
9945                             secret, state->secretLimit,
9946                             XXH3_accumulate_512, XXH3_scrambleAcc);
9948         XXH3_accumulate_512(acc,
9949                             state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
9950                             secret + state->secretLimit - XXH_SECRET_LASTACC_START);
9952         xxh_u8 lastStripe[XXH_STRIPE_LEN];
9953         size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
9957         XXH3_accumulate_512(acc,
9959                             secret + state->secretLimit - XXH_SECRET_LASTACC_START);
9966     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
9967     if (state->totalLen > XXH3_MIDSIZE_MAX) {
9969         XXH3_digest_long(acc, state, secret);
9970         return XXH3_mergeAccs(acc,
9971                               secret + XXH_SECRET_MERGEACCS_START,
9978                           secret, state->secretLimit + XXH_STRIPE_LEN);
10012 {   xxh_u8 const c1 = input[0];
10013 xxh_u8 const c2 = input[len >> 1];
10014 xxh_u8 const c3 = input[len - 1];
10040 xxh_u64 const keyed = input_64 ^ bitflip;
10045 m128.high64 += (m128.low64 << 1);
10046 m128.low64 ^= (m128.high64 >> 3);
10048 m128.low64 = XXH_xorshift64(m128.low64, 35);
10049 m128.low64 *= 0x9FB21C651E98DF25ULL;
10050 m128.low64 = XXH_xorshift64(m128.low64, 28);
10051 m128.high64 = XXH3_avalanche(m128.high64);
10071     m128.low64 += (xxh_u64)(len - 1) << 54;
10072 input_hi ^= bitfliph;
10080     if (sizeof(void *) < sizeof(xxh_u64)) {
10087     m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
10122 h128.low64 = XXH3_avalanche(h128.low64);
10123 h128.high64 = XXH3_avalanche(h128.high64);
10135 {   if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
10136     if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
10137     if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
10154 acc.low64 += XXH3_mix16B (input_1, secret+0, seed);
10156 acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
10163 XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
10164                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
10167     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
10176 acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
10178 acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
10180 acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
10182 acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
10184 h128.low64 = acc.low64 + acc.high64;
10188 h128.low64 = XXH3_avalanche(h128.low64);
10189     h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
10196 XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
10197                        const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
10200     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
10201     XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
10204     int const nbRounds = (int)len / 32;
10208 for (i=0; i<4; i++) {
10209 acc = XXH128_mix32B(acc,
10211 input + (32 * i) + 16,
10215 acc.low64 = XXH3_avalanche(acc.low64);
10216 acc.high64 = XXH3_avalanche(acc.high64);
10218 for (i=4 ; i < nbRounds; i++) {
10219 acc = XXH128_mix32B(acc,
10221 input + (32 * i) + 16,
10222 secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)),
10226 acc = XXH128_mix32B(acc,
10229 secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
10233 h128.low64 = acc.low64 + acc.high64;
10237 h128.low64 = XXH3_avalanche(h128.low64);
10238     h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
10245 XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
10246                             const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
10247                             XXH3_f_accumulate_512 f_acc512,
10248                             XXH3_f_scrambleAcc f_scramble)
10250     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
10252     XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble);
10256     XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
10258 h128.low64 = XXH3_mergeAccs(acc,
10259 secret + XXH_SECRET_MERGEACCS_START,
10261 h128.high64 = XXH3_mergeAccs(acc,
10262                                  secret + secretSize
10263                                         - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
10273 XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
10275                            const void* XXH_RESTRICT secret, size_t secretLen)
10278     return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
10279 XXH3_accumulate_512, XXH3_scrambleAcc);
10287 XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
10289                               const void* XXH_RESTRICT secret, size_t secretLen)
10292     return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
10293 XXH3_accumulate_512, XXH3_scrambleAcc);
10297 XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
10299                                      XXH3_f_accumulate_512 f_acc512,
10300                                      XXH3_f_scrambleAcc f_scramble,
10301                                      XXH3_f_initCustomSecret f_initSec)
10304         return XXH3_hashLong_128b_internal(input, len,
10305                                            XXH3_kSecret, sizeof(XXH3_kSecret),
10306                                            f_acc512, f_scramble);
10307     {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
10308         f_initSec(secret, seed64);
10309         return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
10310 f_acc512, f_scramble);
10318 XXH3_hashLong_128b_withSeed(const void* input, size_t len,
10319                             XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
10322 return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
10323 XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
10330 XXH3_128bits_internal(const void* input, size_t len,
10331                       XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
10332 XXH3_hashLong128_f f_hl128)
10334 XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
10342         return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
10344         return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
10345     if (len <= XXH3_MIDSIZE_MAX)
10346         return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
10347     return f_hl128(input, len, seed64, secret, secretLen);
10356     return XXH3_128bits_internal(input, len, 0,
10357                                  XXH3_kSecret, sizeof(XXH3_kSecret),
10358                                  XXH3_hashLong_128b_default);
10365     return XXH3_128bits_internal(input, len, 0,
10366                                  (const xxh_u8*)secret, secretSize,
10367                                  XXH3_hashLong_128b_withSecret);
10374     return XXH3_128bits_internal(input, len, seed,
10375                                  XXH3_kSecret, sizeof(XXH3_kSecret),
10376                                  XXH3_hashLong_128b_withSeed);
10383     if (len <= XXH3_MIDSIZE_MAX)
10384         return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
10385 return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
10435     return XXH3_update(state, (const xxh_u8*)input, len,
10436 XXH3_accumulate_512, XXH3_scrambleAcc);
10442     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
10443     if (state->totalLen > XXH3_MIDSIZE_MAX) {
10444         XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
10445         XXH3_digest_long(acc, state, secret);
10446         XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
10448         h128.low64  = XXH3_mergeAccs(acc,
10449                                      secret + XXH_SECRET_MERGEACCS_START,
10451         h128.high64 = XXH3_mergeAccs(acc,
10452                                      secret + state->secretLimit + XXH_STRIPE_LEN
10453                                             - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
10462                           secret, state->secretLimit + XXH_STRIPE_LEN);
10467 #include <string.h>
10474     return !(memcmp(&h1, &h2, sizeof(h1)));
10486 int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
10488     if (hcmp) return hcmp;
10489 return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
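/* Ordering note: like memcmp(), this comparator returns a negative, zero, or
 * positive value, ranking by the high 64 bits first and the low 64 bits
 * second, so it is suitable for qsort()-style total ordering of XXH128 hashes. */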
10523 #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
10533 XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
10535 #if (XXH_DEBUGLEVEL >= 1)
10537 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
10540     if (secretBuffer == NULL) return XXH_ERROR;
10541     if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
10544 if (customSeedSize == 0) {
10545 customSeed = XXH3_kSecret;
10546 customSeedSize = XXH_SECRET_DEFAULT_SIZE;
10548 #if (XXH_DEBUGLEVEL >= 1)
10551     if (customSeed == NULL) return XXH_ERROR;
10556 while (pos < secretSize) {
10557 size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
10558         memcpy((char*)secretBuffer + pos, customSeed, toCopy);
10562     {   size_t const nbSeg16 = secretSize / 16;
10566 for (n=0; n<nbSeg16; n++) {
10568             XXH3_combine16((char*)secretBuffer + n*16, h128);
10580     XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
10581 XXH3_initCustomSecret(secret, seed);
10583 memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
10589 #if XXH_VECTOR == XXH_AVX2 \
10590 && defined(__GNUC__) && !defined(__clang__) \
10591 && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__)
10592 # pragma GCC pop_options
10605 #if defined (__cplusplus)
10609 #ifndef ZSTD_NO_TRACE
10621 #ifndef ZSTD_TRACE_H
10622 #define ZSTD_TRACE_H
10624 #if defined (__cplusplus)
10628 #include <stddef.h>
10638 #if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
10639 defined(__GNUC__) && defined(__ELF__) && \
10640 (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) || defined(__aarch64__)) && \
10641 !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
10642 !defined(__CYGWIN__) && !defined(_AIX)
10643 # define ZSTD_HAVE_WEAK_SYMBOLS 1
10645 # define ZSTD_HAVE_WEAK_SYMBOLS 0
10647 #if ZSTD_HAVE_WEAK_SYMBOLS
10648 # define ZSTD_WEAK_ATTR __attribute__((__weak__))
10650 # define ZSTD_WEAK_ATTR
10655 # define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
10677 unsigned streaming;
10681 unsigned dictionaryID;
10686 unsigned dictionaryIsCold;
10690 size_t dictionarySize;
10694 size_t uncompressedSize;
10727 typedef unsigned long long ZSTD_TraceCtx;
10746     ZSTD_Trace const* trace);
10765     ZSTD_Trace const* trace);
10769 #if defined (__cplusplus)
10776 # define ZSTD_TRACE 0
10779 #if defined (__cplusplus)
10784 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
10785 #define ZSTD_isError ERR_isError
10786 #define FSE_isError ERR_isError
10787 #define HUF_isError ERR_isError
10795 #define MIN(a,b) ((a)<(b) ? (a) : (b))
10796 #define MAX(a,b) ((a)>(b) ? (a) : (b))
10797 #define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
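/* BOUNDED(min,val,max) clamps val into [min,max] (assumes min <= max):
 * e.g. BOUNDED(1,5,3) == 3 and BOUNDED(1,0,3) == 1. */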
10803 #define ZSTD_OPT_NUM (1<<12)
10805 #define ZSTD_REP_NUM 3
10808 #define KB *(1 <<10)
10809 #define MB *(1 <<20)
10810 #define GB *(1U<<30)
10819 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
10823 #define ZSTD_FRAMEIDSIZE 4
10825 #define ZSTD_BLOCKHEADERSIZE 3
10829 #define ZSTD_FRAMECHECKSUMSIZE 4
10831 #define MIN_SEQUENCES_SIZE 1
10832 #define MIN_CBLOCK_SIZE (1 + 1 )
10833 #define MIN_LITERALS_FOR_4_STREAMS 6
10837 #define LONGNBSEQ 0x7F00
10842 #define LitHufLog 11
10843 #define MaxLit ((1<<Litbits) - 1)
10846 #define DefaultMaxOff 28
10848 #define MaxSeq MAX(MaxLL, MaxML)
10851 #define OffFSELog 8
10852 #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
10853 #define MaxMLBits 16
10854 #define MaxLLBits 16
10856 #define ZSTD_MAX_HUF_HEADER_SIZE 128
10858 #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
10861 0, 0, 0, 0, 0, 0, 0, 0,
10862 0, 0, 0, 0, 0, 0, 0, 0,
10863 1, 1, 1, 1, 2, 2, 3, 3,
10864 4, 6, 7, 8, 9,10,11,12,
10868 4, 3, 2, 2, 2, 2, 2, 2,
10869 2, 2, 2, 2, 2, 1, 1, 1,
10870 2, 2, 2, 2, 2, 2, 2, 2,
10871 2, 3, 2, 1, 1, 1, 1, 1,
10874 #define LL_DEFAULTNORMLOG 6
10878 0, 0, 0, 0, 0, 0, 0, 0,
10879 0, 0, 0, 0, 0, 0, 0, 0,
10880 0, 0, 0, 0, 0, 0, 0, 0,
10881 0, 0, 0, 0, 0, 0, 0, 0,
10882 1, 1, 1, 1, 2, 2, 3, 3,
10883 4, 4, 5, 7, 8, 9,10,11,
10887 1, 4, 3, 2, 2, 2, 2, 2,
10888 2, 1, 1, 1, 1, 1, 1, 1,
10889 1, 1, 1, 1, 1, 1, 1, 1,
10890 1, 1, 1, 1, 1, 1, 1, 1,
10891 1, 1, 1, 1, 1, 1, 1, 1,
10892 1, 1, 1, 1, 1, 1,-1,-1,
10895 #define ML_DEFAULTNORMLOG 6
10899 1, 1, 1, 1, 1, 1, 2, 2,
10900 2, 1, 1, 1, 1, 1, 1, 1,
10901 1, 1, 1, 1, 1, 1, 1, 1,
10904 #define OF_DEFAULTNORMLOG 5
10912 #if defined(ZSTD_ARCH_ARM_NEON)
10913 vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
10918 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
10925 #if defined(ZSTD_ARCH_ARM_NEON)
10926 vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
10927 #elif defined(ZSTD_ARCH_X86_SSE2)
10928 _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
10929 #elif defined(__clang__)
10933 BYTE copy16_buf[16];
10938 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
10940 #define WILDCOPY_OVERLENGTH 32
10941 #define WILDCOPY_VECLEN 16
10962 BYTE* const oend = op + length;
10968 } while (op < oend);
10978 if (16 >= length) return;
10991 size_t const length = MIN(dstCapacity, srcSize);
10999 #define ZSTD_WORKSPACETOOLARGE_FACTOR 3
11006 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
11114 const void* src, size_t srcSize);
11122 return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
11125 #if defined (__cplusplus)
11144 #undef ZSTD_isError
11215 const void* src, size_t srcSize);
11222 #define HIST_WKSP_SIZE_U32 1024
11223 #define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
11231 const void* src, size_t srcSize,
11232 void* workSpace, size_t workSpaceSize);
11239 const void* src, size_t srcSize);
11247 const void* src, size_t srcSize,
11248 void* workSpace, size_t workSpaceSize);
11259 const void* src, size_t srcSize);
11262 #define FSE_STATIC_LINKING_ONLY
11265 #define ZSTD_DEPS_NEED_MALLOC
11266 #define ZSTD_DEPS_NEED_MATH64
11274 #define FSE_isError ERR_isError
11287 #ifndef FSE_FUNCTION_EXTENSION
11288 # error "FSE_FUNCTION_EXTENSION must be defined"
11290 #ifndef FSE_FUNCTION_TYPE
11291 # error "FSE_FUNCTION_TYPE must be defined"
11295 #define FSE_CAT(X,Y) X##Y
11296 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
11297 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
11308 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
11309 void* workSpace, size_t wkspSize)
11311 U32 const tableSize = 1 << tableLog;
11312 U32 const tableMask = tableSize - 1;
11313 void* const ptr = ct;
11314 U16* const tableU16 = ((U16*)ptr) + 2;
11315 void* const FSCT = ((U32*)ptr) + 1 + (tableLog ? tableSize>>1 : 1);
11318 U32 const maxSV1 = maxSymbolValue+1;
11320 U16* cumul = (U16*)workSpace;
11323 U32 highThreshold = tableSize-1;
11325 assert(((size_t)workSpace & 1) == 0);
11328 tableU16[-2] = (U16) tableLog;
11329 tableU16[-1] = (U16) maxSymbolValue;
11335 #ifdef __clang_analyzer__
11336 ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);
11342 for (u=1; u <= maxSV1; u++) {
11343 if (normalizedCounter[u-1]==-1) {
11344 cumul[u] = cumul[u-1] + 1;
11347 assert(normalizedCounter[u-1] >= 0);
11348 cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
11349 assert(cumul[u] >= cumul[u-1]);
11351 cumul[maxSV1] = (U16)(tableSize+1);
11355 if (highThreshold == tableSize - 1) {
11359 BYTE* const spread = tableSymbol + tableSize;
11360 { U64 const add = 0x0101010101010101ull;
11364 for (s=0; s<maxSV1; ++s, sv += add) {
11366 int const n = normalizedCounter[s];
11368 for (i = 8; i < n; i += 8) {
11379 { size_t position = 0;
11381 size_t const unroll = 2;
11385 for (u = 0; u < unroll; ++u) {
11386 size_t const uPosition = (position + (u * step)) & tableMask;
11387 tableSymbol[uPosition] = spread[s + u];
11389 position = (position + (unroll * step)) & tableMask;
11396 for (symbol=0; symbol<maxSV1; symbol++) {
11398 int const freq = normalizedCounter[symbol];
11399 for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
11401 position = (position + step) & tableMask;
11402 while (position > highThreshold)
11403 position = (position + step) & tableMask;
11409 { U32 u; for (u=0; u<tableSize; u++) {
11411 tableU16[cumul[s]++] = (U16)(tableSize+u);
11415 { unsigned total = 0;
11417 for (s=0; s<=maxSymbolValue; s++) {
11418 switch (normalizedCounter[s])
11422 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
11427 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
11428 assert(total <= INT_MAX);
11433 assert(normalizedCounter[s] > 1);
11435 U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
11436 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
11438 total += (unsigned)normalizedCounter[s];
11442 DEBUGLOG(5, "\n --- table statistics : ");
11444 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
11445 DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
11446 symbol, normalizedCounter[symbol],
11448 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
11457 #ifndef FSE_COMMONDEFS_ONLY
11464 size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
11474 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
11475 unsigned writeIsSafe)
11478 BYTE* out = ostart;
11479 BYTE* const oend = ostart + headerBufferSize;
11481 const int tableSize = 1 << tableLog;
11486 unsigned symbol = 0;
11487 unsigned const alphabetSize = maxSymbolValue + 1;
11488 int previousIs0 = 0;
11495 remaining = tableSize+1;
11496 threshold = tableSize;
11497 nbBits = tableLog+1;
11499 while ((symbol < alphabetSize) && (remaining>1)) {
11501 unsigned start = symbol;
11502 while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
11503 if (symbol == alphabetSize) break;
11504 while (symbol >= start+24) {
11506 bitStream += 0xFFFFU << bitCount;
11507 if ((!writeIsSafe) && (out > oend-2))
11508 return ERROR(dstSize_tooSmall);
11509 out[0] = (BYTE) bitStream;
11510 out[1] = (BYTE)(bitStream>>8);
11514 while (symbol >= start+3) {
11516 bitStream += 3 << bitCount;
11519 bitStream += (symbol-start) << bitCount;
11522 if ((!writeIsSafe) && (out > oend - 2))
11523 return ERROR(dstSize_tooSmall);
11524 out[0] = (BYTE)bitStream;
11525 out[1] = (BYTE)(bitStream>>8);
11530 { int count = normalizedCounter[symbol++];
11531 int const max = (2*threshold-1) - remaining;
11534 if (count>=threshold)
11536 bitStream += count << bitCount;
11537 bitCount += nbBits;
11538 bitCount -= (count<max);
11539 previousIs0 = (count==1);
11540 if (remaining<1) return ERROR(GENERIC);
11541 while (remaining<threshold) { nbBits--; threshold>>=1; }
11544 if ((!writeIsSafe) && (out > oend - 2))
11545 return ERROR(dstSize_tooSmall);
11546 out[0] = (BYTE)bitStream;
11547 out[1] = (BYTE)(bitStream>>8);
11553 if (remaining != 1)
11554 return ERROR(GENERIC);
11555 assert(symbol <= alphabetSize);
11558 if ((!writeIsSafe) && (out > oend - 2))
11559 return ERROR(dstSize_tooSmall);
11560 out[0] = (BYTE)bitStream;
11561 out[1] = (BYTE)(bitStream>>8);
11562 out+= (bitCount+7) /8;
11564 return (out-ostart);
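/* Illustrative sketch of the normalize-then-serialize flow that ends above,
 * using the public FSE entry points declared in this file
 * (FSE_optimalTableLog, FSE_normalizeCount, FSE_writeNCount); error paths
 * are abbreviated, and the fixed 256-entry buffer is a simplifying
 * assumption for the sketch. */
static size_t ncount_roundtrip_sketch(unsigned const* count, size_t total,
                                      unsigned maxSymbolValue,
                                      void* dst, size_t dstCapacity)
{
    short norm[256];
    unsigned const tableLog = FSE_optimalTableLog(0, total, maxSymbolValue);
    size_t const err = FSE_normalizeCount(norm, tableLog, count, total,
                                          maxSymbolValue, /* useLowProbCount */ 1);
    if (FSE_isError(err)) return err;
    return FSE_writeNCount(dst, dstCapacity, norm, maxSymbolValue, tableLog);
}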
11569 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
11590 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
11598 U32 tableLog = maxTableLog;
11602 if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;
11603 if (minBits > tableLog) tableLog = minBits;
11619 short const NOT_YET_ASSIGNED = -2;
11621 U32 distributed = 0;
11625 U32 const lowThreshold = (U32)(total >> tableLog);
11626 U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
11628 for (s=0; s<=maxSymbolValue; s++) {
11633 if (count[s] <= lowThreshold) {
11634 norm[s] = lowProbCount;
11639 if (count[s] <= lowOne) {
11646 norm[s]=NOT_YET_ASSIGNED;
11648 ToDistribute = (1 << tableLog) - distributed;
11650 if (ToDistribute == 0)
11653 if ((total / ToDistribute) > lowOne) {
11655 lowOne = (U32)((total * 3) / (ToDistribute * 2));
11656 for (s=0; s<=maxSymbolValue; s++) {
11657 if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
11663 ToDistribute = (1 << tableLog) - distributed;
11666 if (distributed == maxSymbolValue+1) {
11670 U32 maxV = 0, maxC = 0;
11671 for (s=0; s<=maxSymbolValue; s++)
11673 norm[maxV] += (short)ToDistribute;
11679 for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
11680 if (norm[s] > 0) { ToDistribute--; norm[s]++; }
11684 { U64 const vStepLog = 62 - tableLog;
11685 U64 const mid = (1ULL << (vStepLog-1)) - 1;
11687 U64 tmpTotal = mid;
11688 for (s=0; s<=maxSymbolValue; s++) {
11689 if (norm[s]==NOT_YET_ASSIGNED) {
11690 U64 const end = tmpTotal + (count[s] * rStep);
11691 U32 const sStart = (U32)(tmpTotal >> vStepLog);
11692 U32 const sEnd = (U32)(end >> vStepLog);
11693 U32 const weight = sEnd - sStart;
11695 return ERROR(GENERIC);
11696 norm[s] = (short)weight;
11704 const unsigned* count, size_t total,
11705 unsigned maxSymbolValue, unsigned useLowProbCount)
11713 { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
11714 short const lowProbCount = useLowProbCount ? -1 : 1;
11715 U64 const scale = 62 - tableLog;
11717 U64 const vStep = 1ULL<<(scale-20);
11718 int stillToDistribute = 1<<tableLog;
11720 unsigned largest=0;
11722 U32 lowThreshold = (U32)(total >> tableLog);
11724 for (s=0; s<=maxSymbolValue; s++) {
11725 if (count[s] == total) return 0;
11726 if (count[s] == 0) { normalizedCounter[s]=0; continue; }
11727 if (count[s] <= lowThreshold) {
11728 normalizedCounter[s] = lowProbCount;
11729 stillToDistribute--;
11731 short proba = (short)((count[s]*step) >> scale);
11733 U64 restToBeat = vStep * rtbTable[proba];
11734 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
11736 if (proba > largestP) { largestP=proba; largest=s; }
11737 normalizedCounter[s] = proba;
11738 stillToDistribute -= proba;
11740 if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
11742 size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
11745 else normalizedCounter[largest] += (short)stillToDistribute;
11752 for (s=0; s<=maxSymbolValue; s++)
11753 RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
11754 for (s=0; s<=maxSymbolValue; s++)
11755 nTotal += abs(normalizedCounter[s]);
11756 if (nTotal != (1U<<tableLog))
11757 RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
11770 void* FSCTptr = (U32*)ptr + 2;
11774 tableU16[-2] = (U16) 0;
11775 tableU16[-1] = (U16) symbolValue;
11790 const void* src, size_t srcSize,
11793 const BYTE* const istart = (const BYTE*) src;
11805 #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
11826 while (ip>istart) {
11849 const void* src, size_t srcSize,
11895 const void* src, size_t srcSize)
11899 unsigned maxSymbolValue = *maxSymbolValuePtr;
11900 unsigned largestCount=0;
11903 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
11910 while (!count[maxSymbolValue]) maxSymbolValue--;
11911 *maxSymbolValuePtr = maxSymbolValue;
11914 for (s=0; s<=maxSymbolValue; s++)
11918 return largestCount;
11932 unsigned* count, unsigned* maxSymbolValuePtr,
11933 const void* source, size_t sourceSize,
11935 U32* const workSpace)
11938 const BYTE* const iend = ip+sourceSize;
11939 size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
11941 U32* const Counting1 = workSpace;
11942 U32* const Counting2 = Counting1 + 256;
11943 U32* const Counting3 = Counting2 + 256;
11944 U32* const Counting4 = Counting3 + 256;
11947 assert(*maxSymbolValuePtr <= 255);
11950 *maxSymbolValuePtr = 0;
11953 ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
11957 while (ip < iend-15) {
11959 Counting1[(BYTE) c     ]++;
11960 Counting2[(BYTE)(c>>8) ]++;
11961 Counting3[(BYTE)(c>>16)]++;
11962 Counting4[       c>>24 ]++;
11964 Counting1[(BYTE) c     ]++;
11965 Counting2[(BYTE)(c>>8) ]++;
11966 Counting3[(BYTE)(c>>16)]++;
11967 Counting4[       c>>24 ]++;
11969 Counting1[(BYTE) c     ]++;
11970 Counting2[(BYTE)(c>>8) ]++;
11971 Counting3[(BYTE)(c>>16)]++;
11972 Counting4[       c>>24 ]++;
11974 Counting1[(BYTE) c     ]++;
11975 Counting2[(BYTE)(c>>8) ]++;
11976 Counting3[(BYTE)(c>>16)]++;
11977 Counting4[       c>>24 ]++;
11983 while (ip<iend) Counting1[*ip++]++;
11986 for (s=0; s<256; s++) {
11987 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
11988 if (Counting1[s] > max) max = Counting1[s];
11991 { unsigned maxSymbolValue = 255;
11992 while (!Counting1[maxSymbolValue]) maxSymbolValue--;
11993 if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
11994 *maxSymbolValuePtr = maxSymbolValue;
11997 return (size_t)max;
12006 const void* source, size_t sourceSize,
12007 void* workSpace, size_t workSpaceSize)
12009 if (sourceSize < 1500)
12011 if ((size_t)workSpace & 3) return ERROR(GENERIC);
12020 const void* source, size_t sourceSize,
12021 void* workSpace, size_t workSpaceSize)
12023 if ((size_t)workSpace & 3) return ERROR(GENERIC);
12025 if (*maxSymbolValuePtr < 255)
12027 *maxSymbolValuePtr = 255;
12031 #ifndef ZSTD_NO_UNUSED_FUNCTIONS
12034 const void* source, size_t sourceSize)
12041 const void* src, size_t srcSize)
12067 # pragma warning(disable : 4127)
12078 #define FSE_STATIC_LINKING_ONLY
12088 #define HUF_isError ERR_isError
12089 #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
12107 #if DEBUGLEVEL >= 2
12109 static size_t showU32(const U32* arr, size_t size)
12112 for (u=0; u<size; u++) {
12121 static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
12124 for (u=0; u<size; u++) {
12132 static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
12135 for (u=0; u<size; u++) {
12136 RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
12142 static size_t showHNodeBits(const nodeElt* hnode, size_t size)
12145 for (u=0; u<size; u++) {
12146 RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
12158 #define HUF_WORKSPACE_MAX_ALIGNMENT 8
12162 size_t const mask = align - 1;
12163 size_t const rem = (size_t)workspace & mask;
12164 size_t const add = (align - rem) & mask;
12165 BYTE* const aligned = (BYTE*)workspace + add;
12168 if (*workspaceSizePtr >= add) {
12170 assert(((size_t)aligned & mask) == 0);
12171 *workspaceSizePtr -= add;
12174 *workspaceSizePtr = 0;
12196 const void* weightTable, size_t wtSize,
12197 void* workspace, size_t workspaceSize)
12201 BYTE* const oend = ostart + dstSize;
12210 if (wtSize <= 1) return 0;
12214 if (maxCount == wtSize) return 1;
12215 if (maxCount == 1) return 0;
12229 if (cSize == 0) return 0;
12233 return (size_t)(op-ostart);
12248 return elt & ~(size_t)0xFF;
12266 assert((value >> nbBits) == 0);
12267 *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
12278 const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
12279 void* workspace, size_t workspaceSize)
12281 HUF_CElt const* const ct = CTable + 1;
12294 for (n=1; n<huffLog+1; n++)
12296 for (n=0; n<maxSymbolValue; n++)
12302 if ((hSize>1) & (hSize < maxSymbolValue/2)) {
12308 if (maxSymbolValue > (256-128)) return ERROR(GENERIC);
12309 if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);
12310 op[0] = (BYTE)(128 + (maxSymbolValue-1));
12312 for (n=0; n<maxSymbolValue; n+=2)
12314 return ((maxSymbolValue+1)/2) + 1;
12328 *hasZeroWeights = (rankVal[0] > 0);
12332 if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
12334 CTable[0] = tableLog;
12337 { U32 n, nextRankStart = 0;
12338 for (n=1; n<=tableLog; n++) {
12339 U32 curr = nextRankStart;
12340 nextRankStart += (rankVal[n] << (n-1));
12345 { U32 n; for (n=0; n<nbSymbols; n++) {
12346 const U32 w = huffWeight[n];
12355 valPerRank[tableLog+1] = 0;
12357 { U32 n; for (n=tableLog; n>0; n--) {
12358 valPerRank[n] = min;
12359 min += nbPerRank[n];
12366 *maxSymbolValuePtr = nbSymbols - 1;
12372 const HUF_CElt* const ct = CTable + 1;
12401 const U32 largestBits = huffNode[lastNonNull].nbBits;
12403 if (largestBits <= targetNbBits) return largestBits;
12405 DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
12408 { int totalCost = 0;
12409 const U32 baseCost = 1 << (largestBits - targetNbBits);
12410 int n = (int)lastNonNull;
12416 while (huffNode[n].nbBits > targetNbBits) {
12417 totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
12422 assert(huffNode[n].nbBits <= targetNbBits);
12424 while (huffNode[n].nbBits == targetNbBits) --n;
12428 assert(((U32)totalCost & (baseCost - 1)) == 0);
12429 totalCost >>= (largestBits - targetNbBits);
12433 { U32 const noSymbol = 0xF0F0F0F0;
12438 { U32 currentNbBits = targetNbBits;
12440 for (pos=n ; pos >= 0; pos--) {
12441 if (huffNode[pos].nbBits >= currentNbBits) continue;
12442 currentNbBits = huffNode[pos].nbBits;
12443 rankLast[targetNbBits-currentNbBits] = (U32)pos;
12446 while (totalCost > 0) {
12451 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
12452 U32 const highPos = rankLast[nBitsToDecrease];
12453 U32 const lowPos = rankLast[nBitsToDecrease-1];
12454 if (highPos == noSymbol) continue;
12458 if (lowPos == noSymbol) break;
12459 { U32 const highTotal = huffNode[highPos].count;
12460 U32 const lowTotal = 2 * huffNode[lowPos].count;
12461 if (highTotal <= lowTotal) break;
12464 assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
12466 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
12468 assert(rankLast[nBitsToDecrease] != noSymbol);
12470 totalCost -= 1 << (nBitsToDecrease-1);
12471 huffNode[rankLast[nBitsToDecrease]].nbBits++;
12477 if (rankLast[nBitsToDecrease-1] == noSymbol)
12478 rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
12486 if (rankLast[nBitsToDecrease] == 0)
12487 rankLast[nBitsToDecrease] = noSymbol;
12489 rankLast[nBitsToDecrease]--;
12490 if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
12491 rankLast[nBitsToDecrease] = noSymbol;
12501 while (totalCost < 0) {
12505 if (rankLast[1] == noSymbol) {
12506 while (huffNode[n].nbBits == targetNbBits) n--;
12509 rankLast[1] = (U32)(n+1);
12513 huffNode[ rankLast[1] + 1 ].nbBits--;
12520 return targetNbBits;
12531 #define RANK_POSITION_TABLE_SIZE 192
12546 #define RANK_POSITION_MAX_COUNT_LOG 32
12547 #define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 )
12548 #define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) )
12569 for (i = 1; i < maxSymbolValue1; ++i) {
12570 if (huffNode[i].count > huffNode[i-1].count) {
12580 int const size = high-low+1;
12582 for (i = 1; i < size; ++i) {
12583 nodeElt const key = huffNode[i];
12585 while (j >= 0 && huffNode[j].count < key.count) {
12586 huffNode[j + 1] = huffNode[j];
12589 huffNode[j + 1] = key;
12598 U32 const pivot = arr[high].count;
12601 for ( ; j < high; j++) {
12602 if (arr[j].count > pivot) {
12615 int const kInsertionSortThreshold = 8;
12616 if (high - low < kInsertionSortThreshold) {
12620 while (low < high) {
12622 if (idx - low < high - idx) {
12645 U32 const maxSymbolValue1 = maxSymbolValue+1;
12654 for (n = 0; n < maxSymbolValue1; ++n) {
12657 rankPosition[lowerRank].base++;
12663 rankPosition[n-1].base += rankPosition[n].base;
12664 rankPosition[n-1].curr = rankPosition[n-1].base;
12668 for (n = 0; n < maxSymbolValue1; ++n) {
12671 U32 const pos = rankPosition[r].curr++;
12672 assert(pos < maxSymbolValue1);
12673 huffNode[pos].count = c;
12679 int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
12680 U32 const bucketStartIdx = rankPosition[n].base;
12681 if (bucketSize > 1) {
12682 assert(bucketStartIdx < maxSymbolValue1);
12695 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
12706 nodeElt* const huffNode0 = huffNode - 1;
12711 DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
12713 nonNullRank = (int)maxSymbolValue;
12714 while(huffNode[nonNullRank].count == 0) nonNullRank--;
12715 lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
12716 huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
12719 for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
12720 huffNode0[0].count = (U32)(1U<<31);
12723 while (nodeNb <= nodeRoot) {
12724 int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
12725 int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
12732 huffNode[nodeRoot].nbBits = 0;
12734 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
12735 for (n=0; n<=nonNullRank; n++)
12736 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
12738 DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
12740 return nonNullRank;
12760 int const alphabetSize = (int)(maxSymbolValue + 1);
12761 for (n=0; n<=nonNullRank; n++)
12762 nbPerRank[huffNode[n].nbBits]++;
12765 for (n=(int)maxNbBits; n>0; n--) {
12766 valPerRank[n] = min;
12767 min += nbPerRank[n];
12770 for (n=0; n<alphabetSize; n++)
12772 for (n=0; n<alphabetSize; n++)
12774 CTable[0] = maxNbBits;
12779 void* workSpace, size_t wkspSize)
12784 nodeElt* const huffNode = huffNode0+1;
12789 DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
12793 return ERROR(workSpace_tooSmall);
12796 return ERROR(maxSymbolValue_tooLarge);
12801 DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
12820 for (s = 0; s <= (int)maxSymbolValue; ++s) {
12823 return nbBits >> 3;
12830 for (s = 0; s <= (int)maxSymbolValue; ++s) {
12856 #define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
12859 size_t bitContainer[2];
12872 void* startPtr, size_t dstCapacity)
12878 if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
12912 #if DEBUGLEVEL >= 1
12918 assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
12955 size_t const nbBits = bitC->bitPos[0] & 0xFF;
12956 size_t const nbBytes = nbBits >> 3;
12965 bitC->ptr += nbBytes;
12993 size_t const nbBits = bitC->bitPos[0] & 0xFF;
12994 if (bitC->ptr >= bitC->endPtr) return 0;
12995 return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
13009 int kUnroll, int kFastFlush, int kLastFast)
13013 int rem = n % kUnroll;
13015 for (; rem > 0; --rem) {
13020 assert(n % kUnroll == 0);
13023 if (n % (2 * kUnroll)) {
13025 for (u = 1; u < kUnroll; ++u) {
13032 assert(n % (2 * kUnroll) == 0);
13034 for (; n>0; n-= 2 * kUnroll) {
13037 for (u = 1; u < kUnroll; ++u) {
13047 for (u = 1; u < kUnroll; ++u) {
13066 return ((srcSize * tableLog) >> 3) + 8;
13072 const void* src, size_t srcSize,
13075 U32 const tableLog = (U32)CTable[0];
13079 BYTE* const oend = ostart + dstSize;
13084 if (dstSize < 8) return 0;
13092 switch (tableLog) {
13107 switch (tableLog) {
13138 HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
13139 const void* src, size_t srcSize,
13146 HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
13147 const void* src, size_t srcSize,
13155 const void* src, size_t srcSize,
13156 const HUF_CElt* CTable, const int flags)
13159 return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
13161 return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
13168 const void* src, size_t srcSize,
13169 const HUF_CElt* CTable, const int flags)
13184 const void* src, size_t srcSize,
13185 const HUF_CElt* CTable, int flags)
13187 size_t const segmentSize = (srcSize+3)/4;
13191 BYTE* const oend = ostart + dstSize;
13194 if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;
13200 if (cSize == 0 || cSize > 65535) return 0;
13208 if (cSize == 0 || cSize > 65535) return 0;
13216 if (cSize == 0 || cSize > 65535) return 0;
13225 if (cSize == 0 || cSize > 65535) return 0;
13229 return (size_t)(op-ostart);
13241 const void* src, size_t srcSize,
13248 if (cSize==0) { return 0; }
13252 if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
13253 return (size_t)(op-ostart);
13266 #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
13267 #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10
13271 unsigned cardinality = 0;
13274 for (i = 0; i < maxSymbolValue + 1; i++) {
13275 if (count[i] != 0) cardinality += 1;
13278 return cardinality;
13284 return minBitsSymbols;
13288 unsigned maxTableLog,
13290 unsigned maxSymbolValue,
13291 void* workSpace, size_t wkspSize,
13293 const unsigned* count,
13306 size_t maxBits, hSize, newSize;
13309 size_t optSize = ((size_t) ~0) - 1;
13310 unsigned optLog = maxTableLog, optLogGuess;
13312 DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
13315 for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
13316 DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
13320 if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
13328 if (newSize > optSize + 1) {
13332 if (newSize < optSize) {
13334 optLog = optLogGuess;
13347 const void* src, size_t srcSize,
13348 unsigned maxSymbolValue, unsigned huffLog,
13350 void* workSpace, size_t wkspSize,
13355 BYTE* const oend = ostart + dstSize;
13362 if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
13364 if (!dstSize) return 0;
13375 nbStreams, oldHufTable, flags);
13381 size_t largestTotal = 0;
13382 DEBUGLOG(5, "input suspected incompressible : sampling to check");
13383 { unsigned maxSymbolValueBegin = maxSymbolValue;
13385 largestTotal += largestBegin;
13387 { unsigned maxSymbolValueEnd = maxSymbolValue;
13389 largestTotal += largestEnd;
13396 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }
13397 if (largest <= (srcSize >> 7)+4) return 0;
13399 DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
13411 nbStreams, oldHufTable, flags);
13417 maxSymbolValue, huffLog,
13418 &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
13420 huffLog = (U32)maxBits;
13421 DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
13426 size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
13432 &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
13437 if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
13440 nbStreams, oldHufTable, flags);
13444 if (hSize + 12ul >= srcSize) { return 0; }
13452 nbStreams, table->CTable, flags);
13456 const void* src, size_t srcSize,
13457 unsigned maxSymbolValue, unsigned huffLog,
13458 void* workSpace, size_t wkspSize,
13464 workSpace, wkspSize, hufTable,
13473 const void* src, size_t srcSize,
13474 unsigned maxSymbolValue, unsigned huffLog,
13475 void* workSpace, size_t wkspSize,
13481 workSpace, wkspSize,
13482 hufTable, repeat, flags);
13510 #ifndef ZSTD_COMPRESS_LITERALS_H
13511 #define ZSTD_COMPRESS_LITERALS_H
13528 #ifndef ZSTD_COMPRESS_H
13529 #define ZSTD_COMPRESS_H
13546 #ifndef ZSTD_CWKSP_H
13547 #define ZSTD_CWKSP_H
13556 #if defined (__cplusplus)
13571 #ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
13572 #define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
13577 #define ZSTD_CWKSP_ALIGNMENT_BYTES 64
13722 assert(ws->tableValidEnd <= ws->allocStart);
13726 #if ZSTD_MEMORY_SANITIZER
13728 intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
13730 #if defined(ZSTD_MSAN_PRINT)
13732 __msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
13744 size_t const mask = align - 1;
13746 return (size + mask) & ~mask;
13764 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
13797 size_t const alignBytesMask = alignBytes - 1;
13798 size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
13799 assert((alignBytes & alignBytesMask) == 0);
13822 void* const alloc = (BYTE*)ws->allocStart - bytes;
13823 void* const bottom = ws->tableEnd;
13824 DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
13827 assert(alloc >= bottom);
13828 if (alloc < bottom) {
13829 DEBUGLOG(4, "cwksp: alloc failed!");
13830 ws->allocFailed = 1;
13835 if (alloc < ws->tableValidEnd) {
13836 ws->tableValidEnd = alloc;
13838 ws->allocStart = alloc;
13851 if (phase > ws->phase) {
13855 ws->tableValidEnd = ws->objectEnd;
13859 void* const alloc = ws->objectEnd;
13861 void* const objectEnd = (BYTE*)alloc + bytesToAlign;
13862 DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
13864 "table phase - alignment initial allocation failed!");
13865 ws->objectEnd = objectEnd;
13866 ws->tableEnd = objectEnd;
13867 if (ws->tableValidEnd < ws->tableEnd) {
13868 ws->tableValidEnd = ws->tableEnd;
13883 return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
13897 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
13904 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
13942 if (ptr && ptr < ws->initOnceStart) {
13950 ws->initOnceStart = ptr;
13952 #if ZSTD_MEMORY_SANITIZER
13983 if (ws->phase < phase) {
13988 alloc = ws->tableEnd;
13990 top = ws->allocStart;
13992 DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
13998 DEBUGLOG(4, "cwksp: table alloc failed!");
13999 ws->allocFailed = 1;
14002 ws->tableEnd = end;
14004 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
14006 __asan_unpoison_memory_region(alloc, bytes);
14022 void* alloc = ws->objectEnd;
14023 void* end = (BYTE*)alloc + roundedBytes;
14025 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
14031 "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
14038 DEBUGLOG(3, "cwksp: object alloc failed!");
14039 ws->allocFailed = 1;
14042 ws->objectEnd = end;
14043 ws->tableEnd = end;
14044 ws->tableValidEnd = end;
14046 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
14051 __asan_unpoison_memory_region(alloc, bytes);
14060 DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
14062 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
14071 assert(__msan_test_shadow(ws->objectEnd, size) == -1);
14072 if ((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
14073 __msan_poison(ws->objectEnd, size);
14076 __msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
14082 assert(ws->tableValidEnd <= ws->allocStart);
14083 ws->tableValidEnd = ws->objectEnd;
14088 DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
14090 assert(ws->tableValidEnd <= ws->allocStart);
14091 if (ws->tableValidEnd < ws->tableEnd) {
14092 ws->tableValidEnd = ws->tableEnd;
14101 DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
14103 assert(ws->tableValidEnd <= ws->allocStart);
14104 if (ws->tableValidEnd < ws->tableEnd) {
14115 DEBUGLOG(4, "cwksp: clearing tables!");
14117 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
14124 __asan_poison_memory_region(ws->objectEnd, size);
14128 ws->tableEnd = ws->objectEnd;
14139 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
14146 if ((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
14148 __msan_poison(ws->tableValidEnd, size);
14153 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
14160 __asan_poison_memory_region(ws->objectEnd, size);
14164 ws->tableEnd = ws->objectEnd;
14166 ws->allocFailed = 0;
14179 DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
14180 assert(((size_t)start & (sizeof(void*)-1)) == 0);
14183 ws->objectEnd = ws->workspace;
14184 ws->tableValidEnd = ws->objectEnd;
14187 ws->isStatic = isStatic;
14189 ws->workspaceOversizedDuration = 0;
14195 DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
14196 RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
14202 void* ptr = ws->workspace;
14203 DEBUGLOG(4, "cwksp: freeing workspace");
14218 return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
14222 return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
14223      + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
14227 return ws->allocFailed;
14247 return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
14267 ws->workspaceOversizedDuration++;
14269 ws->workspaceOversizedDuration = 0;
14273 #if defined (__cplusplus)
14279 #ifdef ZSTD_MULTITHREAD
14291 #ifndef ZSTDMT_COMPRESS_H
14292 #define ZSTDMT_COMPRESS_H
14294 #if defined (__cplusplus)
14310 #define ZSTD_STATIC_LINKING_ONLY
14315 #ifndef ZSTDMT_NBWORKERS_MAX
14316 # define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) ? 64 : 256)
14318 #ifndef ZSTDMT_JOBSIZE_MIN
14319 # define ZSTDMT_JOBSIZE_MIN (512 KB)
14321 #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
14322 #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
14334 ZSTD_customMem cMem,
14335 ZSTD_threadPool *pool);
14353 const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
14355 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
14389 #if defined (__cplusplus)
14398 #if defined (__cplusplus)
14405 #define kSearchStrength 8
14406 #define HASH_READ_SIZE 8
14407 #define ZSTD_DUBT_UNSORTED_MARK 1
14432 ZSTD_dictContentType_e dictContentType;
14479 size_t fseTablesSize;
14480 size_t lastCountSize;
14495 const ZSTD_CCtx_params* cctxParams,
14497 void* workspace,
size_t wkspSize);
14517 size_t posInSequence;
14544 unsigned* litLengthFreq;
14545 unsigned* matchLengthFreq;
14546 unsigned* offCodeFreq;
14555 U32 litLengthSumBasePrice;
14556 U32 matchLengthSumBasePrice;
14560 ZSTD_paramSwitch_e literalCompressionMode;
14569 BYTE const* nextSrc;
14571 BYTE const* dictBase;
14580 #define ZSTD_WINDOW_START_INDEX 2
14584 #define ZSTD_ROW_HASH_CACHE_SIZE 8
14649 #define LDM_BATCH_SIZE 64
14661 ZSTD_paramSwitch_e enableLdm;
14670 int collectSequences;
14671 ZSTD_Sequence* seqStart;
14673 size_t maxSequences;
14679 ZSTD_frameParameters
fParams;
14745 #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
14746 #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
14762 #define ZSTD_MAX_NB_BLOCK_SPLITS 196
14777 ZSTD_sequenceProducer_F* mFinder;
14778 ZSTD_Sequence* seqBuffer;
14779 size_t seqBufferCapacity;
14840 #ifdef ZSTD_MULTITHREAD
14846 ZSTD_TraceCtx traceCtx;
14888 void const* src, size_t srcSize);
14894 static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
14895 8, 9, 10, 11, 12, 13, 14, 15,
14896 16, 16, 17, 17, 18, 18, 19, 19,
14897 20, 20, 20, 20, 21, 21, 21, 21,
14898 22, 22, 22, 22, 22, 22, 22, 22,
14899 23, 23, 23, 23, 23, 23, 23, 23,
14900 24, 24, 24, 24, 24, 24, 24, 24,
14901 24, 24, 24, 24, 24, 24, 24, 24 };
14902 static const U32 LL_deltaCode = 19;
14911 static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14912 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
14913 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
14914 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
14915 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
14916 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
14917 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
14918 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
14919 static const U32 ML_deltaCode = 36;
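/* The two tables above implement zstd's length-to-code maps: small values
 * index the table directly, larger values fall back to highbit + deltaCode.
 * The sketch below mirrors ZSTD_LLcode/ZSTD_MLcode as defined nearby in this
 * file (mlBase is matchLength - MINMATCH); shown for illustration only. */
static BYTE llCode_sketch(U32 litLength)
{
    return (litLength > 63) ? (BYTE)(ZSTD_highbit32(litLength) + LL_deltaCode)
                            : LL_Code[litLength];
}
static BYTE mlCode_sketch(U32 mlBase)
{
    return (mlBase > 127) ? (BYTE)(ZSTD_highbit32(mlBase) + ML_deltaCode)
                          : ML_Code[mlBase];
}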
14942 DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
14944 dstSize_tooSmall, "dst buf too small for uncompressed block");
14971 return (srcSize >> minlog) + 2;
14976 switch (cctxParams->literalCompressionMode) {
14977 case ZSTD_ps_enable:
14979 case ZSTD_ps_disable:
14985 return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
14997 assert(iend > ilimit_w);
14998 if (ip <= ilimit_w) {
15000 op += ilimit_w - ip;
15007 #define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
15008 #define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
15009 #define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
15010 #define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r))
15011 #define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM)
15012 #define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM)
15013 #define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
15014 #define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
15015 #define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o))
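/* A worked example of the offBase convention defined above: values
 * 1..ZSTD_REP_NUM are repcodes, while real offsets are stored shifted up by
 * ZSTD_REP_NUM so the two ranges never collide. Sketch for illustration:
 */
static void offbase_roundtrip_sketch(void)
{
    size_t const offBase = OFFSET_TO_OFFBASE(100);    /* 100 + ZSTD_REP_NUM = 103 */
    assert(OFFBASE_IS_OFFSET(offBase));
    assert(OFFBASE_TO_OFFSET(offBase) == 100);
    assert(OFFBASE_IS_REPCODE(REPCODE1_TO_OFFBASE));  /* repcode 1 stays 1 */
}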
15027 size_t matchLength)
15031 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
15032 static const BYTE* g_start = NULL;
15035 DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
15036 pos, (U32)litLength, (U32)matchLength, (U32)offBase);
15042 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
15044 if (litEnd <= litLimit_w) {
15050 if (litLength > 16) {
15056 seqStorePtr->lit += litLength;
15059 if (litLength>0xFFFF) {
15071 { size_t const mlBase = matchLength - MINMATCH;
15072 if (mlBase>0xFFFF) {
15097 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
15098 rep[2] = (repCode >= 2) ? rep[1] : rep[2];
15100 rep[0] = currentOffset;
15126 const BYTE* const pStart = pIn;
15127 const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
15129 if (pIn < pInLoopLimit) {
15133 while (pIn < pInLoopLimit) {
15135 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
15137 return (size_t)(pIn - pStart);
15141 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
15142 return (size_t)(pIn - pStart);
15151 const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
15155 if (match + matchLength != mEnd) return matchLength;
15156 DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
15157 DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
15158 DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
15159 DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
15160 DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
15161 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
15242 if (exponent & 1) power *= base;
15249 #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
15256 BYTE const* istart = (BYTE const*)buf;
15258 for (pos = 0; pos < size; ++pos) {
15296 #if (ZSTD_WINDOWLOG_MAX_64 > 31)
15297 # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
15300 #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
15302 #define ZSTD_CHUNKSIZE_MAX \
15304 - ZSTD_CURRENT_MAX)
15353 #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
15354 # ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
15355 # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
15357 # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
15372 U32 const cycleSize = 1u << cycleLog;
15374 U32 const minIndexToOverflowCorrect = cycleSize
15375                                     + MAX(maxDist, cycleSize)
15384 U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
15385                               minIndexToOverflowCorrect);
15386 U32 const indexLargeEnough = curr > adjustedIndex;
15391 U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
15393 return indexLargeEnough && dictionaryInvalidated;
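/* A simplified restatement of the trigger above, for illustration only:
 * overflow correction becomes possible once the current index clears one
 * full cycle plus the window (the real check additionally scales the
 * threshold in debug/fuzzing builds to exercise the path more often). */
static int needs_correction_sketch(U32 curr, U32 cycleLog, U32 maxDist)
{
    U32 const cycleSize = 1u << cycleLog;
    U32 const minIndex = cycleSize + MAX(maxDist, cycleSize);
    return curr > minIndex;
}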
15406 void const* srcEnd)
15427 U32 maxDist, void const* src)
15448 U32 const cycleSize = 1u << cycleLog;
15449 U32 const cycleMask = cycleSize - 1;
15451 U32 const currentCycle = curr & cycleMask;
15456 U32 const newCurrent = currentCycle
15457                      + currentCycleCorrection
15458                      + MAX(maxDist, cycleSize);
15459 U32 const correction = curr - newCurrent;
15464 assert((maxDist & (maxDist - 1)) == 0);
15465 assert((curr & cycleMask) == (newCurrent & cycleMask));
15466 assert(curr > newCurrent);
15469 assert(correction > 1<<28);
15472 window->base += correction;
15486 assert(newCurrent >= maxDist);
15494 DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
15524 const void* blockEnd,
15526 U32* loadedDictEndPtr,
15529 U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
15530 U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
15531 DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
15532 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
15547 if (blockEndIdx > maxDist + loadedDictEnd) {
15548 U32 const newLowLimit = blockEndIdx - maxDist;
15551 DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
15556 if (loadedDictEndPtr) *loadedDictEndPtr = 0;
15557 if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
15569 const void* blockEnd,
15571 U32* loadedDictEndPtr,
15574 assert(loadedDictEndPtr != NULL);
15575 assert(dictMatchStatePtr != NULL);
15576 { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
15577 U32 const loadedDictEnd = *loadedDictEndPtr;
15578 DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
15579 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
15580 assert(blockEndIdx >= loadedDictEnd);
15582 if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
15592 DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
15593 *loadedDictEndPtr = 0;
15594 *dictMatchStatePtr = NULL;
15596 if (*loadedDictEndPtr != 0) {
15597 DEBUGLOG(6, "dictionary considered valid for current block");
15620 void const* src, size_t srcSize,
15621 int forceNonContiguous)
15624 U32 contiguous = 1;
15625 DEBUGLOG(5, "ZSTD_window_update");
15631 if (src != window->nextSrc || forceNonContiguous) {
15634 DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
15636 assert(distanceFromBase == (size_t)(U32)distanceFromBase);
15639 window->base = ip - distanceFromBase;
15651 DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
15661 U32 const maxDistance = 1U << windowLog;
15663 U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
15669 U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
15670 return matchLowest;
15678 U32 const maxDistance = 1U << windowLog;
15680 U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
15685 U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
15686 return matchLowest;
15692 #if (DEBUGLEVEL>=2)
15696 U32 const fp_accuracy = 8;
15697 U32 const fp_multiplier = (1 << fp_accuracy);
15698 U32 const newStat = rawStat + 1;
15700 U32 const BWeight = hb * fp_multiplier;
15701 U32 const FWeight = (newStat << fp_accuracy) >> hb;
15702 U32 const weight = BWeight + FWeight;
15703 assert(hb + fp_accuracy < 31);
15704 return (double)weight / fp_multiplier;
15712 for (u=0, sum=0; u<=max; u++) sum += table[u];
15713 DEBUGLOG(2, "total nb elts: %u", sum);
15714 for (u=0; u<=max; u++) {
15716 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
15739 #define ZSTD_SHORT_CACHE_TAG_BITS 8
15740 #define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
15756 return tag1 == tag2;
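/* With ZSTD_SHORT_CACHE_TAG_BITS == 8, hash-table entries in dictionary
 * modes can pack (index << 8) | tag into a single U32 so a probable miss is
 * rejected without touching the candidate bytes. A sketch of the pack/probe
 * pair under that assumption (illustration, not the library's exact code):
 */
static U32 packIndexTag_sketch(U32 index, U32 hash)
{
    return (index << ZSTD_SHORT_CACHE_TAG_BITS)
         | (hash & ZSTD_SHORT_CACHE_TAG_MASK);
}
static int tagMatch_sketch(U32 packed, U32 hash)
{
    return (packed & ZSTD_SHORT_CACHE_TAG_MASK)
        == (hash & ZSTD_SHORT_CACHE_TAG_MASK);
}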
15759 #if defined (__cplusplus)
15774 const void* const dict, size_t dictSize);
15789 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
15797 const void* dict, size_t dictSize,
15799 const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
15810 const void* dict, size_t dictSize,
15811 ZSTD_dictContentType_e dictContentType,
15814 const ZSTD_CCtx_params* params,
15815 unsigned long long pledgedSrcSize);
15820 void* dst, size_t dstCapacity,
15821 const void* src, size_t srcSize,
15822 const void* dict, size_t dictSize,
15823 const ZSTD_CCtx_params* params);
15862 const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
15863 const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
15879 const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
15880 const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
15892 void* dst, size_t dstCapacity,
15893 const void* src, size_t srcSize);
15896 void* dst, size_t dstCapacity,
15897 const void* src, size_t srcSize);
15920 const void* src, size_t srcSize,
15921 void* entropyWorkspace, size_t entropyWorkspaceSize,
15925 int suspectUncompressible,
15935 #if DEBUGLEVEL >= 2
15937 static size_t showHexa(const void* src, size_t srcSize)
15959 DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
15987 { const BYTE b = ((const BYTE*)src)[0];
15990 if (((const BYTE*)src)[p] != b) return 0;
16001 assert(dstCapacity >= 4); (void)dstCapacity;
16019 ostart[flSize] = *(const BYTE*)src;
16020 DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
16032 assert((int)strategy >= 0);
16033 assert((int)strategy <= 9);
16037 { int const shift = MIN(9-(int)strategy, 3);
16039 DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
16045 void* dst, size_t dstCapacity,
16046 const void* src, size_t srcSize,
16047 void* entropyWorkspace, size_t entropyWorkspaceSize,
16051 int disableLiteralCompression,
16052 int suspectUncompressible,
16061 DEBUGLOG(5, "ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
16062 disableLiteralCompression, (U32)srcSize, dstCapacity);
16064 DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
16069 if (disableLiteralCompression)
16076 RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
16078 int const flags = 0
16084 typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
16085 huf_compress_f huf_compress;
16088 cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
16091 entropyWorkspace, entropyWorkspaceSize,
16094 DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
16097 DEBUGLOG(5, "reusing statistics from previous huffman block");
16128 { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
16134 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
16140 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
16142 ostart[4] = (BYTE)(cLitSize >> 10);
16149 return lhSize+cLitSize;
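/* The three lhc expressions above correspond to the three literal-section
 * header sizes (3, 4, 5 bytes). Decoding the 3-byte form back into its
 * fields shows the layout: 2 bits block type, 2 bits size format, then two
 * 10-bit sizes. Sketch for illustration:
 */
static void lh3_decode_sketch(U32 lhc, U32* hType, U32* singleStream,
                              U32* regenSize, U32* cmpSize)
{
    *hType        =  lhc        & 3;
    *singleStream = ((lhc >> 2) & 3) == 0;   /* sizeFormat 0 => single stream */
    *regenSize    = (lhc >> 4)  & 0x3FF;     /* 10 bits of regenerated size */
    *cmpSize      = (lhc >> 14) & 0x3FF;     /* 10 bits of compressed size */
}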
16177 #ifndef ZSTD_COMPRESS_SEQUENCES_H
16178 #define ZSTD_COMPRESS_SEQUENCES_H
16191 size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
16193 short const* defaultNorm, U32 defaultNormLog,
16201 const BYTE* codeTable, size_t nbSeq,
16202 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
16203 const FSE_CTable* prevCTable, size_t prevCTableSize,
16204 void* entropyWorkspace, size_t entropyWorkspaceSize);
16207 void* dst, size_t dstCapacity,
16211 seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
16215 unsigned const* count,
16216 unsigned const max);
16219 unsigned const* count, unsigned const max);
16229 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
16230 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
16231 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
16232 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
16233 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
16234 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
16235 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
16236 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
16237 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
16238 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
16239 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
16240 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
16241 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
16242 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
16243 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
16244 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
16245 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
16246 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
16247 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
16248 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
16249 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
16254 void const* ptr = ctable;
16257 return maxSymbolValue;
16270 return nbSeq >= 2048;
16278 size_t const nbSeq, unsigned const FSELog)
16297 for (s = 0; s <= max; ++s) {
16298 unsigned norm = (unsigned)((256 * count[s]) / total);
16299 if (count[s] != 0 && norm == 0)
16313 unsigned const* count,
16314 unsigned const max)
16316 unsigned const kAccuracyLog = 8;
16322 DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
16324 return ERROR(GENERIC);
16326 for (s = 0; s <= max; ++s) {
16327 unsigned const tableLog = cstate.stateLog;
16328 unsigned const badCost = (tableLog + 1) << kAccuracyLog;
16332 if (bitCost >= badCost) {
16333 DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
16334 return ERROR(GENERIC);
16338 return cost >> kAccuracyLog;
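/* The cost accounting above works in fixed point with kAccuracyLog == 8,
 * i.e. 1/256th-of-a-bit units, converting back to whole bits only at the
 * end. Worked example for illustration: a symbol with normalized
 * probability 5 out of 2^6 costs -log2(5/64) ~= 3.68 bits, i.e. ~942 in
 * 1/256 units: */
static unsigned fractional_bits_example(void)
{
    unsigned const costFixedPoint = 942;   /* ~3.68 bits * 256 */
    return costFixedPoint >> 8;            /* 3 whole bits after truncation */
}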
16347 unsigned const* count, unsigned const max)
16349 unsigned const shift = 8 - accuracyLog;
16352 assert(accuracyLog <= 8);
16353 for (s = 0; s <= max; ++s) {
16354 unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
16355 unsigned const norm256 = normAcc << shift;
16366 size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
16368 short const* defaultNorm, U32 defaultNormLog,
16373 if (mostFrequent == nbSeq) {
16375 if (isDefaultAllowed && nbSeq <= 2) {
16380 DEBUGLOG(5, "Selected set_basic");
16387 if (isDefaultAllowed) {
16388 size_t const staticFse_nbSeq_max = 1000;
16389 size_t const mult = 10 - strategy;
16390 size_t const baseLog = 3;
16391 size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;
16392 assert(defaultNormLog >= 5 && defaultNormLog <= 6);
16393 assert(mult <= 9 && mult >= 7);
16395 && (nbSeq < staticFse_nbSeq_max) ) {
16396 DEBUGLOG(5, "Selected set_repeat");
16399 if ( (nbSeq < dynamicFse_nbSeq_min)
16400   || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
16401 DEBUGLOG(5, "Selected set_basic");
16418 if (isDefaultAllowed) {
16424 DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
16425 (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
16426 if (basicCost <= repeatCost && basicCost <= compressedCost) {
16427 DEBUGLOG(5, "Selected set_basic");
16428 assert(isDefaultAllowed);
16432 if (repeatCost <= compressedCost) {
16433 DEBUGLOG(5, "Selected set_repeat");
16437 assert(compressedCost < basicCost && compressedCost < repeatCost);
16453 const BYTE* codeTable, size_t nbSeq,
16454 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
16455 const FSE_CTable* prevCTable, size_t prevCTableSize,
16456 void* entropyWorkspace, size_t entropyWorkspaceSize)
16459 const BYTE* const oend = op + dstCapacity;
16460 DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
16465 RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
16466 *op = codeTable[0];
16469 ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
16476 size_t nbSeq_1 = nbSeq;
16478 if (count[codeTable[nbSeq-1]] > 1) {
16479 count[codeTable[nbSeq-1]]--;
16484 (void)entropyWorkspaceSize;
16499 void* dst, size_t dstCapacity,
16503 seqDef const* sequences, size_t nbSeq, int longOffsets)
16512 dstSize_tooSmall, "not enough space remaining");
16513 DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
16515 (unsigned)dstCapacity);
16518 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
16519 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
16520 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
16521 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
16523 BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
16526 U32 const ofBits = ofCodeTable[nbSeq-1];
16529 BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
16532 BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
16533             ofBits - extraBits);
16535 BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
16540 for (n=nbSeq-2 ; n<nbSeq ; n--) {
16541 BYTE const llCode = llCodeTable[n];
16542 BYTE const ofCode = ofCodeTable[n];
16543 BYTE const mlCode = mlCodeTable[n];
16545 U32 const ofBits = ofCode;
16547 DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
16548 (unsigned)sequences[n].litLength,
16549 (unsigned)sequences[n].mlBase + MINMATCH,
16550 (unsigned)sequences[n].offBase);
16559 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
16561 BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
16566 BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
16569 BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
16570             ofBits - extraBits);
16572 BIT_addBits(&blockStream, sequences[n].offBase, ofBits);
16575 DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
16578 DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
16580 DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
16582 DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
16593 void* dst, size_t dstCapacity,
16597 seqDef const* sequences, size_t nbSeq, int longOffsets)
16600 CTable_MatchLength, mlCodeTable,
16601 CTable_OffsetBits, ofCodeTable,
16602 CTable_LitLength, llCodeTable,
16603 sequences, nbSeq, longOffsets);
16610 ZSTD_encodeSequences_bmi2(
16611 void* dst, size_t dstCapacity,
16615 seqDef const* sequences, size_t nbSeq, int longOffsets)
16618 CTable_MatchLength, mlCodeTable,
16619 CTable_OffsetBits, ofCodeTable,
16620 CTable_LitLength, llCodeTable,
16621 sequences, nbSeq, longOffsets);
16627 void* dst, size_t dstCapacity,
16631 seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
16633 DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
16636 return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
16637 CTable_MatchLength, mlCodeTable,
16638 CTable_OffsetBits, ofCodeTable,
16639 CTable_LitLength, llCodeTable,
16640 sequences, nbSeq, longOffsets);
16645 CTable_MatchLength, mlCodeTable,
16646 CTable_OffsetBits, ofCodeTable,
16647 CTable_LitLength, llCodeTable,
16648 sequences, nbSeq, longOffsets);
16676 #ifndef ZSTD_COMPRESS_ADVANCED_H
16677 #define ZSTD_COMPRESS_ADVANCED_H
16693 void* dst, size_t dstCapacity,
16694 void const* src, size_t srcSize,
16695 unsigned lastBlock);
16729 void* dst, size_t dstSize,
16730 const int bmi2, int writeEntropy, int* entropyWritten)
16732 size_t const header = writeEntropy ? 200 : 0;
16733 size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
16735 BYTE* const oend = ostart + dstSize;
16736 BYTE* op = ostart + lhSize;
16737 U32 const singleStream = lhSize == 3;
16739 size_t cLitSize = 0;
16741 DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
16743 *entropyWritten = 0;
16745 DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
16748 DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
16772 if (!writeEntropy && cLitSize >= litSize) {
16773 DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
16777 if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
16778 assert(cLitSize > litSize);
16779 DEBUGLOG(5, "Literals expanded beyond allowed header size");
16782 DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
16789 { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
16794 { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
16799 { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
16801 ostart[4] = (BYTE)(cLitSize >> 10);
16807 *entropyWritten = 1;
16808 DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
16814 const seqDef* sequences,
size_t nbSeq,
16815 size_t litSize,
int lastSequence)
16817 const seqDef*
const sstart = sequences;
16818 const seqDef*
const send = sequences + nbSeq;
16819 const seqDef* sp = sstart;
16820 size_t matchLengthSum = 0;
16821 size_t litLengthSum = 0;
16822 (
void)(litLengthSum);
16823 while (send-sp > 0) {
16829 assert(litLengthSum <= litSize);
16830 if (!lastSequence) {
16831 assert(litLengthSum == litSize);
16833 return matchLengthSum + litSize;
16849 const seqDef* sequences,
size_t nbSeq,
16850 const BYTE* llCode,
const BYTE* mlCode,
const BYTE* ofCode,
16851 const ZSTD_CCtx_params* cctxParams,
16852 void*
dst,
size_t dstCapacity,
16853 const int bmi2,
int writeEntropy,
int* entropyWritten)
16857 BYTE*
const oend = ostart + dstCapacity;
16861 DEBUGLOG(5,
"ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
16863 *entropyWritten = 0;
16866 dstSize_tooSmall,
"");
16874 return op - ostart;
16880 DEBUGLOG(5,
"ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (
unsigned)(
op-ostart));
16882 if (writeEntropy) {
16883 const U32 LLtype = fseMetadata->
llType;
16884 const U32 Offtype = fseMetadata->
ofType;
16885 const U32 MLtype = fseMetadata->
mlType;
16887 *seqHead = (
BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
16892 *seqHead = (
BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
16901 longOffsets, bmi2);
16903 op += bitstreamSize;
16912 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
16916 DEBUGLOG(5,
"Avoiding bug in zstd decoder in versions <= 1.3.4 by "
16917 "emitting an uncompressed block.");
16921 DEBUGLOG(5,
"ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
16931 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
16932 if (
op-seqHead < 4) {
16933 DEBUGLOG(5,
"Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
16934 "an uncompressed block when sequences are < 4 bytes");
16939 *entropyWritten = 1;
16940 return op - ostart;
16949 const seqDef* sequences,
size_t nbSeq,
16951 const BYTE* llCode,
const BYTE* mlCode,
const BYTE* ofCode,
16952 const ZSTD_CCtx_params* cctxParams,
16953 void*
dst,
size_t dstCapacity,
16955 int writeLitEntropy,
int writeSeqEntropy,
16956 int* litEntropyWritten,
int* seqEntropyWritten,
16960 BYTE*
const oend = ostart + dstCapacity;
16962 DEBUGLOG(5,
"ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
16963 litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
16966 op, oend-
op, bmi2, writeLitEntropy, litEntropyWritten);
16968 if (cLitSize == 0)
return 0;
16974 llCode, mlCode, ofCode,
16977 bmi2, writeSeqEntropy, seqEntropyWritten);
16979 if (cSeqSize == 0)
return 0;
16993 void* workspace,
size_t wkspSize,
16996 unsigned*
const countWksp = (
unsigned*)workspace;
16997 unsigned maxSymbolValue = 255;
16998 size_t literalSectionHeaderSize = 3;
17006 if (writeEntropy) cLitSizeEstimate += hufMetadata->
hufDesSize;
17007 return cLitSizeEstimate + literalSectionHeaderSize;
17014 const BYTE* codeTable,
unsigned maxCode,
17016 const U8* additionalBits,
17017 short const* defaultNorm,
U32 defaultNormLog,
U32 defaultMax,
17018 void* workspace,
size_t wkspSize)
17020 unsigned*
const countWksp = (
unsigned*)workspace;
17021 const BYTE* ctp = codeTable;
17022 const BYTE*
const ctStart = ctp;
17023 const BYTE*
const ctEnd = ctStart + nbSeq;
17024 size_t cSymbolTypeSizeEstimateInBits = 0;
17025 unsigned max = maxCode;
17030 assert(max <= defaultMax);
17031 cSymbolTypeSizeEstimateInBits = max <= defaultMax
17035 cSymbolTypeSizeEstimateInBits = 0;
17037 cSymbolTypeSizeEstimateInBits =
ZSTD_fseBitCost(fseCTable, countWksp, max);
17039 if (
ZSTD_isError(cSymbolTypeSizeEstimateInBits))
return nbSeq * 10;
17040 while (ctp < ctEnd) {
17041 if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
17042 else cSymbolTypeSizeEstimateInBits += *ctp;
17045 return cSymbolTypeSizeEstimateInBits / 8;
17049 const BYTE* llCodeTable,
17050 const BYTE* mlCodeTable,
17054 void* workspace,
size_t wkspSize,
17057 size_t const sequencesSectionHeaderSize = 3;
17058 size_t cSeqSizeEstimate = 0;
17059 if (nbSeq == 0)
return sequencesSectionHeaderSize;
17063 workspace, wkspSize);
17067 workspace, wkspSize);
17071 workspace, wkspSize);
17072 if (writeEntropy) cSeqSizeEstimate += fseMetadata->
fseTablesSize;
17073 return cSeqSizeEstimate + sequencesSectionHeaderSize;
17077 const BYTE* ofCodeTable,
17078 const BYTE* llCodeTable,
17079 const BYTE* mlCodeTable,
17083 void* workspace,
size_t wkspSize,
17084 int writeLitEntropy,
int writeSeqEntropy) {
17085 size_t cSizeEstimate = 0;
17088 workspace, wkspSize, writeLitEntropy);
17091 workspace, wkspSize, writeSeqEntropy);
17117 const ZSTD_CCtx_params* cctxParams,
17118 void*
dst,
size_t dstCapacity,
17119 const void* src,
size_t srcSize,
17120 const int bmi2,
U32 lastBlock,
17121 void* workspace,
size_t wkspSize)
17125 const seqDef* sp = sstart;
17127 const BYTE*
const lend = seqStorePtr->
lit;
17128 const BYTE* lp = lstart;
17132 BYTE*
const oend = ostart + dstCapacity;
17134 const BYTE* llCodePtr = seqStorePtr->
llCode;
17135 const BYTE* mlCodePtr = seqStorePtr->
mlCode;
17136 const BYTE* ofCodePtr = seqStorePtr->
ofCode;
17137 size_t targetCBlockSize = cctxParams->targetCBlockSize;
17138 size_t litSize, seqCount;
17140 int writeSeqEntropy = 1;
17141 int lastSequence = 0;
17143 DEBUGLOG(5,
"ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
17144 (
unsigned)(lend-lp), (
unsigned)(send-sstart));
17149 size_t cBlockSizeEstimate = 0;
17150 if (sstart == send) {
17154 lastSequence =
sequence == send - 1;
17158 if (lastSequence) {
17160 assert(litSize <= (
size_t)(lend - lp));
17161 litSize = (
size_t)(lend - lp);
17169 &nextCBlock->
entropy, entropyMetadata,
17170 workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
17171 if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
17172 int litEntropyWritten = 0;
17173 int seqEntropyWritten = 0;
17178 llCodePtr, mlCodePtr, ofCodePtr,
17181 bmi2, writeLitEntropy, writeSeqEntropy,
17182 &litEntropyWritten, &seqEntropyWritten,
17183 lastBlock && lastSequence);
17185 if (cSize > 0 && cSize < decompressedSize) {
17186 DEBUGLOG(5,
"Committed the sub-block");
17187 assert(
ip + decompressedSize <= iend);
17188 ip += decompressedSize;
17192 llCodePtr += seqCount;
17193 mlCodePtr += seqCount;
17194 ofCodePtr += seqCount;
17198 if (litEntropyWritten) {
17199 writeLitEntropy = 0;
17201 if (seqEntropyWritten) {
17202 writeSeqEntropy = 0;
17206 }
while (!lastSequence);
17207 if (writeLitEntropy) {
17208 DEBUGLOG(5,
"ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
17215 DEBUGLOG(5,
"ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
17220 DEBUGLOG(5,
"ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (
size_t)(iend -
ip));
17229 for (seq = sstart; seq < sp; ++seq) {
17235 DEBUGLOG(5,
"ZSTD_compressSubBlock_multi compressed");
17240 void*
dst,
size_t dstCapacity,
17241 void const* src,
size_t srcSize,
17242 unsigned lastBlock) {
17259 zc->
bmi2, lastBlock,
17281 #define FSE_STATIC_LINKING_ONLY
17298 #ifndef ZSTD_FAST_H
17299 #define ZSTD_FAST_H
17301 #if defined (__cplusplus)
17313 void const* src,
size_t srcSize);
17316 void const* src,
size_t srcSize);
17319 void const* src,
size_t srcSize);
17321 #if defined (__cplusplus)
17338 #ifndef ZSTD_DOUBLE_FAST_H
17339 #define ZSTD_DOUBLE_FAST_H
17341 #if defined (__cplusplus)
17353 void const* src,
size_t srcSize);
17356 void const* src,
size_t srcSize);
17359 void const* src,
size_t srcSize);
17362 #if defined (__cplusplus)
17379 #ifndef ZSTD_LAZY_H
17380 #define ZSTD_LAZY_H
17382 #if defined (__cplusplus)
17394 #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
17396 #define ZSTD_ROW_HASH_TAG_BITS 8
17407 void const* src,
size_t srcSize);
17410 void const* src,
size_t srcSize);
17413 void const* src,
size_t srcSize);
17416 void const* src,
size_t srcSize);
17419 void const* src,
size_t srcSize);
17422 void const* src,
size_t srcSize);
17425 void const* src,
size_t srcSize);
17429 void const* src,
size_t srcSize);
17432 void const* src,
size_t srcSize);
17435 void const* src,
size_t srcSize);
17438 void const* src,
size_t srcSize);
17441 void const* src,
size_t srcSize);
17444 void const* src,
size_t srcSize);
17447 void const* src,
size_t srcSize);
17451 void const* src,
size_t srcSize);
17454 void const* src,
size_t srcSize);
17457 void const* src,
size_t srcSize);
17460 void const* src,
size_t srcSize);
17463 void const* src,
size_t srcSize);
17466 void const* src,
size_t srcSize);
17470 void const* src,
size_t srcSize);
17473 void const* src,
size_t srcSize);
17476 void const* src,
size_t srcSize);
17479 void const* src,
size_t srcSize);
17482 void const* src,
size_t srcSize);
17485 void const* src,
size_t srcSize);
17488 void const* src,
size_t srcSize);
17491 #if defined (__cplusplus)
17511 #if defined (__cplusplus)
17522 void const* src,
size_t srcSize);
17525 void const* src,
size_t srcSize);
17528 void const* src,
size_t srcSize);
17533 void const* src,
size_t srcSize);
17536 void const* src,
size_t srcSize);
17540 void const* src,
size_t srcSize);
17543 void const* src,
size_t srcSize);
17549 #if defined (__cplusplus)
17569 #if defined (__cplusplus)
17580 #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
17624 ZSTD_paramSwitch_e useRowMatchFinder,
17625 void const* src,
size_t srcSize);
17635 U32 const minMatch);
17666 ZSTD_compressionParameters
const* cParams);
17668 #if defined (__cplusplus)
17686 #ifndef ZSTD_COMPRESS_HEAPMODE
17687 # define ZSTD_COMPRESS_HEAPMODE 0
17699 #ifndef ZSTD_HASHLOG3_MAX
17700 # define ZSTD_HASHLOG3_MAX 17
17716 if (r==0)
return ERROR(srcSize_wrong);
17762 if ((!customMem.customAlloc) ^ (!customMem.customFree))
return NULL;
17764 if (!cctx)
return NULL;
17774 if (workspaceSize <=
sizeof(
ZSTD_CCtx))
return NULL;
17775 if ((
size_t)workspace & 7)
return NULL;
17779 if (cctx == NULL)
return NULL;
17803 cctx->
cdict = NULL;
17810 return bufferSize + cdictSize;
17818 #ifdef ZSTD_MULTITHREAD
17826 if (cctx==NULL)
return 0;
17828 "not compatible with static CCtx");
17839 #ifdef ZSTD_MULTITHREAD
17850 if (cctx==NULL)
return 0;
17875 assert(mode != ZSTD_ps_auto);
17881 const ZSTD_compressionParameters*
const cParams) {
17882 #if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON)
17883 int const kHasSIMD128 = 1;
17885 int const kHasSIMD128 = 0;
17887 if (mode != ZSTD_ps_auto)
return mode;
17888 mode = ZSTD_ps_disable;
17891 if (cParams->windowLog > 14)
mode = ZSTD_ps_enable;
17893 if (cParams->windowLog > 17) mode = ZSTD_ps_enable;
17900 const ZSTD_compressionParameters*
const cParams) {
17901 if (mode != ZSTD_ps_auto)
return mode;
17902 return (cParams->strategy >=
ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable;
17907 const ZSTD_paramSwitch_e useRowMatchFinder,
17908 const U32 forDDSDict) {
17909 assert(useRowMatchFinder != ZSTD_ps_auto);
17921 const ZSTD_compressionParameters*
const cParams) {
17922 if (mode != ZSTD_ps_auto)
return mode;
17923 return (cParams->strategy >=
ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
17932 if (maxBlockSize == 0) {
17935 return maxBlockSize;
17940 if (value != ZSTD_ps_auto)
return value;
17942 return ZSTD_ps_disable;
17944 return ZSTD_ps_enable;
17955 ZSTD_compressionParameters cParams)
17957 ZSTD_CCtx_params cctxParams;
17960 cctxParams.cParams = cParams;
17964 if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) {
17966 assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
17967 assert(cctxParams.ldmParams.hashRateLog < 32);
17974 cctxParams.compressionLevel);
17980 ZSTD_customMem customMem)
17982 ZSTD_CCtx_params* params;
17983 if ((!customMem.customAlloc) ^ (!customMem.customFree))
return NULL;
17985 sizeof(ZSTD_CCtx_params), customMem);
17986 if (!params) {
return NULL; }
17988 params->customMem = customMem;
17999 if (params == NULL) {
return 0; }
18013 cctxParams->fParams.contentSizeFlag = 1;
18017 #define ZSTD_NO_CLEVEL 0
18025 const ZSTD_parameters* params,
18030 cctxParams->cParams = params->cParams;
18031 cctxParams->fParams = params->fParams;
18038 cctxParams->ldmParams.enableLdm =
ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, ¶ms->cParams);
18042 DEBUGLOG(4,
"ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
18043 cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
18059 ZSTD_CCtx_params* cctxParams,
const ZSTD_parameters* params)
18062 cctxParams->cParams = params->cParams;
18063 cctxParams->fParams = params->fParams;
18133 #ifdef ZSTD_MULTITHREAD
18142 #ifdef ZSTD_MULTITHREAD
18150 #ifdef ZSTD_MULTITHREAD
18159 case ZSTD_c_enableDedicatedDictSearch:
18180 bounds.
lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
18181 bounds.
upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
18185 bounds.
lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
18186 bounds.
upperBound = ZSTD_LDM_HASHRATELOG_MAX;
18190 case ZSTD_c_rsyncable:
18195 case ZSTD_c_forceMaxWindow :
18200 case ZSTD_c_format:
18206 case ZSTD_c_forceAttachDict:
18212 case ZSTD_c_literalCompressionMode:
18213 ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable);
18218 case ZSTD_c_targetCBlockSize:
18219 bounds.
lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
18220 bounds.
upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
18223 case ZSTD_c_srcSizeHint:
18228 case ZSTD_c_stableInBuffer:
18229 case ZSTD_c_stableOutBuffer:
18234 case ZSTD_c_blockDelimiters:
18239 case ZSTD_c_validateSequences:
18244 case ZSTD_c_useBlockSplitter:
18249 case ZSTD_c_useRowMatchFinder:
18254 case ZSTD_c_deterministicRefPrefix:
18259 case ZSTD_c_prefetchCDictTables:
18264 case ZSTD_c_enableSeqProducerFallback:
18269 case ZSTD_c_maxBlockSize:
18274 case ZSTD_c_searchForExternalRepcodes:
18280 bounds.
error =
ERROR(parameter_unsupported);
18297 #define BOUNDCHECK(cParam, val) { \
18298 RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
18299 parameter_outOfBound, "Param out of bounds"); \
18316 case ZSTD_c_format:
18321 case ZSTD_c_forceMaxWindow :
18325 case ZSTD_c_rsyncable:
18326 case ZSTD_c_enableDedicatedDictSearch:
18332 case ZSTD_c_forceAttachDict:
18333 case ZSTD_c_literalCompressionMode:
18334 case ZSTD_c_targetCBlockSize:
18335 case ZSTD_c_srcSizeHint:
18336 case ZSTD_c_stableInBuffer:
18337 case ZSTD_c_stableOutBuffer:
18338 case ZSTD_c_blockDelimiters:
18339 case ZSTD_c_validateSequences:
18340 case ZSTD_c_useBlockSplitter:
18341 case ZSTD_c_useRowMatchFinder:
18342 case ZSTD_c_deterministicRefPrefix:
18343 case ZSTD_c_prefetchCDictTables:
18344 case ZSTD_c_enableSeqProducerFallback:
18345 case ZSTD_c_maxBlockSize:
18346 case ZSTD_c_searchForExternalRepcodes:
18359 RETURN_ERROR(stage_wrong,
"can only set params in cctx init stage");
18366 "MT not compatible with static alloc");
18378 case ZSTD_c_format:
18382 case ZSTD_c_forceMaxWindow:
18383 case ZSTD_c_forceAttachDict:
18384 case ZSTD_c_literalCompressionMode:
18387 case ZSTD_c_rsyncable:
18388 case ZSTD_c_enableDedicatedDictSearch:
18393 case ZSTD_c_targetCBlockSize:
18394 case ZSTD_c_srcSizeHint:
18395 case ZSTD_c_stableInBuffer:
18396 case ZSTD_c_stableOutBuffer:
18397 case ZSTD_c_blockDelimiters:
18398 case ZSTD_c_validateSequences:
18399 case ZSTD_c_useBlockSplitter:
18400 case ZSTD_c_useRowMatchFinder:
18401 case ZSTD_c_deterministicRefPrefix:
18402 case ZSTD_c_prefetchCDictTables:
18403 case ZSTD_c_enableSeqProducerFallback:
18404 case ZSTD_c_maxBlockSize:
18405 case ZSTD_c_searchForExternalRepcodes:
18408 default:
RETURN_ERROR(parameter_unsupported,
"unknown parameter");
18416 DEBUGLOG(4,
"ZSTD_CCtxParams_setParameter (%i, %i)", (
int)param, value);
18419 case ZSTD_c_format :
18421 CCtxParams->format = (ZSTD_format_e)value;
18422 return (
size_t)CCtxParams->format;
18429 CCtxParams->compressionLevel =
value;
18430 if (CCtxParams->compressionLevel >= 0)
return (
size_t)CCtxParams->compressionLevel;
18437 CCtxParams->cParams.windowLog = (
U32)value;
18438 return CCtxParams->cParams.windowLog;
18443 CCtxParams->cParams.hashLog = (
U32)value;
18444 return CCtxParams->cParams.hashLog;
18449 CCtxParams->cParams.chainLog = (
U32)value;
18450 return CCtxParams->cParams.chainLog;
18455 CCtxParams->cParams.searchLog = (
U32)value;
18456 return (
size_t)
value;
18461 CCtxParams->cParams.minMatch = (
U32)value;
18462 return CCtxParams->cParams.minMatch;
18466 CCtxParams->cParams.targetLength = (
U32)value;
18467 return CCtxParams->cParams.targetLength;
18473 return (
size_t)CCtxParams->cParams.strategy;
18477 DEBUGLOG(4,
"set content size flag = %u", (value!=0));
18478 CCtxParams->fParams.contentSizeFlag =
value != 0;
18479 return (
size_t)CCtxParams->fParams.contentSizeFlag;
18483 CCtxParams->fParams.checksumFlag =
value != 0;
18484 return (
size_t)CCtxParams->fParams.checksumFlag;
18487 DEBUGLOG(4,
"set dictIDFlag = %u", (value!=0));
18488 CCtxParams->fParams.noDictIDFlag = !
value;
18489 return !CCtxParams->fParams.noDictIDFlag;
18491 case ZSTD_c_forceMaxWindow :
18492 CCtxParams->forceWindow = (
value != 0);
18493 return (
size_t)CCtxParams->forceWindow;
18495 case ZSTD_c_forceAttachDict : {
18496 const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
18497 BOUNDCHECK(ZSTD_c_forceAttachDict, (
int)pref);
18498 CCtxParams->attachDictPref = pref;
18499 return CCtxParams->attachDictPref;
18502 case ZSTD_c_literalCompressionMode : {
18503 const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
18504 BOUNDCHECK(ZSTD_c_literalCompressionMode, (
int)lcm);
18505 CCtxParams->literalCompressionMode = lcm;
18506 return CCtxParams->literalCompressionMode;
18510 #ifndef ZSTD_MULTITHREAD
18511 RETURN_ERROR_IF(value!=0, parameter_unsupported,
"not compiled with multithreading");
18515 CCtxParams->nbWorkers =
value;
18516 return CCtxParams->nbWorkers;
18520 #ifndef ZSTD_MULTITHREAD
18521 RETURN_ERROR_IF(value!=0, parameter_unsupported,
"not compiled with multithreading");
18529 CCtxParams->jobSize =
value;
18530 return CCtxParams->jobSize;
18534 #ifndef ZSTD_MULTITHREAD
18535 RETURN_ERROR_IF(value!=0, parameter_unsupported,
"not compiled with multithreading");
18539 CCtxParams->overlapLog =
value;
18540 return CCtxParams->overlapLog;
18543 case ZSTD_c_rsyncable :
18544 #ifndef ZSTD_MULTITHREAD
18545 RETURN_ERROR_IF(value!=0, parameter_unsupported,
"not compiled with multithreading");
18549 CCtxParams->rsyncable =
value;
18550 return CCtxParams->rsyncable;
18553 case ZSTD_c_enableDedicatedDictSearch :
18554 CCtxParams->enableDedicatedDictSearch = (
value!=0);
18555 return (
size_t)CCtxParams->enableDedicatedDictSearch;
18559 CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
18560 return CCtxParams->ldmParams.enableLdm;
18565 CCtxParams->ldmParams.hashLog = (
U32)value;
18566 return CCtxParams->ldmParams.hashLog;
18571 CCtxParams->ldmParams.minMatchLength = (
U32)value;
18572 return CCtxParams->ldmParams.minMatchLength;
18577 CCtxParams->ldmParams.bucketSizeLog = (
U32)value;
18578 return CCtxParams->ldmParams.bucketSizeLog;
18583 CCtxParams->ldmParams.hashRateLog = (
U32)value;
18584 return CCtxParams->ldmParams.hashRateLog;
18586 case ZSTD_c_targetCBlockSize :
18589 CCtxParams->targetCBlockSize = (
U32)value;
18590 return CCtxParams->targetCBlockSize;
18592 case ZSTD_c_srcSizeHint :
18595 CCtxParams->srcSizeHint =
value;
18596 return (
size_t)CCtxParams->srcSizeHint;
18598 case ZSTD_c_stableInBuffer:
18601 return CCtxParams->inBufferMode;
18603 case ZSTD_c_stableOutBuffer:
18606 return CCtxParams->outBufferMode;
18608 case ZSTD_c_blockDelimiters:
18610 CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
18611 return CCtxParams->blockDelimiters;
18613 case ZSTD_c_validateSequences:
18614 BOUNDCHECK(ZSTD_c_validateSequences, value);
18615 CCtxParams->validateSequences =
value;
18616 return CCtxParams->validateSequences;
18618 case ZSTD_c_useBlockSplitter:
18620 CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;
18621 return CCtxParams->useBlockSplitter;
18623 case ZSTD_c_useRowMatchFinder:
18624 BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
18625 CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value;
18626 return CCtxParams->useRowMatchFinder;
18628 case ZSTD_c_deterministicRefPrefix:
18629 BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
18630 CCtxParams->deterministicRefPrefix = !!
value;
18631 return CCtxParams->deterministicRefPrefix;
18633 case ZSTD_c_prefetchCDictTables:
18634 BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
18635 CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
18636 return CCtxParams->prefetchCDictTables;
18638 case ZSTD_c_enableSeqProducerFallback:
18639 BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
18640 CCtxParams->enableMatchFinderFallback =
value;
18641 return CCtxParams->enableMatchFinderFallback;
18643 case ZSTD_c_maxBlockSize:
18646 CCtxParams->maxBlockSize =
value;
18647 return CCtxParams->maxBlockSize;
18649 case ZSTD_c_searchForExternalRepcodes:
18650 BOUNDCHECK(ZSTD_c_searchForExternalRepcodes, value);
18651 CCtxParams->searchForExternalRepcodes = (ZSTD_paramSwitch_e)value;
18652 return CCtxParams->searchForExternalRepcodes;
18654 default:
RETURN_ERROR(parameter_unsupported,
"unknown parameter");
18664 ZSTD_CCtx_params
const* CCtxParams,
ZSTD_cParameter param,
int* value)
18668 case ZSTD_c_format :
18669 *
value = CCtxParams->format;
18672 *
value = CCtxParams->compressionLevel;
18675 *
value = (
int)CCtxParams->cParams.windowLog;
18678 *value = (
int)CCtxParams->cParams.hashLog;
18681 *
value = (
int)CCtxParams->cParams.chainLog;
18684 *value = CCtxParams->cParams.searchLog;
18687 *value = CCtxParams->cParams.minMatch;
18690 *value = CCtxParams->cParams.targetLength;
18693 *value = (
unsigned)CCtxParams->cParams.strategy;
18696 *
value = CCtxParams->fParams.contentSizeFlag;
18699 *
value = CCtxParams->fParams.checksumFlag;
18702 *
value = !CCtxParams->fParams.noDictIDFlag;
18704 case ZSTD_c_forceMaxWindow :
18705 *
value = CCtxParams->forceWindow;
18707 case ZSTD_c_forceAttachDict :
18708 *
value = CCtxParams->attachDictPref;
18710 case ZSTD_c_literalCompressionMode :
18711 *
value = CCtxParams->literalCompressionMode;
18714 #ifndef ZSTD_MULTITHREAD
18715 assert(CCtxParams->nbWorkers == 0);
18717 *
value = CCtxParams->nbWorkers;
18720 #ifndef ZSTD_MULTITHREAD
18721 RETURN_ERROR(parameter_unsupported,
"not compiled with multithreading");
18723 assert(CCtxParams->jobSize <= INT_MAX);
18724 *
value = (
int)CCtxParams->jobSize;
18729 RETURN_ERROR(parameter_unsupported,
"not compiled with multithreading");
18731 *
value = CCtxParams->overlapLog;
18734 case ZSTD_c_rsyncable :
18735 #ifndef ZSTD_MULTITHREAD
18736 RETURN_ERROR(parameter_unsupported,
"not compiled with multithreading");
18738 *
value = CCtxParams->rsyncable;
18741 case ZSTD_c_enableDedicatedDictSearch :
18742 *
value = CCtxParams->enableDedicatedDictSearch;
18745 *
value = CCtxParams->ldmParams.enableLdm;
18748 *
value = CCtxParams->ldmParams.hashLog;
18751 *
value = CCtxParams->ldmParams.minMatchLength;
18754 *
value = CCtxParams->ldmParams.bucketSizeLog;
18757 *
value = CCtxParams->ldmParams.hashRateLog;
18759 case ZSTD_c_targetCBlockSize :
18760 *
value = (
int)CCtxParams->targetCBlockSize;
18762 case ZSTD_c_srcSizeHint :
18763 *value = (
int)CCtxParams->srcSizeHint;
18765 case ZSTD_c_stableInBuffer :
18766 *
value = (
int)CCtxParams->inBufferMode;
18768 case ZSTD_c_stableOutBuffer :
18769 *value = (
int)CCtxParams->outBufferMode;
18771 case ZSTD_c_blockDelimiters :
18772 *
value = (
int)CCtxParams->blockDelimiters;
18774 case ZSTD_c_validateSequences :
18775 *value = (
int)CCtxParams->validateSequences;
18777 case ZSTD_c_useBlockSplitter :
18778 *
value = (
int)CCtxParams->useBlockSplitter;
18780 case ZSTD_c_useRowMatchFinder :
18781 *value = (
int)CCtxParams->useRowMatchFinder;
18783 case ZSTD_c_deterministicRefPrefix:
18784 *
value = (
int)CCtxParams->deterministicRefPrefix;
18786 case ZSTD_c_prefetchCDictTables:
18787 *value = (
int)CCtxParams->prefetchCDictTables;
18789 case ZSTD_c_enableSeqProducerFallback:
18790 *
value = CCtxParams->enableMatchFinderFallback;
18792 case ZSTD_c_maxBlockSize:
18793 *
value = (
int)CCtxParams->maxBlockSize;
18795 case ZSTD_c_searchForExternalRepcodes:
18796 *value = (
int)CCtxParams->searchForExternalRepcodes;
18798 default:
RETURN_ERROR(parameter_unsupported,
"unknown parameter");
18811 ZSTD_CCtx* cctx,
const ZSTD_CCtx_params* params)
18813 DEBUGLOG(4,
"ZSTD_CCtx_setParametersUsingCCtxParams");
18815 "The context is in the wrong stage!");
18817 "Can't override parameters with cdict attached (some must "
18818 "be inherited from the cdict).");
18827 DEBUGLOG(4,
"ZSTD_CCtx_setCParams");
18843 DEBUGLOG(4,
"ZSTD_CCtx_setFParams");
18852 DEBUGLOG(4,
"ZSTD_CCtx_setParams");
18864 DEBUGLOG(4,
"ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize);
18866 "Can't set pledgedSrcSize when not in init stage.");
18873 size_t const dictSize);
18875 const ZSTD_compressionParameters* cParams);
18877 ZSTD_compressionParameters* cParams);
18887 if (dl->
dict == NULL) {
18894 if (dl->
cdict != NULL) {
18910 RETURN_ERROR_IF(!dl->cdict, memory_allocation,
"ZSTD_createCDict_advanced failed");
18911 cctx->
cdict = dl->cdict;
18917 const void* dict,
size_t dictSize,
18918 ZSTD_dictLoadMethod_e dictLoadMethod,
18919 ZSTD_dictContentType_e dictContentType)
18921 DEBUGLOG(4,
"ZSTD_CCtx_loadDictionary_advanced (size: %u)", (
U32)dictSize);
18923 "Can't load a dictionary when cctx is not in init stage.");
18925 if (dict == NULL || dictSize == 0)
18927 if (dictLoadMethod == ZSTD_dlm_byRef) {
18933 "static CCtx can't allocate for an internal copy of dictionary");
18936 "allocation failed for dictionary content");
18950 cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
18956 cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
18963 "Can't ref a dict when ctx not in init stage.");
18966 cctx->
cdict = cdict;
18973 "Can't ref a pool when ctx not in init stage.");
18984 ZSTD_CCtx* cctx,
const void* prefix,
size_t prefixSize, ZSTD_dictContentType_e dictContentType)
18987 "Can't ref a prefix when ctx not in init stage.");
18989 if (prefix != NULL && prefixSize > 0) {
19009 "Reset parameters is only possible during init stage.");
19036 static ZSTD_compressionParameters
19039 # define CLAMP_TYPE(cParam, val, type) { \
19040 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
19041 if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
19042 else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
19044 # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
19060 return hashLog - btScale;
19073 const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
19075 if (dictSize == 0) {
19078 assert(windowLog <= ZSTD_WINDOWLOG_MAX);
19081 U64 const windowSize = 1ULL << windowLog;
19082 U64 const dictAndWindowSize = dictSize + windowSize;
19087 if (windowSize >= dictSize +
srcSize) {
19089 }
else if (dictAndWindowSize >= maxWindowSize) {
19090 return ZSTD_WINDOWLOG_MAX;
19104 static ZSTD_compressionParameters
19109 ZSTD_paramSwitch_e useRowMatchFinder)
19111 const U64 minSrcSize = 513;
19112 const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
19143 if ( (
srcSize <= maxWindowResize)
19144 && (dictSize <= maxWindowResize) ) {
19146 static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
19147 U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
19149 if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
19154 if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
19155 if (cycleLog > dictAndWindowLog)
19156 cPar.chainLog -= (cycleLog - dictAndWindowLog);
19167 if (cPar.hashLog > maxShortCacheHashLog) {
19168 cPar.hashLog = maxShortCacheHashLog;
19170 if (cPar.chainLog > maxShortCacheHashLog) {
19171 cPar.chainLog = maxShortCacheHashLog;
19181 if (useRowMatchFinder == ZSTD_ps_auto)
19182 useRowMatchFinder = ZSTD_ps_enable;
19189 U32 const rowLog =
BOUNDED(4, cPar.searchLog, 6);
19191 U32 const maxHashLog = maxRowHashLog + rowLog;
19192 assert(cPar.hashLog >= rowLog);
19193 if (cPar.hashLog > maxHashLog) {
19194 cPar.hashLog = maxHashLog;
19201 ZSTD_compressionParameters
19215 ZSTD_compressionParameters* cParams,
19216 const ZSTD_compressionParameters* overrides)
19218 if (overrides->windowLog) cParams->windowLog = overrides->windowLog;
19219 if (overrides->hashLog) cParams->hashLog = overrides->hashLog;
19220 if (overrides->chainLog) cParams->chainLog = overrides->chainLog;
19221 if (overrides->searchLog) cParams->searchLog = overrides->searchLog;
19222 if (overrides->minMatch) cParams->minMatch = overrides->minMatch;
19223 if (overrides->targetLength) cParams->targetLength = overrides->targetLength;
19224 if (overrides->strategy) cParams->strategy = overrides->strategy;
19230 ZSTD_compressionParameters cParams;
19232 srcSizeHint = CCtxParams->srcSizeHint;
19244 const ZSTD_paramSwitch_e useRowMatchFinder,
19245 const U32 enableDedicatedDictSearch,
19249 size_t const chainSize =
ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
19250 ? ((
size_t)1 << cParams->chainLog)
19252 size_t const hSize = ((
size_t)1) << cParams->hashLog;
19254 size_t const h3Size = hashLog3 ? ((
size_t)1) << hashLog3 : 0;
19257 size_t const tableSpace = chainSize *
sizeof(
U32)
19258 + hSize *
sizeof(
U32)
19259 + h3Size *
sizeof(
U32);
19260 size_t const optPotentialSpace =
19270 size_t const optSpace = (forCCtx && (cParams->strategy >=
ZSTD_btopt))
19271 ? optPotentialSpace
19276 ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
19277 assert(useRowMatchFinder != ZSTD_ps_auto);
19279 DEBUGLOG(4,
"chainSize: %u - hSize: %u - h3Size: %u",
19281 return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
19286 static size_t ZSTD_maxNbSeq(
size_t blockSize,
unsigned minMatch,
int useSequenceProducer) {
19287 U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4;
19288 return blockSize / divider;
19292 const ZSTD_compressionParameters* cParams,
19294 const int isStatic,
19295 const ZSTD_paramSwitch_e useRowMatchFinder,
19296 const size_t buffInSize,
19297 const size_t buffOutSize,
19298 const U64 pledgedSrcSize,
19299 int useSequenceProducer,
19300 size_t maxBlockSize)
19302 size_t const windowSize = (
size_t)
BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
19304 size_t const maxNbSeq =
ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
19314 size_t const ldmSeqSpace = ldmParams->
enableLdm == ZSTD_ps_enable ?
19324 size_t const externalSeqSpace = useSequenceProducer
19328 size_t const neededSpace =
19339 DEBUGLOG(5,
"estimate workspace : %u", (
U32)neededSpace);
19340 return neededSpace;
19345 ZSTD_compressionParameters
const cParams =
19350 RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC,
"Estimate CCtx size is supported for single-threaded compression only.");
19355 &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0,
ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
19363 size_t noRowCCtxSize;
19364 size_t rowCCtxSize;
19365 initialParams.useRowMatchFinder = ZSTD_ps_disable;
19367 initialParams.useRowMatchFinder = ZSTD_ps_enable;
19369 return MAX(noRowCCtxSize, rowCCtxSize);
19378 size_t largestSize = 0;
19380 for (; tier < 4; ++tier) {
19385 return largestSize;
19391 size_t memBudget = 0;
19395 if (newMB > memBudget) memBudget = newMB;
19402 RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC,
"Estimate CCtx size is supported for single-threaded compression only.");
19403 { ZSTD_compressionParameters
const cParams =
19407 ? ((
size_t)1 << cParams.windowLog) + blockSize
19415 &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
19425 size_t noRowCCtxSize;
19426 size_t rowCCtxSize;
19427 initialParams.useRowMatchFinder = ZSTD_ps_disable;
19429 initialParams.useRowMatchFinder = ZSTD_ps_enable;
19431 return MAX(noRowCCtxSize, rowCCtxSize);
19446 size_t memBudget = 0;
19449 if (newMB > memBudget) memBudget = newMB;
19460 #ifdef ZSTD_MULTITHREAD
19465 { ZSTD_frameProgression
fp;
19466 size_t const buffered = (cctx->
inBuff == NULL) ? 0 :
19474 fp.currentJobID = 0;
19475 fp.nbActiveWorkers = 0;
19484 #ifdef ZSTD_MULTITHREAD
19494 ZSTD_compressionParameters cParams2)
19498 assert(cParams1.windowLog == cParams2.windowLog);
19499 assert(cParams1.chainLog == cParams2.chainLog);
19500 assert(cParams1.hashLog == cParams2.hashLog);
19501 assert(cParams1.searchLog == cParams2.searchLog);
19502 assert(cParams1.minMatch == cParams2.minMatch);
19503 assert(cParams1.targetLength == cParams2.targetLength);
19504 assert(cParams1.strategy == cParams2.strategy);
19562 val *= 0x9FB21C651E98DF25ULL;
19563 val ^= (val >> 35) + len ;
19564 val *= 0x9FB21C651E98DF25ULL;
19565 return val ^ (val >> 28);
19576 const ZSTD_compressionParameters* cParams,
19577 const ZSTD_paramSwitch_e useRowMatchFinder,
19585 ? ((
size_t)1 << cParams->chainLog)
19587 size_t const hSize = ((
size_t)1) << cParams->hashLog;
19589 size_t const h3Size = hashLog3 ? ((
size_t)1) << hashLog3 : 0;
19592 assert(useRowMatchFinder != ZSTD_ps_auto);
19607 DEBUGLOG(5,
"reserving table space");
19613 "failed a workspace allocation in ZSTD_reset_matchState");
19623 size_t const tagTableSize = hSize;
19636 U32 const rowLog =
BOUNDED(4, cParams->searchLog, 6);
19637 assert(cParams->hashLog >= rowLog);
19644 DEBUGLOG(4,
"reserving optimal parser space");
19656 "failed a workspace allocation in ZSTD_reset_matchState");
19667 #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
19690 ZSTD_CCtx_params
const* params,
19691 U64 const pledgedSrcSize,
19692 size_t const loadedDictSize,
19697 DEBUGLOG(4,
"ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
19698 (
U32)pledgedSrcSize, params->cParams.windowLog, (
int)params->useRowMatchFinder, (
int)params->useBlockSplitter);
19709 assert(params->useRowMatchFinder != ZSTD_ps_auto);
19710 assert(params->useBlockSplitter != ZSTD_ps_auto);
19711 assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
19712 assert(params->maxBlockSize != 0);
19713 if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
19716 assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
19717 assert(params->ldmParams.hashRateLog < 32);
19720 {
size_t const windowSize =
MAX(1, (
size_t)
MIN(((
U64)1 << params->cParams.windowLog), pledgedSrcSize));
19721 size_t const blockSize =
MIN(params->maxBlockSize, windowSize);
19722 size_t const maxNbSeq =
ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);
19727 ? windowSize + blockSize
19736 size_t const neededSpace =
19738 ¶ms->cParams, ¶ms->ldmParams, zc->
staticSize != 0, params->useRowMatchFinder,
19739 buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);
19740 int resizeWorkspace;
19749 resizeWorkspace = workspaceTooSmall || workspaceWasteful;
19750 DEBUGLOG(4,
"Need %zu B workspace", neededSpace);
19751 DEBUGLOG(4,
"windowSize: %zu - blockSize: %zu", windowSize, blockSize);
19753 if (resizeWorkspace) {
19754 DEBUGLOG(4,
"Resize workspaceSize from %zuKB to %zuKB",
19756 neededSpace >> 10);
19765 DEBUGLOG(5,
"reserving object space");
19788 DEBUGLOG(4,
"pledged content size : %u ; flag : %u",
19789 (
unsigned)pledgedSrcSize, zc->
appliedParams.fParams.contentSizeFlag);
19803 params->useRowMatchFinder,
19811 if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
19813 size_t const ldmHSize = ((
size_t)1) << params->ldmParams.hashLog;
19824 if (params->useSequenceProducer) {
19846 if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
19848 size_t const numBuckets =
19849 ((
size_t)1) << (params->ldmParams.hashLog -
19850 params->ldmParams.bucketSizeLog);
19899 const ZSTD_CCtx_params* params,
19900 U64 pledgedSrcSize)
19904 return dedicatedDictSearch
19905 || ( ( pledgedSrcSize <= cutoff
19907 || params->attachDictPref == ZSTD_dictForceAttach )
19908 && params->attachDictPref != ZSTD_dictForceCopy
19909 && !params->forceWindow );
19916 ZSTD_CCtx_params params,
19917 U64 pledgedSrcSize,
19920 DEBUGLOG(4,
"ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
19921 (
unsigned long long)pledgedSrcSize);
19924 unsigned const windowLog = params.cParams.windowLog;
19936 params.useRowMatchFinder);
19937 params.cParams.windowLog = windowLog;
19948 if (cdictLen == 0) {
19950 DEBUGLOG(4,
"skipping attaching empty dictionary");
19952 DEBUGLOG(4,
"attaching dictionary into context");
19976 ZSTD_compressionParameters
const* cParams) {
19981 for (i = 0; i < tableSize; i++) {
19982 U32 const taggedIndex = src[i];
19993 ZSTD_CCtx_params params,
19994 U64 pledgedSrcSize,
20000 DEBUGLOG(4,
"ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
20001 (
unsigned long long)pledgedSrcSize);
20003 {
unsigned const windowLog = params.cParams.windowLog;
20006 params.cParams = *cdict_cParams;
20007 params.cParams.windowLog = windowLog;
20018 assert(params.useRowMatchFinder != ZSTD_ps_auto);
20022 ? ((
size_t)1 << cdict_cParams->chainLog)
20024 size_t const hSize = (
size_t)1 << cdict_cParams->hashLog;
20028 hSize, cdict_cParams);
20034 chainSize, cdict_cParams);
20038 size_t const tagTableSize = hSize;
20048 size_t const h3Size = h3log ? ((
size_t)1 << h3log) : 0;
20077 const ZSTD_CCtx_params* params,
20078 U64 pledgedSrcSize,
20082 DEBUGLOG(4,
"ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
20083 (
unsigned)pledgedSrcSize);
20087 cctx, cdict, *params, pledgedSrcSize, zbuff);
20090 cctx, cdict, *params, pledgedSrcSize, zbuff);
20103 ZSTD_frameParameters fParams,
20104 U64 pledgedSrcSize,
20108 "Can't copy a ctx that's not in init stage.");
20109 DEBUGLOG(5,
"ZSTD_copyCCtx_internal");
20117 params.useRowMatchFinder = srcCCtx->
appliedParams.useRowMatchFinder;
20118 params.useBlockSplitter = srcCCtx->
appliedParams.useBlockSplitter;
20120 params.fParams = fParams;
20142 size_t const h3Size = h3log ? ((
size_t)1 << h3log) : 0;
20146 hSize *
sizeof(
U32));
20149 chainSize *
sizeof(
U32));
20152 h3Size *
sizeof(
U32));
20181 ZSTD_frameParameters fParams = { 1 , 0 , 0 };
20188 fParams, pledgedSrcSize,
20193 #define ZSTD_ROWSIZE 16
20211 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
20224 for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
20232 }
else if (
table[cellNb] < reducerThreshold) {
20235 newVal =
table[cellNb] - reducerValue;
20237 table[cellNb] = newVal;
20256 {
U32 const hSize = (
U32)1 << params->cParams.hashLog;
20261 U32 const chainSize = (
U32)1 << params->cParams.chainLog;
20284 BYTE*
const llCodeTable = seqStorePtr->
llCode;
20285 BYTE*
const ofCodeTable = seqStorePtr->
ofCode;
20286 BYTE*
const mlCodeTable = seqStorePtr->
mlCode;
20289 int longOffsets = 0;
20290 assert(nbSeq <= seqStorePtr->maxNbSeq);
20291 for (u=0; u<nbSeq; u++) {
20296 ofCodeTable[u] = (
BYTE)ofCode;
20306 return longOffsets;
20315 DEBUGLOG(5,
"ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
20316 return (cctxParams->targetCBlockSize != 0);
20326 DEBUGLOG(5,
"ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter);
20327 assert(cctxParams->useBlockSplitter != ZSTD_ps_auto);
20328 return (cctxParams->useBlockSplitter == ZSTD_ps_enable);
20339 size_t lastCountSize;
20352 const seqStore_t* seqStorePtr,
size_t nbSeq,
20356 void* entropyWorkspace,
size_t entropyWkspSize)
20359 const BYTE*
const oend = dstEnd;
20364 const BYTE*
const ofCodeTable = seqStorePtr->
ofCode;
20365 const BYTE*
const llCodeTable = seqStorePtr->
llCode;
20366 const BYTE*
const mlCodeTable = seqStorePtr->
mlCode;
20375 {
unsigned max =
MaxLL;
20376 size_t const mostFrequent =
HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);
20380 countWorkspace, max, mostFrequent, nbSeq,
20387 op, (
size_t)(oend -
op),
20389 countWorkspace, max, llCodeTable, nbSeq,
20393 entropyWorkspace, entropyWkspSize);
20395 DEBUGLOG(3,
"ZSTD_buildCTable for LitLens failed");
20396 stats.
size = countSize;
20405 {
unsigned max =
MaxOff;
20407 countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);
20413 countWorkspace, max, mostFrequent, nbSeq,
20416 defaultPolicy, strategy);
20419 op, (
size_t)(oend -
op),
20421 countWorkspace, max, ofCodeTable, nbSeq,
20425 entropyWorkspace, entropyWkspSize);
20427 DEBUGLOG(3,
"ZSTD_buildCTable for Offsets failed");
20428 stats.
size = countSize;
20437 {
unsigned max =
MaxML;
20439 countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);
20440 DEBUGLOG(5,
"Building ML table (remaining space : %i)", (
int)(oend-
op));
20443 countWorkspace, max, mostFrequent, nbSeq,
20449 op, (
size_t)(oend -
op),
20451 countWorkspace, max, mlCodeTable, nbSeq,
20455 entropyWorkspace, entropyWkspSize);
20457 DEBUGLOG(3,
"ZSTD_buildCTable for MatchLengths failed");
20458 stats.
size = countSize;
20474 #define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
20480 const ZSTD_CCtx_params* cctxParams,
20481 void*
dst,
size_t dstCapacity,
20482 void* entropyWorkspace,
size_t entropyWkspSize,
20485 ZSTD_strategy const strategy = cctxParams->cParams.strategy;
20486 unsigned*
count = (
unsigned*)entropyWorkspace;
20492 const BYTE*
const ofCodeTable = seqStorePtr->
ofCode;
20493 const BYTE*
const llCodeTable = seqStorePtr->
llCode;
20494 const BYTE*
const mlCodeTable = seqStorePtr->
mlCode;
20496 BYTE*
const oend = ostart + dstCapacity;
20498 size_t lastCountSize;
20499 int longOffsets = 0;
20502 entropyWkspSize -= (
MaxSeq + 1) *
sizeof(*
count);
20504 DEBUGLOG(5,
"ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity);
20519 entropyWorkspace, entropyWkspSize,
20520 &prevEntropy->
huf, &nextEntropy->
huf,
20521 cctxParams->cParams.strategy,
20523 suspectUncompressible, bmi2);
20525 assert(cSize <= dstCapacity);
20531 dstSize_tooSmall,
"Can't fit seq hdr in output buf!");
20535 op[0] = (
BYTE)((nbSeq>>8) + 0x80);
20547 return (
size_t)(
op - ostart);
20549 {
BYTE*
const seqHead =
op++;
20553 &prevEntropy->
fse, &nextEntropy->
fse,
20556 entropyWorkspace, entropyWkspSize);
20565 op, (
size_t)(oend -
op),
20566 CTable_MatchLength, mlCodeTable,
20567 CTable_OffsetBits, ofCodeTable,
20568 CTable_LitLength, llCodeTable,
20570 longOffsets, bmi2);
20572 op += bitstreamSize;
20582 if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
20584 assert(lastCountSize + bitstreamSize == 3);
20585 DEBUGLOG(5,
"Avoiding bug in zstd decoder in versions <= 1.3.4 by "
20586 "emitting an uncompressed block.");
20591 DEBUGLOG(5,
"compressed block size : %u", (
unsigned)(
op - ostart));
20592 return (
size_t)(
op - ostart);
20600 const ZSTD_CCtx_params* cctxParams,
20601 void*
dst,
size_t dstCapacity,
20603 void* entropyWorkspace,
size_t entropyWkspSize,
20607 seqStorePtr, prevEntropy, nextEntropy, cctxParams,
20609 entropyWorkspace, entropyWkspSize, bmi2);
20610 if (cSize == 0)
return 0;
20614 if ((cSize ==
ERROR(dstSize_tooSmall)) & (
srcSize <= dstCapacity)) {
20615 DEBUGLOG(4,
"not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity);
20622 if (cSize >= maxCSize)
return 0;
20624 DEBUGLOG(5,
"ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
20683 DEBUGLOG(4,
"Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (
int)dictMode, (
int)strat, (
int)useRowMatchFinder);
20699 DEBUGLOG(4,
"Selecting a row-based matchfinder");
20700 assert(useRowMatchFinder != ZSTD_ps_auto);
20701 selectedCompressor = rowBasedBlockCompressors[(
int)dictMode][(
int)strat - (
int)
ZSTD_greedy];
20703 selectedCompressor = blockCompressor[(
int)dictMode][(
int)strat];
20705 assert(selectedCompressor != NULL);
20706 return selectedCompressor;
20713 seqStorePtr->
lit += lastLLSize;
20730 ZSTD_Sequence* outSeqs,
size_t nbExternalSeqs,
size_t outSeqsCapacity,
size_t srcSize
20733 nbExternalSeqs > outSeqsCapacity,
20734 sequenceProducer_failed,
20735 "External sequence producer returned error code %lu",
20736 (
unsigned long)nbExternalSeqs
20740 nbExternalSeqs == 0 &&
srcSize > 0,
20741 sequenceProducer_failed,
20742 "Got zero sequences from external sequence producer for a non-empty src buffer!"
20746 ZSTD_memset(&outSeqs[0], 0,
sizeof(ZSTD_Sequence));
20751 ZSTD_Sequence
const lastSeq = outSeqs[nbExternalSeqs - 1];
20754 if (lastSeq.offset == 0 && lastSeq.matchLength == 0) {
20755 return nbExternalSeqs;
20761 nbExternalSeqs == outSeqsCapacity,
20762 sequenceProducer_failed,
20763 "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"
20767 ZSTD_memset(&outSeqs[nbExternalSeqs], 0,
sizeof(ZSTD_Sequence));
20768 return nbExternalSeqs + 1;
20779 size_t matchLenSum, litLenSum, i;
20782 for (i = 0; i < seqBufSize; i++) {
20783 litLenSum += seqBuf[i].litLength;
20784 matchLenSum += seqBuf[i].matchLength;
20786 return litLenSum + matchLenSum;
20820 const BYTE*
const istart = (
const BYTE*)src;
20821 const U32 curr = (
U32)(istart-base);
20822 if (
sizeof(ptrdiff_t)==8)
assert(istart - base < (ptrdiff_t)(
U32)(-1));
20841 parameter_combination_unsupported,
20842 "Long-distance matching with external sequence producer enabled is not currently supported."
20853 }
else if (zc->
appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
20860 parameter_combination_unsupported,
20861 "Long-distance matching with external sequence producer enabled is not currently supported."
20907 RETURN_ERROR_IF(seqLenSum >
srcSize, externalSequences_invalid,
"External sequences imply too large a block!");
20915 "Failed to copy external sequences to seqStore!"
20918 DEBUGLOG(5,
"Copied %lu sequences from external sequence producer to internal seqStore.", (
unsigned long)nbExternalSeqs);
20924 return nbPostProcessedSeqs;
20934 "External sequence producer returned error code %lu. Falling back to internal parser.",
20935 (
unsigned long)nbExternalSeqs
20956 size_t seqStoreSeqSize = seqStore->
sequences - seqStoreSeqs;
20958 size_t literalsRead = 0;
20969 for (i = 0; i < seqStoreSeqSize; ++i) {
20971 outSeqs[i].litLength = seqStoreSeqs[i].
litLength;
20973 outSeqs[i].rep = 0;
20977 outSeqs[i].litLength += 0x10000;
20979 outSeqs[i].matchLength += 0x10000;
20985 outSeqs[i].rep = seqStoreSeqs[i].
offBase;
20986 if (outSeqs[i].litLength != 0) {
20987 rawOffset = updatedRepcodes.
rep[outSeqs[i].rep - 1];
20989 if (outSeqs[i].rep == 3) {
20990 rawOffset = updatedRepcodes.
rep[0] - 1;
20992 rawOffset = updatedRepcodes.
rep[outSeqs[i].rep];
20996 outSeqs[i].offset = rawOffset;
21000 seqStoreSeqs[i].offBase,
21001 seqStoreSeqs[i].litLength == 0);
21002 literalsRead += outSeqs[i].litLength;
21008 assert(seqStoreLiteralsSize >= literalsRead);
21009 lastLLSize = seqStoreLiteralsSize - literalsRead;
21010 outSeqs[i].litLength = (
U32)lastLLSize;
21011 outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
21017 return (
srcSize / ZSTD_MINMATCH_MIN) + 1;
21021 size_t outSeqsSize,
const void* src,
size_t srcSize)
21043 for (;
in < seqsSize; ++
in) {
21044 if (sequences[
in].offset == 0 && sequences[
in].matchLength == 0) {
21045 if (
in != seqsSize - 1) {
21046 sequences[
in+1].litLength += sequences[
in].litLength;
21049 sequences[out] = sequences[
in];
21060 const size_t valueST = (
size_t)((
U64)
value * 0x0101010101010101ULL);
21061 const size_t unrollSize =
sizeof(
size_t) * 4;
21062 const size_t unrollMask = unrollSize - 1;
21063 const size_t prefixLength =
length & unrollMask;
21065 if (
length == 1)
return 1;
21067 if (prefixLength &&
ZSTD_count(
ip+1,
ip,
ip+prefixLength) != prefixLength-1) {
21070 for (i = prefixLength; i !=
length; i += unrollSize) {
21072 for (u = 0; u < unrollSize; u +=
sizeof(
size_t)) {
21088 return nbSeqs < 4 && nbLits < 10;
21103 U32 const cBlockHeader = cSize == 1 ?
21104 lastBlock + (((
U32)
bt_rle)<<1) + (
U32)(blockSize << 3) :
21107 DEBUGLOG(3,
"writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
21122 const int literalsCompressionIsDisabled,
21123 void* workspace,
size_t wkspSize,
21126 BYTE*
const wkspStart = (
BYTE*)workspace;
21127 BYTE*
const wkspEnd = wkspStart + wkspSize;
21128 BYTE*
const countWkspStart = wkspStart;
21129 unsigned*
const countWksp = (
unsigned*)workspace;
21131 BYTE*
const nodeWksp = countWkspStart + countWkspSize;
21132 const size_t nodeWkspSize = (
size_t)(wkspEnd - nodeWksp);
21136 DEBUGLOG(5,
"ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)",
srcSize);
21141 if (literalsCompressionIsDisabled) {
21142 DEBUGLOG(5,
"set_basic - disabled");
21148 #ifndef COMPRESS_LITERALS_SIZE_MIN
21149 # define COMPRESS_LITERALS_SIZE_MIN 63
21153 DEBUGLOG(5,
"set_basic - too small");
21159 {
size_t const largest =
21162 workspace, wkspSize);
21170 if (largest <= (
srcSize >> 7)+4) {
21172 DEBUGLOG(5,
"set_basic - no gain");
21188 maxSymbolValue, huffLog,
21189 nodeWksp, nodeWkspSize);
21191 huffLog = (
U32)maxBits;
21199 nodeWksp, nodeWkspSize);
21204 if (oldCSize <
srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >=
srcSize)) {
21205 DEBUGLOG(5,
"set_repeat - smaller");
21210 if (newCSize + hSize >=
srcSize) {
21211 DEBUGLOG(5,
"set_basic - no gains");
21216 DEBUGLOG(5,
"set_compressed (hSize=%u)", (
U32)hSize);
21248 const ZSTD_CCtx_params* cctxParams,
21250 void* workspace,
size_t wkspSize)
21252 ZSTD_strategy const strategy = cctxParams->cParams.strategy;
21257 unsigned* countWorkspace = (
unsigned*)workspace;
21258 unsigned* entropyWorkspace = countWorkspace + (
MaxSeq + 1);
21259 size_t entropyWorkspaceSize = wkspSize - (
MaxSeq + 1) *
sizeof(*countWorkspace);
21262 DEBUGLOG(5,
"ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
21264 prevEntropy, nextEntropy,
op, oend,
21265 strategy, countWorkspace,
21266 entropyWorkspace, entropyWorkspaceSize)
21287 const ZSTD_CCtx_params* cctxParams,
21289 void* workspace,
size_t wkspSize)
21297 &prevEntropy->
huf, &nextEntropy->
huf,
21300 workspace, wkspSize, hufFlags);
21305 &prevEntropy->
fse, &nextEntropy->
fse,
21308 workspace, wkspSize);
21318 void* workspace,
size_t wkspSize,
21321 unsigned*
const countWksp = (
unsigned*)workspace;
21323 size_t literalSectionHeaderSize = 3 + (litSize >= 1
KB) + (litSize >= 16
KB);
21324 U32 singleStream = litSize < 256;
21332 if (writeEntropy) cLitSizeEstimate += hufMetadata->
hufDesSize;
21333 if (!singleStream) cLitSizeEstimate += 6;
21334 return cLitSizeEstimate + literalSectionHeaderSize;
21343 const BYTE* codeTable,
size_t nbSeq,
unsigned maxCode,
21345 const U8* additionalBits,
21346 short const* defaultNorm,
U32 defaultNormLog,
U32 defaultMax,
21347 void* workspace,
size_t wkspSize)
21349 unsigned*
const countWksp = (
unsigned*)workspace;
21350 const BYTE* ctp = codeTable;
21351 const BYTE*
const ctStart = ctp;
21352 const BYTE*
const ctEnd = ctStart + nbSeq;
21353 size_t cSymbolTypeSizeEstimateInBits = 0;
21354 unsigned max = maxCode;
21359 assert(max <= defaultMax);
21361 cSymbolTypeSizeEstimateInBits =
ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
21363 cSymbolTypeSizeEstimateInBits = 0;
21365 cSymbolTypeSizeEstimateInBits =
ZSTD_fseBitCost(fseCTable, countWksp, max);
21370 while (ctp < ctEnd) {
21371 if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
21372 else cSymbolTypeSizeEstimateInBits += *ctp;
21375 return cSymbolTypeSizeEstimateInBits >> 3;
21381 const BYTE* llCodeTable,
21382 const BYTE* mlCodeTable,
21386 void* workspace,
size_t wkspSize,
21389 size_t sequencesSectionHeaderSize = 1 + 1 + (nbSeq >= 128) + (nbSeq >=
LONGNBSEQ);
21390 size_t cSeqSizeEstimate = 0;
21394 workspace, wkspSize);
21398 workspace, wkspSize);
21402 workspace, wkspSize);
21404 return cSeqSizeEstimate + sequencesSectionHeaderSize;
21410 const BYTE* ofCodeTable,
21411 const BYTE* llCodeTable,
21412 const BYTE* mlCodeTable,
21416 void* workspace,
size_t wkspSize,
21417 int writeLitEntropy,
int writeSeqEntropy)
21421 workspace, wkspSize, writeLitEntropy);
21424 workspace, wkspSize, writeSeqEntropy);
21436 DEBUGLOG(6,
"ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
21456 size_t literalsBytes = 0;
21459 for (i = 0; i < nbSeqs; ++i) {
21463 literalsBytes += 0x10000;
21465 return literalsBytes;
21471 size_t matchBytes = 0;
21474 for (i = 0; i < nbSeqs; ++i) {
21478 matchBytes += 0x10000;
21488 size_t startIdx,
size_t endIdx)
21490 *resultSeqStore = *originalSeqStore;
21491 if (startIdx > 0) {
21511 resultSeqStore->
lit = resultSeqStore->
litStart + literalsBytes;
21513 resultSeqStore->
llCode += startIdx;
21514 resultSeqStore->
mlCode += startIdx;
21515 resultSeqStore->
ofCode += startIdx;
21538 return rep[adjustedRepCode];
21560 for (; idx < nbSeq; ++idx) {
21562 U32 const ll0 = (seq->
litLength == 0) && (idx != longLitLenIdx);
21572 if (dRawOffset != cRawOffset) {
21593 void*
dst,
size_t dstCapacity,
21594 const void* src,
size_t srcSize,
21595 U32 lastBlock,
U32 isPartition)
21597 const U32 rleMaxLength = 25;
21605 DEBUGLOG(5,
"ZSTD_compressSeqStore_singleBlock");
21620 cSeqsSize < rleMaxLength &&
21635 if (cSeqsSize == 0) {
21638 DEBUGLOG(4,
"Writing out nocompress block, size: %zu", cSize);
21639 *dRep = dRepOriginal;
21640 }
else if (cSeqsSize == 1) {
21643 DEBUGLOG(4,
"Writing out RLE block, size: %zu", cSize);
21644 *dRep = dRepOriginal;
21649 DEBUGLOG(4,
"Writing out compressed block, size: %zu", cSize);
21660 U32* splitLocations;
21664 #define MIN_SEQUENCES_BLOCK_SPLITTING 300
21687 size_t estimatedOriginalSize;
21688 size_t estimatedFirstHalfSize;
21689 size_t estimatedSecondHalfSize;
21690 size_t midIdx = (startIdx + endIdx)/2;
21692 DEBUGLOG(5,
"ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
21693 assert(endIdx >= startIdx);
21695 DEBUGLOG(6,
"ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx);
21704 DEBUGLOG(5,
"Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
21705 estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
21709 if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
21710 DEBUGLOG(5,
"split decided at seqNb:%zu", midIdx);
21729 DEBUGLOG(5,
"ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq);
21735 DEBUGLOG(5,
"ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.
idx+1);
21746 void*
dst,
size_t dstCapacity,
21747 const void* src,
size_t blockSize,
21748 U32 lastBlock,
U32 nbSeq)
21754 size_t srcBytesTotal = 0;
21780 DEBUGLOG(5,
"ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
21784 if (numSplits == 0) {
21785 size_t cSizeSingleBlock =
21791 FORWARD_IF_ERROR(cSizeSingleBlock,
"Compressing single block from splitBlock_internal() failed!");
21792 DEBUGLOG(5,
"ZSTD_compressBlock_splitBlock_internal: No splits");
21795 return cSizeSingleBlock;
21799 for (i = 0; i <= numSplits; ++i) {
21801 U32 const lastPartition = (i == numSplits);
21802 U32 lastBlockEntireSrc = 0;
21805 srcBytesTotal += srcBytes;
21806 if (lastPartition) {
21808 srcBytes += blockSize - srcBytesTotal;
21809 lastBlockEntireSrc = lastBlock;
21818 lastBlockEntireSrc, 1 );
21819 DEBUGLOG(5,
"Estimated size: %zu vs %zu : actual size",
21825 dstCapacity -= cSizeChunk;
21826 cSize += cSizeChunk;
21827 *currSeqStore = *nextSeqStore;
21839 void*
dst,
size_t dstCapacity,
21840 const void* src,
size_t srcSize,
U32 lastBlock)
21844 DEBUGLOG(4,
"ZSTD_compressBlock_splitBlock");
21854 DEBUGLOG(4,
"ZSTD_compressBlock_splitBlock: Nocompress block");
21867 void*
dst,
size_t dstCapacity,
21874 const U32 rleMaxLength = 25;
21878 DEBUGLOG(5,
"ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
21908 cSize < rleMaxLength &&
21930 void*
dst,
size_t dstCapacity,
21931 const void* src,
size_t srcSize,
21932 const size_t bss,
U32 lastBlock)
21934 DEBUGLOG(6,
"Attempting ZSTD_compressSuperBlock()");
21964 {
size_t const cSize =
21966 if (cSize !=
ERROR(dstSize_tooSmall)) {
21967 size_t const maxCSize =
21978 DEBUGLOG(6,
"Resorting to ZSTD_noCompressBlock()");
21986 void*
dst,
size_t dstCapacity,
21987 const void* src,
size_t srcSize,
21992 DEBUGLOG(5,
"ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
21997 FORWARD_IF_ERROR(cSize,
"ZSTD_compressBlock_targetCBlockSize_body failed");
22007 ZSTD_CCtx_params
const* params,
22011 U32 const cycleLog =
ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
22012 U32 const maxDist = (
U32)1 << params->cParams.windowLog;
                                     void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     U32 lastFrameChunk)
    DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);

    while (remaining) {
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
                        "not enough space to store compressed block");
        if (remaining < blockSize) blockSize = remaining;
                                ip, blockSize, 1);
            U32 const cBlockHeader = cSize == 1 ?
                lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
        assert(remaining >= blockSize);
        remaining -= blockSize;
        assert(dstCapacity >= cSize);
        dstCapacity -= cSize;
        DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",

    return (size_t)(op-ostart);
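    /* Zstandard block headers are 3 bytes, little-endian:
     *   Block_Header = lastBlock | (blockType << 1) | (blockSize << 3)
     * with blockType 0 = raw, 1 = RLE, 2 = compressed. Worked example,
     * following the bt_rle expression above: a final RLE block covering 5
     * source bytes gives 1 + (1<<1) + (5<<3) = 0x2B, stored as 2B 00 00. */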
                                   const ZSTD_CCtx_params* params,
                                   U64 pledgedSrcSize, U32 dictID)
    U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);
    U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;
    U32 const checksumFlag = params->fParams.checksumFlag>0;
    U32 const windowSize = (U32)1 << params->cParams.windowLog;
    U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
    U32 const fcsCode = params->fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;
    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );

    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
                    "dst buf is too small to fit worst-case frame header size.");
    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
             !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
    if (params->format == ZSTD_f_zstd1) {
    op[pos++] = frameHeaderDescriptionByte;
    if (!singleSegment) op[pos++] = windowLogByte;
    switch(dictIDSizeCode)
        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
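    /* Frame_Header_Descriptor bit layout, as assembled above:
     *   bits 0-1 : Dictionary_ID_Flag      (dictIDSizeCode: 0/1/2/4-byte dictID)
     *   bit  2   : Content_Checksum_Flag
     *   bit  5   : Single_Segment_Flag     (no Window_Descriptor byte follows)
     *   bits 6-7 : Frame_Content_Size_Flag (fcsCode)
     * e.g. a checksummed single-segment frame with a 1-byte dictID and
     * fcsCode 0 gets descriptor 1 + (1<<2) + (1<<5) = 0x25. */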
                                const void* src, size_t srcSize,
                                unsigned magicVariant) {
                    dstSize_tooSmall, "Not enough room for skippable frame");
    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong,
                    "Src size too large for skippable frame");
    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound,
                    "Skippable frame magic number variant not supported");
    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
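/* A skippable frame is 8 header bytes followed by the raw payload: a
 * little-endian magic in 0x184D2A50..0x184D2A5F (base value + magicVariant,
 * hence the > 15 rejection above) and a 4-byte little-endian payload size,
 * which is why srcSize must fit in 32 bits and the total cost is
 * srcSize + ZSTD_SKIPPABLEHEADERSIZE (8 bytes). */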
22194 "dst buf is too small to write frame trailer empty block.");
22204 "wrong cctx stage");
22206 parameter_unsupported,
22207 "incompatible with ldm");
                                              void* dst, size_t dstCapacity,
                                              const void* src, size_t srcSize,
                                              U32 frame, U32 lastFrameChunk)
    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
                    "missing init (ZSTD_compressBegin)");
        assert(fhSize <= dstCapacity);
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
    if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
    {   size_t const cSize = frame ?
        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
                        "error : pledgedSrcSize = %u, while realSrcSize >= %u",
        return cSize + fhSize;
                                 void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize)
    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
                                 const void* src, size_t srcSize)
    return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog);
                                        ZSTD_CCtx_params const* params,
                                        const void* src, size_t srcSize,
    int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;
        maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize);
        ip = iend - maxDictSize;
    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
        U32 maxDictSize = 8U << MIN(MAX(params->cParams.hashLog, params->cParams.chainLog), 28);
        ip = iend - maxDictSize;
    switch(params->cParams.strategy)
        assert(params->useRowMatchFinder != ZSTD_ps_auto);
        if (params->useRowMatchFinder == ZSTD_ps_enable) {
            size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog);
            DEBUGLOG(4, "Using row-based hash table for lazy dict");
            DEBUGLOG(4, "Using chain-based hash table for lazy dict");
    if (dictMaxSymbolValue < maxSymbolValue) {
    for (s = 0; s <= maxSymbolValue; ++s) {
        if (normalizedCounter[s] == 0) {
                         const void* const dict, size_t dictSize)
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;

    {   unsigned maxSymbolValue = 255;
        unsigned hasZeroWeights = 1;
                        dictEnd-dictPtr, &hasZeroWeights);
        if (!hasZeroWeights)
        dictPtr += hufHeaderSize;

    {   unsigned offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
                offcodeNCount, MaxOff, offcodeLog,
                dictionary_corrupted, "");
        dictPtr += offcodeHeaderSize;

    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
                dictionary_corrupted, "");
        dictPtr += matchlengthHeaderSize;

    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
                litlengthNCount, litlengthMaxValue, litlengthLog,
                dictionary_corrupted, "");
        dictPtr += litlengthHeaderSize;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        if (dictContentSize <= ((U32)-1) - 128 KB) {
            U32 const maxOffset = (U32)dictContentSize + 128 KB;
        for (u=0; u<3; u++) {

    return dictPtr - (const BYTE*)dict;
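    /* A zstd dictionary's entropy section is parsed strictly in this order:
     * Huffman literals table (HUF weights), then FSE tables for offsets,
     * match lengths and literal lengths, then three 4-byte repcodes (the
     * u<3 loop above). Everything after that is dictionary content, and the
     * function returns the number of entropy-header bytes consumed. */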
                                    ZSTD_CCtx_params const* params,
                                    const void* dict, size_t dictSize,
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;
    dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4);
    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
                ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), "");

                                           const ZSTD_CCtx_params* params,
                                           const void* dict, size_t dictSize,
                                           ZSTD_dictContentType_e dictContentType,
    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
    if ((dict==NULL) || (dictSize<8)) {
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
    if (dictContentType == ZSTD_dct_rawContent)
        if (dictContentType == ZSTD_dct_auto) {
            DEBUGLOG(4, "raw content dictionary detected");
                ms, ls, ws, params, dict, dictSize, dtlm, tfp);
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
                bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace);
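/* Dictionary content-type dispatch: ZSTD_dct_rawContent loads the buffer as
 * plain content, ZSTD_dct_fullDict requires a well-formed zstd dictionary
 * (hence dictionary_wrong above), and ZSTD_dct_auto inspects the leading
 * magic number, falling back to raw-content loading when it is absent. */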
#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)

                             const void* dict, size_t dictSize,
                             ZSTD_dictContentType_e dictContentType,
                             const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;

    cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
    assert(!((dict) && (cdict)));
        && (params->attachDictPref != ZSTD_dictForceLoad) ) {
    {   size_t const dictID = cdict ?
        assert(dictID <= UINT_MAX);
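/* The two cutoffs above gate the attach-dictionary heuristic: a CDict's
 * pre-tuned parameters are only reused when the source is small (<= 128 KB)
 * or not much larger than the dictionary (dictSize * 6); otherwise the
 * dictionary gets re-processed with parameters tuned for the actual input.
 * (A reading of the visible macros; the elided branch carries the exact
 * condition.) */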
                                   const void* dict, size_t dictSize,
                                   ZSTD_dictContentType_e dictContentType,
                                   const ZSTD_CCtx_params* params,
                                   unsigned long long pledgedSrcSize)
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
                                    dict, dictSize, dictContentType, dtlm,
                                    params, pledgedSrcSize,

                             const void* dict, size_t dictSize,
                             ZSTD_parameters params, unsigned long long pledgedSrcSize)
    ZSTD_CCtx_params cctxParams;
                                               &cctxParams, pledgedSrcSize);

    ZSTD_CCtx_params cctxParams;
    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
    DEBUGLOG(4, "ZSTD_writeEpilogue");
        dstCapacity -= fhSize;
        U32 const cBlockHeader24 = 1 + (((U32)bt_raw)<<1) + 0;
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);

    if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) {
        trace.streaming = streaming;
        trace.dictionaryID = cctx->dictID;
        ZSTD_trace_compress_end(cctx->traceCtx, &trace);
    cctx->traceCtx = 0;
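/* The epilogue closes the frame: if nothing was emitted yet, a last empty
 * raw block is written (cBlockHeader24 = 1 + (bt_raw<<1) + size 0), and when
 * the checksum flag is set the low 4 bytes of the frame's XXH64 digest
 * follow. The trace hook below it is optional instrumentation, fired once
 * and then cleared. */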
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
        DEBUGLOG(4, "end of frame : controlling src size");
                        "error : pledgedSrcSize = %u, while realSrcSize = %u",
    return cSize + endResult;
                        const void* src, size_t srcSize)

                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const void* dict, size_t dictSize,
                               ZSTD_parameters params)
    DEBUGLOG(4, "ZSTD_compress_advanced");

                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize,
                                        const void* dict, size_t dictSize,
                                        const ZSTD_CCtx_params* params)
    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);

                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const void* dict, size_t dictSize,
    assert(params.fParams.contentSizeFlag == 1);
    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
                       void* dst, size_t dstCapacity,
                       const void* src, size_t srcSize,
                      const void* src, size_t srcSize,
#if ZSTD_COMPRESS_HEAPMODE
                                size_t dictSize, ZSTD_compressionParameters cParams,
                                ZSTD_dictLoadMethod_e dictLoadMethod)
         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
    if (cdict==NULL) return 0;
    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
                    const void* dictBuffer, size_t dictSize,
                    ZSTD_dictLoadMethod_e dictLoadMethod,
                    ZSTD_dictContentType_e dictContentType,
                    ZSTD_CCtx_params params)
    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
            params.useRowMatchFinder,
    params.fParams.contentSizeFlag = 1;

                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_paramSwitch_e useRowMatchFinder,
                                      U32 enableDedicatedDictSearch,
                                      ZSTD_customMem customMem)
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
    {   size_t const workspaceSize =
            (dictLoadMethod == ZSTD_dlm_byRef ? 0

                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_customMem customMem)
    ZSTD_CCtx_params cctxParams;
    cctxParams.cParams = cParams;
    cctxParams.customMem = customMem;
                                dictBuffer, dictSize,
                                dictLoadMethod, dictContentType,
                                &cctxParams, customMem);
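/* ZSTD_dlm_byRef keeps a pointer to the caller's buffer (which must outlive
 * the CDict), while ZSTD_dlm_byCopy allocates an internal buffer and copies
 * the dictionary into it, as the branch above shows; byRef is also why the
 * size estimates add 0 bytes for the dictionary in that mode. */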
                                        const void* dict, size_t dictSize,
                                        ZSTD_dictLoadMethod_e dictLoadMethod,
                                        ZSTD_dictContentType_e dictContentType,
                                        const ZSTD_CCtx_params* originalCctxParams,
                                        ZSTD_customMem customMem)
    ZSTD_CCtx_params cctxParams = *originalCctxParams;
    ZSTD_compressionParameters cParams;

    DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
    if (cctxParams.enableDedicatedDictSearch) {
            cctxParams.compressionLevel, dictSize);
            cctxParams.enableDedicatedDictSearch = 0;
    DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
    cctxParams.cParams = cParams;
                        dictLoadMethod, cctxParams.cParams,
                        cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                               dictLoadMethod, dictContentType,

                                  ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                  cParams, ZSTD_defaultCMem);
                                  ZSTD_dlm_byRef, ZSTD_dct_auto,
                                  cParams, ZSTD_defaultCMem);
    if (cdict==NULL) return 0;
    {   ZSTD_customMem const cMem = cdict->customMem;
        if (!cdictInWorkspace) {
        void* workspace, size_t workspaceSize,
        const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod,
        ZSTD_dictContentType_e dictContentType,
        ZSTD_compressionParameters cParams)
          + (dictLoadMethod == ZSTD_dlm_byRef ? 0
    ZSTD_CCtx_params params;

    if ((size_t)workspace & 7) return NULL;
    if (cdict == NULL) return NULL;
    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
    if (workspaceSize < neededSize) return NULL;
    params.cParams = cParams;
    params.useRowMatchFinder = useRowMatchFinder;
                            dictLoadMethod, dictContentType,
    if (cdict==NULL) return 0;
    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
    ZSTD_CCtx_params cctxParams;
    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
    ZSTD_parameters params;
    params.fParams = fParams;
        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
        U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
        cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
                                       &cctxParams, pledgedSrcSize,

    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
    ZSTD_frameParameters const fParams = { 0, 0, 0 };
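    /* The windowLog adjustment above widens the window to cover the whole
     * source when its size is known: pledgedSrcSize is capped at 1U << 19,
     * rounded up to a power-of-two log with ZSTD_highbit32, and windowLog is
     * raised to at least that value, so small inputs are fully spanned
     * without inflating the window beyond 512 KB. */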
                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)

                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)

                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
    ZSTD_frameParameters const fParams = { 1, 0, 0 };
    DEBUGLOG(3, "ZSTD_createCStream");
    DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);

                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
                    const ZSTD_CCtx_params* params,
                    unsigned long long pledgedSrcSize)
    DEBUGLOG(4, "ZSTD_initCStream_internal");
    assert(!((dict) && (cdict)));

                                            ZSTD_frameParameters fParams,
                                            unsigned long long pledgedSrcSize)
    DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
    DEBUGLOG(4, "ZSTD_initCStream_usingCDict");

                                 const void* dict, size_t dictSize,
                                 ZSTD_parameters params, unsigned long long pss)
    DEBUGLOG(4, "ZSTD_initCStream_advanced");
    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
    if (hintInSize==0) hintInSize = cctx->blockSize;
    const char* const istart = (assert(input != NULL), (const char*)input->src);
    const char* const iend = (istart != NULL) ? istart + input->size : istart;
    const char* ip = (istart != NULL) ? istart + input->pos : istart;
    char* const oend = (ostart != NULL) ? ostart + output->size : ostart;
    char* op = (ostart != NULL) ? ostart + output->pos : ostart;
    U32 someMoreWork = 1;

    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu",
             (int)flushMode, input->size - input->pos);

    while (someMoreWork) {
            RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");
                DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
                someMoreWork = 0; break;
                if (ip) ip += loaded;
                    someMoreWork = 0; break;
                    someMoreWork = 0; break;
                    someMoreWork = 0; break;
                    && (ip == iend) ) {
                    someMoreWork = 0; break;
            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
                size_t oSize = oend-op;
                if (inputBuffered) {
                    unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
                    cSize = lastBlock ?
                                        zcs->inBuff + zcs->inToCompress, iSize);
                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
                    DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
                    unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend);
                    cSize = lastBlock ?
                    if (ip) ip += iSize;
                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
                    if (lastBlock) assert(ip == iend);
                    DEBUGLOG(5, "Frame completed directly in outBuffer");
                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
                         (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
                if (toFlush!=flushed) {
                DEBUGLOG(5, "Frame completed on flush");
#ifdef ZSTD_MULTITHREAD
    DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)");
            RETURN_ERROR(stabilityCondition_notRespected,
                         "ZSTD_c_stableInBuffer enabled but input differs!");
        size_t const outBufferSize = output->size - output->pos;
            RETURN_ERROR(stabilityCondition_notRespected,
                         "ZSTD_c_stableOutBuffer enabled but output size differs!");

        DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
        {   size_t const dictSize = prefixDict.dict
#ifdef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(
            params.useSequenceProducer == 1 && params.nbWorkers >= 1,
            parameter_combination_unsupported,
            "External sequence producer isn't supported with nbWorkers >= 1"
        );
        params.nbWorkers = 0;
        if (params.nbWorkers > 0) {
            cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
            if (cctx->mtctx == NULL) {
                DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
            DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
                        &params, pledgedSrcSize,

    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
#ifdef ZSTD_MULTITHREAD
        size_t const ipos = input->pos;
        size_t const opos = output->pos;
            || (endOp == ZSTD_e_end && flushMin == 0) ) {
        DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
    DEBUGLOG(5, "completed ZSTD_compressStream2");
                  void* dst, size_t dstCapacity, size_t* dstPos,
                  const void* src, size_t srcSize, size_t* srcPos,
    output.size = dstCapacity;
    input.pos = *srcPos;
    *srcPos = input.pos;

                      void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize)
                dst, dstCapacity, &oPos,
        assert(oPos == dstCapacity);
                        size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer)
    U32 const windowSize = 1u << windowLog;
    size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
    size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4;
    RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid,
                    "Matchlength too small for the minMatch");

    if (!ll0 && rawOffset == rep[0]) {
    } else if (rawOffset == rep[1]) {
    } else if (rawOffset == rep[2]) {
    } else if (ll0 && rawOffset == rep[0] - 1) {
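    /* Repcode resolution for external sequences follows the format's rules:
     * a raw offset equal to rep[0] (only when litLength != 0), rep[1] or
     * rep[2] is encoded as a repcode, and with litLength == 0 the special
     * value rep[0] - 1 is representable too; anything else is emitted as a
     * full offset. */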
                                              const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                              const void* src, size_t blockSize,
                                              ZSTD_paramSwitch_e externalRepSearch)
    U32 const startIdx = idx;
    const BYTE* const iend = ip + blockSize;

    DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize);

    for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
        U32 const litLength = inSeqs[idx].litLength;
        U32 const matchLength = inSeqs[idx].matchLength;
        if (externalRepSearch == ZSTD_ps_disable) {
            U32 const ll0 = (litLength == 0);
        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
        seqPos->posInSrc += litLength + matchLength;
                           "Sequence validation failed");
                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
        ip += matchLength + litLength;

    assert(externalRepSearch != ZSTD_ps_auto);
    assert(idx >= startIdx);
    if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) {
        U32* const rep = updatedRepcodes.rep;
        U32 lastSeqIdx = idx - 1;
        if (lastSeqIdx >= startIdx + 2) {
            rep[2] = inSeqs[lastSeqIdx - 2].offset;
            rep[1] = inSeqs[lastSeqIdx - 1].offset;
            rep[0] = inSeqs[lastSeqIdx].offset;
        } else if (lastSeqIdx == startIdx + 1) {
            rep[1] = inSeqs[lastSeqIdx - 1].offset;
            rep[0] = inSeqs[lastSeqIdx].offset;
            assert(lastSeqIdx == startIdx);
            rep[0] = inSeqs[lastSeqIdx].offset;

    if (inSeqs[idx].litLength) {
        DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
        ip += inSeqs[idx].litLength;
        seqPos->posInSrc += inSeqs[idx].litLength;
    RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!");
    seqPos->idx = idx+1;
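    /* Explicit block delimiters: a sequence with offset == 0 and
     * matchLength == 0 terminates the block, and its litLength carries the
     * block's trailing literals; the loop condition above stops on exactly
     * that pattern, while the final ip != iend check verifies the declared
     * sequences sum to the delimited block size. */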
                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                        const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch)
    BYTE const* iend = ip + blockSize;
    U32 bytesAdjustment = 0;
    U32 finalMatchSplit = 0;

    (void)externalRepSearch;

    DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
    DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
    while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
        const ZSTD_Sequence currSeq = inSeqs[idx];
        U32 litLength = currSeq.litLength;
        U32 matchLength = currSeq.matchLength;
        U32 const rawOffset = currSeq.offset;

        if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
            if (startPosInSequence >= litLength) {
                startPosInSequence -= litLength;
                matchLength -= startPosInSequence;
                litLength -= startPosInSequence;
            endPosInSequence -= currSeq.litLength + currSeq.matchLength;
            startPosInSequence = 0;
            DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
                     currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
            if (endPosInSequence > litLength) {
                U32 firstHalfMatchLength;
                litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
                firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
                if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
                    U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
                    if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
                        endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
                        bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
                        firstHalfMatchLength -= bytesAdjustment;
                    matchLength = firstHalfMatchLength;
                    finalMatchSplit = 1;
                bytesAdjustment = endPosInSequence - currSeq.litLength;
                endPosInSequence = currSeq.litLength;
        {   U32 const ll0 = (litLength == 0);
            seqPos->posInSrc += litLength + matchLength;
                               "Sequence validation failed");
            DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
                            "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
            ip += matchLength + litLength;
            if (!finalMatchSplit)
    DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
    assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
    iend -= bytesAdjustment;
        U32 lastLLSize = (U32)(iend - ip);
        DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
    return bytesAdjustment;
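    /* Without delimiters a sequence may straddle the block boundary. The
     * split logic trims the first half to end at the boundary; if the
     * leftover second half of the match would fall below minMatch, the
     * boundary is pulled back by the difference (bytesAdjustment) so both
     * halves stay encodable. The caller shortens the block by the returned
     * bytesAdjustment and re-feeds those bytes into the next block. */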
                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                       const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
    if (mode == ZSTD_sf_explicitBlockDelimiters) {
    } else if (mode == ZSTD_sf_noBlockDelimiters) {
    assert(sequenceCopier != NULL);
    return sequenceCopier;
    size_t blockSize = 0;
    size_t spos = seqPos.idx;
    DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize);
    assert(spos <= inSeqsSize);
    while (spos < inSeqsSize) {
        end = (inSeqs[spos].offset == 0);
        blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength;
            if (inSeqs[spos].matchLength != 0)
                RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0");
    RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter");

    int const lastBlock = (remaining <= blockSize);
    return lastBlock ? remaining : blockSize;

                                  size_t blockSize, size_t remaining,
    DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining);
    if (mode == ZSTD_sf_noBlockDelimiters)
        FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters");
        if (explicitBlockSize > blockSize)
            RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block");
        if (explicitBlockSize > remaining)
            RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source");
        return explicitBlockSize;
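    /* determine_blockSize picks the next block's span: with no delimiters it
     * is simply min(blockSize, remaining); with explicit delimiters it is
     * the sum of litLength + matchLength up to the next offset == 0 marker,
     * which must exceed neither the maximum block size nor the bytes still
     * remaining in the source. */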
                               void* dst, size_t dstCapacity,
                               const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                               const void* src, size_t srcSize)
    DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);

    if (remaining == 0) {
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");

    while (remaining) {
        size_t compressedSeqsSize;
        size_t additionalByteAdjustment;
                                              inSeqs, inSeqsSize, seqPos);
        U32 const lastBlock = (blockSize == remaining);
        FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
        assert(blockSize <= remaining);
        DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize);
        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes);
        blockSize -= additionalByteAdjustment;
            DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
            cSize += cBlockSize;
            remaining -= blockSize;
            dstCapacity -= cBlockSize;
        FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
        DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
            compressedSeqsSize = 1;
        if (compressedSeqsSize == 0) {
            DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize);
        } else if (compressedSeqsSize == 1) {
            DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize);
            DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize);
        cSize += cBlockSize;
        remaining -= blockSize;
        dstCapacity -= cBlockSize;
        DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);
    DEBUGLOG(4, "cSize final total: %zu", cSize);
                              void* dst, size_t dstCapacity,
                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                              const void* src, size_t srcSize)
    size_t compressedBlocksSize = 0;
    size_t frameHeaderSize = 0;

    DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity);
    op += frameHeaderSize;
    dstCapacity -= frameHeaderSize;
    cSize += frameHeaderSize;
                                                 inSeqs, inSeqsSize,
    cSize += compressedBlocksSize;
    dstCapacity -= compressedBlocksSize;
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);

    FORWARD_IF_ERROR(remainingToFlush, "ZSTD_compressStream2(,,ZSTD_e_end) failed");
    if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;
    size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
    DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
#ifndef ZSTD_CLEVELS_H
#define ZSTD_CLEVELS_H

#define ZSTD_STATIC_LINKING_ONLY

#define ZSTD_MAX_CLEVEL     22

__attribute__((__unused__))

int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }

    switch (cParams.strategy) {

                                   ZSTD_compressionParameters const* cParams)
        && (cParams->hashLog > cParams->chainLog)
        && (cParams->chainLog <= 24);

                             ZSTD_compressionParameters* cParams) {
    switch (cParams->strategy) {
        if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
            cParams->hashLog = ZSTD_HASHLOG_MIN;

    size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
    DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
        cp.targetLength = (unsigned)(-clampedCompressionLevel);

    ZSTD_parameters params;
    params.cParams = cParams;
    params.fParams.contentSizeFlag = 1;
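/* Parameter tables are chosen by source-size bucket: the three comparisons
 * above sum to tableID, so inputs above 256 KB land in table 0 and, e.g., a
 * 100 KB input gets (1 + 1 + 0) = tableID 2. Negative "fast" levels encode
 * their speed factor through targetLength = -level, per the assignment
 * above. */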
    ZSTD_sequenceProducer_F* mFinder
    if (mFinder != NULL) {

    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const mls = cParams->minMatch;
    const U32 fastHashFillStep = 3;

    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        for (i = 0; i < fastHashFillStep; ++i) {

    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const hBitsL = cParams->hashLog;
    U32 const mls = cParams->minMatch;
    U32 const hBitsS = cParams->chainLog;
    const U32 fastHashFillStep = 3;

    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        for (i = 0; i < fastHashFillStep; ++i) {
            hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
                              const void* const end,

                        void const* src, size_t srcSize, U32 const mls)
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    const U32 hBitsL = cParams->hashLog;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    const BYTE* nextStep;
    const BYTE* matchl0;
    const BYTE* matchs0;
    const BYTE* matchl1;
    const BYTE* ip = istart;

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    ip += ((ip - prefixLowest) == 0);
    {   U32 const current = (U32)(ip - base);
        U32 const maxRep = current - windowLow;
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;

    nextStep = ip + kStepIncr;
        if (ip1 > ilimit) {
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;
        {   const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;
            hashLong[hl0] = hashSmall[hs0] = curr;
                goto _match_stored;
            if (idxl0 > prefixLowestIndex) {
                offset = (U32)(ip-matchl0);
                while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; }
        idxl1 = hashLong[hl1];
        matchl1 = base + idxl1;
            if (idxs0 > prefixLowestIndex) {
                goto _search_next_long;
        if (ip1 >= nextStep) {
            nextStep += kStepIncr;
#if defined(__aarch64__)
    } while (ip1 <= ilimit);

    offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

    rep[0] = offset_1 ? offset_1 : offsetSaved1;
    rep[1] = offset_2 ? offset_2 : offsetSaved2;

    return (size_t)(iend - anchor);

        if (idxl1 > prefixLowestIndex) {
            offset = (U32)(ip-matchl1);
            while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; }
        offset = (U32)(ip - matchs0);
        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; }
        offset_2 = offset_1;
    hashLong[hl1] = (U32)(ip1 - base);

    if (ip <= ilimit) {
        {   U32 const indexToInsert = curr+2;
            hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
            hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
        while ( (ip <= ilimit)
            size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
            U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;
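    /* Double-fast keeps two hash tables over the same window: hashLong
     * indexes 8-byte spans (hBitsL = hashLog) and hashSmall indexes
     * minMatch-byte spans (hBitsS = chainLog). Long matches are preferred; a
     * short-table hit first tries to upgrade to a long match at ip+1
     * (_search_next_long). The backward while-loops extend each match before
     * its hashed position, and offset_1/offset_2 carry the rolling repcodes,
     * restored from offsetSaved1/2 when they were parked as out-of-window. */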
                        void const* src, size_t srcSize,
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    const U32 hBitsL = cParams->hashLog;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    {   size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);

    ip += (dictAndPrefixLength == 0);

    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    while (ip < ilimit) {
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
        hashLong[h2] = hashSmall[h] = curr;

        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3)
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            goto _match_stored;

        if (matchIndexL > prefixLowestIndex) {
            offset = (U32)(ip-matchLong);
            while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }
        } else if (dictTagsMatchL) {
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; }

        if (matchIndexS > prefixLowestIndex) {
                goto _search_next_long;
        } else if (dictTagsMatchS) {
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;
                goto _search_next_long;

#if defined(__aarch64__)
        {   size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;
            if (matchIndexL3 > prefixLowestIndex) {
                offset = (U32)(ip-matchL3);
                while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; }
            } else if (dictTagsMatchL3) {
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; }

        if (matchIndexS < prefixLowestIndex) {
            offset = (U32)(curr - matchIndexS);
        offset_2 = offset_1;

        if (ip <= ilimit) {
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3)
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;

    return (size_t)(iend - anchor);
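    /* dictMatchState indexing: dictionary positions are mapped into the same
     * index space as the current window through dictIndexDelta, so an index
     * below prefixLowestIndex addresses dictBase and anything at or above it
     * addresses base. The (prefixLowestIndex-1) - repIndex >= 3 test uses
     * intentional unsigned underflow to reject indices too close to the
     * dictionary/prefix seam in a single comparison. */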
#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                    \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

                        void const* src, size_t srcSize)
    const U32 mls = ms->cParams.minMatch;
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);

                        void const* src, size_t srcSize)
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
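/* ZSTD_GEN_DFAST_FN stamps out one specialization per (dictMode, mls) pair
 * so the generic body compiles with mls as a constant; the dispatchers then
 * switch on ms->cParams.minMatch (4..7), e.g. minMatch 5 without a
 * dictionary resolves to ZSTD_compressBlock_doubleFast_noDict_5. */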
                        void const* src, size_t srcSize,
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32 const hBitsL = cParams->hashLog;
    U32 const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* const ilimit = iend - 8;
    const U32   dictStartIndex = lowLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    if (prefixStartIndex == dictStartIndex)

    while (ip < ilimit) {
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        hashSmall[hSmall] = hashLong[hLong] = curr;

        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3)
            & (offset_1 <= curr+1 - dictStartIndex))
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
            const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
            offset = curr - matchLongIndex;
            while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }
            offset_2 = offset_1;

            U32 const matchIndex3 = hashLong[h3];
            const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
            const BYTE* match3 = match3Base + matchIndex3;
            hashLong[h3] = curr + 1;
                const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                offset = curr+1 - matchIndex3;
                while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; }
                const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                offset = curr - matchIndex;
            offset_2 = offset_1;

        if (ip <= ilimit) {
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)
                    & (offset_2 <= current2 - dictStartIndex))
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;

    return (size_t)(iend - anchor);

                        void const* src, size_t srcSize)
    U32 const mls = ms->cParams.minMatch;
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
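/* extDict is the sliding-window case: indices below prefixStartIndex live in
 * the old, non-contiguous segment (dictBase), those at or above it in the
 * current one (base), so every candidate selects its base pointer, match end
 * and lower bound from the segment it falls in before any bytes are
 * compared. */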
                              const void* const end,
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const mls = cParams->minMatch;
    const U32 fastHashFillStep = 3;

    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        for (p = 1; p < fastHashFillStep; ++p) {

                              const void* const end,
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const hBits = cParams->hashLog;
    U32 const mls = cParams->minMatch;
    const U32 fastHashFillStep = 3;

    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        hashTable[hash0] = curr;
        for (p = 1; p < fastHashFillStep; ++p) {
            if (hashTable[hash] == 0) {
                hashTable[hash] = curr + p;
                              const void* const end,

                        void const* src, size_t srcSize,
                        U32 const mls, U32 const hasStep)
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const hlog = cParams->hashLog;
    size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* ip0 = istart;
    U32 rep_offset1 = rep[0];
    U32 rep_offset2 = rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    const BYTE* match0;
    const BYTE* nextStep;

    DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
    ip0 += (ip0 == prefixStart);
    {   U32 const curr = (U32)(ip0 - base);
        U32 const maxRep = curr - windowLow;
        if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0;
        if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0;

    nextStep = ip0 + kStepIncr;
        if (ip3 >= ilimit) {
        idx = hashTable[hash0];
        current0 = (U32)(ip0 - base);
        hashTable[hash0] = current0;
        if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
            match0 = ip0 - rep_offset1;
            mLength = ip0[-1] == match0[-1];
        hashTable[hash1] = (U32)(ip1 - base);
        if (idx >= prefixStartIndex) {
        hashTable[hash1] = (U32)(ip1 - base);
        idx = hashTable[hash1];
        current0 = (U32)(ip0 - base);
        hashTable[hash0] = current0;
        if (idx >= prefixStartIndex) {
        hashTable[hash1] = (U32)(ip1 - base);
        idx = hashTable[hash1];
        if (ip2 >= nextStep) {
            nextStep += kStepIncr;
    } while (ip3 < ilimit);

    offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;

    rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1;
    rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2;

    return (size_t)(iend - anchor);

    match0 = base + idx;
    rep_offset2 = rep_offset1;
    rep_offset1 = (U32)(ip0-match0);
    while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
    mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);

    if (ip0 <= ilimit) {
        assert(base+current0+2 > istart);
        hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;
        if (rep_offset2 > 0) {
            size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
            {   U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; }
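    /* The fast strategy pipelines several positions (ip0..ip3) per iteration
     * and accelerates over incompressible data: once ip2 crosses nextStep the
     * skip distance grows by kStepIncr, sampling barren stretches ever more
     * sparsely. A repcode is probed first (MEM_read32(ip2) == rval), then the
     * hash-table candidate; matches extend backwards past their hashed start
     * before ZSTD_count completes the length forward. */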
#define ZSTD_GEN_FAST_FN(dictMode, mls, step)                                                            \
    static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step(                                   \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                    \
        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
    }

                        void const* src, size_t srcSize)
    U32 const mls = ms->cParams.minMatch;
    assert(ms->dictMatchState == NULL);
    if (ms->cParams.targetLength > 1) {
            return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
            return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
            return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
            return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
            return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
            return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
            return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
            return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
                        void const* src, size_t srcSize,
                        U32 const mls, U32 const hasStep)
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const hlog = cParams->hashLog;
    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip0 = istart;
    const BYTE* ip1 = ip0 + stepSize;
    const BYTE* const prefixStart = base + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
    const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart);

    {   const U32 maxDistance = 1U << cParams->windowLog;
        assert(endIndex - prefixStartIndex <= maxDistance);
        (void)maxDistance; (void)endIndex;

    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
    {   size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);

    DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
    ip0 += (dictAndPrefixLength == 0);
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    while (ip1 <= ilimit) {
        size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls);
        U32 matchIndex = hashTable[hash0];
        U32 curr = (U32)(ip0 - base);
        size_t step = stepSize;
        const BYTE* nextStep = ip0 + kStepIncr;

        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
        size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
        hashTable[hash0] = curr;

        if (((U32) ((prefixStartIndex - 1) - repIndex) >=
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
        if (dictTagsMatch) {
            const BYTE* dictMatch = dictBase + dictMatchIndex;
            if (dictMatchIndex > dictStartIndex &&
                if (matchIndex <= prefixStartIndex) {
                    U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta);
                    while (((ip0 > anchor) & (dictMatch > dictStart))
                           && (ip0[-1] == dictMatch[-1])) {
                    offset_2 = offset_1;
               && (ip0[-1] == match[-1])) {
            offset_2 = offset_1;
        matchIndex = hashTable[hash1];
        if (ip1 >= nextStep) {
            nextStep += kStepIncr;
        if (ip1 > ilimit) goto _cleanup;
        curr = (U32)(ip0 - base);

    if (ip0 <= ilimit) {
        assert(base+curr+2 > istart);
        hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
        while (ip0 <= ilimit) {
            U32 const current2 = (U32)(ip0-base);
            U32 const repIndex2 = current2 - offset_2;
            const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
                    dictBase - dictIndexDelta + repIndex2 :
            if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3)
                const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;
            ip1 = ip0 + stepSize;

    return (size_t)(iend - anchor);

                        void const* src, size_t srcSize)
    U32 const mls = ms->cParams.minMatch;
    assert(ms->dictMatchState != NULL);
        return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
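/* ZSTD_count_2segments is the two-segment analogue of ZSTD_count: it matches
 * forward through the dictionary (or old-window) segment up to its end, then
 * continues seamlessly into the current prefix, which is what lets the
 * repcode and dictionary matches above cross the dictBase/base seam. */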
                        void const* src, size_t srcSize,
                        U32 const mls, U32 const hasStep)
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const hlog = cParams->hashLog;
    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
    const BYTE* const istart = (const BYTE*)src;
    const U32   dictStartIndex = lowLimit;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    const BYTE* const ilimit = iend - 8;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    const BYTE* ip0 = istart;
    const BYTE* idxBase;
    const BYTE* match0;
    const BYTE* matchEnd = 0;
    const BYTE* nextStep;

    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);

    if (prefixStartIndex == dictStartIndex)

    {   U32 const curr = (U32)(ip0 - base);
        U32 const maxRep = curr - dictStartIndex;
        if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0;

    nextStep = ip0 + kStepIncr;
        if (ip3 >= ilimit) {
        idx = hashTable[hash0];
        idxBase = idx < prefixStartIndex ? dictBase : base;
        {   U32 const current2 = (U32)(ip2 - base);
            U32 const repIndex = current2 - offset_1;
            const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
            if ( ((U32)(prefixStartIndex - repIndex) >= 4)
                 & (offset_1 > 0) ) {
        current0 = (U32)(ip0 - base);
        hashTable[hash0] = current0;
            match0 = repBase + repIndex;
            matchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            assert((match0 != prefixStart) & (match0 != dictStart));
            mLength = ip0[-1] == match0[-1];
        {   U32 const mval = idx >= dictStartIndex ?
        idx = hashTable[hash1];
        idxBase = idx < prefixStartIndex ? dictBase : base;
        current0 = (U32)(ip0 - base);
        hashTable[hash0] = current0;
        {   U32 const mval = idx >= dictStartIndex ?
        idx = hashTable[hash1];
        idxBase = idx < prefixStartIndex ? dictBase : base;
        if (ip2 >= nextStep) {
            nextStep += kStepIncr;
    } while (ip3 < ilimit);

    offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

    rep[0] = offset_1 ? offset_1 : offsetSaved1;
    rep[1] = offset_2 ? offset_2 : offsetSaved2;

    return (size_t)(iend - anchor);

    {   U32 const offset = current0 - idx;
        const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart;
        matchEnd = idx < prefixStartIndex ? dictEnd : iend;
        match0 = idxBase + idx;
        offset_2 = offset_1;
    while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) {
    mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart);
    hashTable[hash1] = (U32)(ip1 - base);

    if (ip0 <= ilimit) {
        assert(base+current0+2 > istart);
        hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;
        while (ip0 <= ilimit) {
            U32 const repIndex2 = (U32)(ip0-base) - offset_2;
            const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0))
                const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                {   U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }

                        void const* src, size_t srcSize)
    U32 const mls = ms->cParams.minMatch;
    assert(ms->dictMatchState == NULL);
        return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
        return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
26551 #define kLazySkippingStep 8
26563 const ZSTD_compressionParameters* const cParams = &ms->cParams;
26565 U32 const hashLog = cParams->hashLog;
26568 U32 const btLog = cParams->chainLog - 1;
26569 U32 const btMask = (1 << btLog) - 1;
26572 U32 const target = (U32)(ip - base);
26576 DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
26582 for ( ; idx < target ; idx++) {
26583 size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls);
26584 U32 const matchIndex = hashTable[h];
26586 U32* const nextCandidatePtr = bt + 2*(idx&btMask);
26587 U32* const sortMarkPtr = nextCandidatePtr + 1;
26589 DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
26590 hashTable[h] = idx;
26591 *nextCandidatePtr = matchIndex;
26604 U32 curr, const BYTE* inputEnd,
26605 U32 nbCompares, U32 btLow,
26608 const ZSTD_compressionParameters* const cParams = &ms->cParams;
26610 U32 const btLog = cParams->chainLog - 1;
26611 U32 const btMask = (1 << btLog) - 1;
26612 size_t commonLengthSmaller=0, commonLengthLarger=0;
26616 const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
26617 const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
26618 const BYTE* const dictEnd = dictBase + dictLimit;
26619 const BYTE* const prefixStart = base + dictLimit;
26621 U32* smallerPtr = bt + 2*(curr&btMask);
26622 U32* largerPtr = smallerPtr + 1;
26623 U32 matchIndex = *smallerPtr;
26626 U32 const maxDistance = 1U << cParams->windowLog;
26627 U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
26630 DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
26631 curr, dictLimit, windowLow);
26635 for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
26636 U32* const nextPtr = bt + 2*(matchIndex & btMask);
26637 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);
26638 assert(matchIndex < curr);
26644 || (matchIndex+matchLength >= dictLimit)
26645 || (curr < dictLimit) ) {
26647 || (matchIndex+matchLength >= dictLimit)) ?
26649 assert( (matchIndex+matchLength >= dictLimit)
26650 || (curr < dictLimit) );
26651 match = mBase + matchIndex;
26654 match = dictBase + matchIndex;
26656 if (matchIndex+matchLength >= dictLimit)
26657 match = base + matchIndex;
26660 DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
26661 curr, matchIndex, (U32)matchLength);
26663 if (ip+matchLength == iend) {
26667 if (match[matchLength] < ip[matchLength]) {
26669 *smallerPtr = matchIndex;
26670 commonLengthSmaller = matchLength;
26671 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }
26672 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
26673 matchIndex, btLow, nextPtr[1]);
26674 smallerPtr = nextPtr+1;
26675 matchIndex = nextPtr[1];
26678 *largerPtr = matchIndex;
26679 commonLengthLarger = matchLength;
26680 if (matchIndex <= btLow) { largerPtr=&dummy32; break; }
26681 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
26682 matchIndex, btLow, nextPtr[0]);
26683 largerPtr = nextPtr;
26684 matchIndex = nextPtr[0];
26687 *smallerPtr = *largerPtr = 0;
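/*
 * Editorial sketch: the DUBT stores both children of a position p side by
 * side in one flat U32 array, which is what the `bt + 2*(p & btMask)` / `+ 1`
 * arithmetic above computes.  With illustrative names and <stdint.h> types:
 *
 *     static uint32_t* bt_smaller(uint32_t* bt, uint32_t pos, uint32_t btMask)
 *     { return bt + 2 * (pos & btMask); }      // subtree of smaller suffixes
 *     static uint32_t* bt_larger(uint32_t* bt, uint32_t pos, uint32_t btMask)
 *     { return bt + 2 * (pos & btMask) + 1; }  // subtree of larger suffixes
 *
 * commonLengthSmaller/commonLengthLarger track how many leading bytes are
 * already known to match along each branch, so each descent step resumes the
 * byte comparison at MIN() of the two instead of at offset 0.
 */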
26694 const BYTE* const ip, const BYTE* const iend,
26702 const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
26704 U32 const hashLog = dmsCParams->hashLog;
26706 U32 dictMatchIndex = dictHashTable[h];
26718 U32 const btLog = dmsCParams->chainLog - 1;
26719 U32 const btMask = (1 << btLog) - 1;
26720 U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
26722 size_t commonLengthSmaller=0, commonLengthLarger=0;
26727 for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
26728 U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
26729 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);
26730 const BYTE* match = dictBase + dictMatchIndex;
26732 if (dictMatchIndex+matchLength >= dictHighLimit)
26733 match = base + dictMatchIndex + dictIndexDelta;
26735 if (matchLength > bestLength) {
26736 U32 matchIndex = dictMatchIndex + dictIndexDelta;
26738 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
26739 curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex);
26742 if (ip+matchLength == iend) {
26747 if (match[matchLength] < ip[matchLength]) {
26748 if (dictMatchIndex <= btLow) { break; }
26749 commonLengthSmaller = matchLength;
26750 dictMatchIndex = nextPtr[1];
26753 if (dictMatchIndex <= btLow) { break; }
26754 commonLengthLarger = matchLength;
26755 dictMatchIndex = nextPtr[0];
26761 DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
26762 curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
26771 const BYTE* const ip, const BYTE* const iend,
26772 size_t* offBasePtr,
26776 const ZSTD_compressionParameters* const cParams = &ms->cParams;
26778 U32 const hashLog = cParams->hashLog;
26780 U32 matchIndex = hashTable[h];
26787 U32 const btLog = cParams->chainLog - 1;
26788 U32 const btMask = (1 << btLog) - 1;
26789 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
26790 U32 const unsortLimit = MAX(btLow, windowLow);
26792 U32* nextCandidate = bt + 2*(matchIndex&btMask);
26793 U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
26794 U32 nbCompares = 1U << cParams->searchLog;
26795 U32 nbCandidates = nbCompares;
26796 U32 previousCandidate = 0;
26798 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
26803 while ( (matchIndex > unsortLimit)
26805 && (nbCandidates > 1) ) {
26806 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
26808 *unsortedMark = previousCandidate;
26809 previousCandidate = matchIndex;
26810 matchIndex = *nextCandidate;
26811 nextCandidate = bt + 2*(matchIndex&btMask);
26812 unsortedMark = bt + 2*(matchIndex&btMask) + 1;
26818 if ( (matchIndex > unsortLimit)
26820 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
26822 *nextCandidate = *unsortedMark = 0;
26826 matchIndex = previousCandidate;
26827 while (matchIndex) {
26828 U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
26829 U32 const nextCandidateIdx = *nextCandidateIdxPtr;
26831 nbCandidates, unsortLimit, dictMode);
26832 matchIndex = nextCandidateIdx;
26837 { size_t commonLengthSmaller = 0, commonLengthLarger = 0;
26840 const BYTE* const dictEnd = dictBase + dictLimit;
26841 const BYTE* const prefixStart = base + dictLimit;
26842 U32* smallerPtr = bt + 2*(curr&btMask);
26843 U32* largerPtr = bt + 2*(curr&btMask) + 1;
26844 U32 matchEndIdx = curr + 8 + 1;
26846 size_t bestLength = 0;
26848 matchIndex = hashTable[h];
26849 hashTable[h] = curr;
26851 for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
26852 U32* const nextPtr = bt + 2*(matchIndex & btMask);
26853 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);
26856 if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
26860 match = dictBase + matchIndex;
26862 if (matchIndex+matchLength >= dictLimit)
26866 if (matchLength > bestLength) {
26867 if (matchLength > matchEndIdx - matchIndex)
26868 matchEndIdx = matchIndex + (U32)matchLength;
26871 if (ip+matchLength == iend) {
26881 if (match[matchLength] < ip[matchLength]) {
26883 *smallerPtr = matchIndex;
26884 commonLengthSmaller = matchLength;
26885 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }
26886 smallerPtr = nextPtr+1;
26887 matchIndex = nextPtr[1];
26890 *largerPtr = matchIndex;
26891 commonLengthLarger = matchLength;
26892 if (matchIndex <= btLow) { largerPtr=&dummy32; break; }
26893 largerPtr = nextPtr;
26894 matchIndex = nextPtr[0];
26897 *smallerPtr = *largerPtr = 0;
26899 assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX));
26903 offBasePtr, bestLength, nbCompares,
26907 assert(matchEndIdx > curr+8);
26911 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
26912 curr, (U32)bestLength, (U32)*offBasePtr, mIndex);
26922 const BYTE* const ip, const BYTE* const iLimit,
26923 size_t* offBasePtr,
26927 DEBUGLOG(7, "ZSTD_BtFindBestMatch");
26943 U32 const chainSize = 1 << ms->cParams.chainLog;
26945 U32 const minChain = chainSize < target - idx ? target - chainSize : idx;
26947 U32 const cacheSize = bucketSize - 1;
26948 U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
26949 U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
26957 U32* const tmpHashTable = hashTable;
26958 U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
26960 U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
26966 assert(tmpMinChain <= minChain);
26969 for ( ; idx < target; idx++) {
26971 if (idx >= tmpMinChain) {
26972 tmpChainTable[idx - tmpMinChain] = hashTable[h];
26974 tmpHashTable[h] = idx;
26980 for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
26982 U32 countBeyondMinChain = 0;
26983 U32 i = tmpHashTable[hashIdx];
26987 if (i < minChain) {
26988 countBeyondMinChain++;
26990 i = tmpChainTable[i - tmpMinChain];
26992 if (count == cacheSize) {
26994 if (i < minChain) {
26995 if (!i || ++countBeyondMinChain > cacheSize) {
27007 chainTable[chainPos++] = i;
27009 if (i < tmpMinChain) {
27012 i = tmpChainTable[i - tmpMinChain];
27018 tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
27020 tmpHashTable[hashIdx] = 0;
27023 assert(chainPos <= chainSize);
27027 for (hashIdx = (1 << hashLog); hashIdx; ) {
27029 U32 const chainPackedPointer = tmpHashTable[hashIdx];
27031 for (i = 0; i < cacheSize; i++) {
27032 hashTable[bucketIdx + i] = 0;
27034 hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
27043 for (i = cacheSize - 1; i; i--)
27044 hashTable[h + i] = hashTable[h + i - 1];
27045 hashTable[h] = idx;
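/*
 * Editorial sketch: the dedicated-dictionary layout above keeps the first
 * cacheSize candidates of each bucket inline and packs the overflow chain's
 * start index and length into a single U32, matching the `>> 8` / `& 0xFF`
 * unpacking used by the search below.  Standalone encode/decode helpers
 * (illustrative names):
 *
 *     static uint32_t dds_pack(uint32_t chainStart, uint32_t count)
 *     { return (chainStart << 8) + count; }            // count fits in 8 bits
 *     static uint32_t dds_chainIndex(uint32_t packed)  { return packed >> 8;   }
 *     static uint32_t dds_chainLength(uint32_t packed) { return packed & 0xFF; }
 */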
27057 const BYTE* const ip, const BYTE* const iLimit,
27058 const BYTE* const prefixStart, const U32 curr,
27059 const U32 dictLimit, const size_t ddsIdx) {
27063 const U32 ddsSize = (U32)(ddsEnd - ddsBase);
27064 const U32 ddsIndexDelta = dictLimit - ddsSize;
27066 const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
27070 for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
27075 U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
27076 U32 const chainIndex = chainPackedPointer >> 8;
27081 for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
27082 size_t currentMl=0;
27084 matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
27085 match = ddsBase + matchIndex;
27092 (void)ddsLowestIndex;
27093 assert(matchIndex >= ddsLowestIndex);
27101 if (currentMl > ml) {
27104 if (ip+currentMl == iLimit) {
27112 U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
27113 U32 chainIndex = chainPackedPointer >> 8;
27114 U32 const chainLength = chainPackedPointer & 0xFF;
27115 U32 const chainAttempts = nbAttempts - ddsAttempt;
27116 U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
27119 for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
27123 for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
27124 size_t currentMl=0;
27127 match = ddsBase + matchIndex;
27130 assert(matchIndex >= ddsLowestIndex);
27138 if (currentMl > ml) {
27141 if (ip+currentMl == iLimit) break;
27152 #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
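/*
 * Editorial sketch: NEXT_IN_CHAIN realizes a hash chain as a circular table
 * indexed by absolute position, so following the chain costs one masked load
 * per step.  A standalone walk under those assumptions (illustrative names;
 * candidate evaluation elided):
 *
 *     #include <stdint.h>
 *     static uint32_t hc_walk(const uint32_t* chainTable, uint32_t chainMask,
 *                             uint32_t matchIndex, uint32_t lowLimit,
 *                             uint32_t nbAttempts)
 *     {
 *         while ((matchIndex >= lowLimit) && (nbAttempts > 0)) {
 *             // ... evaluate the candidate at matchIndex here ...
 *             nbAttempts--;
 *             matchIndex = chainTable[matchIndex & chainMask];  // NEXT_IN_CHAIN
 *         }
 *         return matchIndex;
 *     }
 */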
27158 const ZSTD_compressionParameters* const cParams,
27159 const BYTE* ip, U32 const mls, U32 const lazySkipping)
27162 const U32 hashLog = cParams->hashLog;
27164 const U32 chainMask = (1 << cParams->chainLog) - 1;
27166 const U32 target = (U32)(ip - base);
27169 while(idx < target) {
27170 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
27172 hashTable[h] = idx;
27184 const ZSTD_compressionParameters* const cParams = &ms->cParams;
27192 const BYTE* const ip, const BYTE* const iLimit,
27196 const ZSTD_compressionParameters* const cParams = &ms->cParams;
27198 const U32 chainSize = (1 << cParams->chainLog);
27199 const U32 chainMask = chainSize-1;
27203 const BYTE* const prefixStart = base + dictLimit;
27204 const BYTE* const dictEnd = dictBase + dictLimit;
27206 const U32 maxDistance = 1U << cParams->windowLog;
27208 const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
27210 const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
27211 const U32 minChain = curr > chainSize ? curr - chainSize : 0;
27212 U32 nbAttempts = 1U << cParams->searchLog;
27231 for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
27232 size_t currentMl=0;
27233 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
27235 assert(matchIndex >= dictLimit);
27240 const BYTE* const match = dictBase + matchIndex;
27247 if (currentMl > ml) {
27250 if (ip+currentMl == iLimit) break;
27253 if (matchIndex <= minChain) break;
27257 assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX));
27260 ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
27263 const U32 dmsChainSize = (1 << dms->cParams.chainLog);
27264 const U32 dmsChainMask = dmsChainSize - 1;
27268 const U32 dmsSize = (U32)(dmsEnd - dmsBase);
27269 const U32 dmsIndexDelta = dictLimit - dmsSize;
27270 const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
27274 for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
27275 size_t currentMl=0;
27276 const BYTE* const match = dmsBase + matchIndex;
27282 if (currentMl > ml) {
27284 assert(curr > matchIndex + dmsIndexDelta);
27286 if (ip+currentMl == iLimit) break;
27289 if (matchIndex <= dmsMinChain) break;
27291 matchIndex = dmsChainTable[matchIndex & dmsChainMask];
27302 #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
27303 #define ZSTD_ROW_HASH_MAX_ENTRIES 64
27305 #define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1)
27322 U32 next = (*tagRow-1) & rowMask;
27323 next += (next == 0) ? rowMask : 0;
27349 assert(rowLog == 4 || rowLog == 5 || rowLog == 6);
27359 U32 const rowLog, U32 const mls,
27360 U32 idx, const BYTE* const iLimit)
27365 U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
27368 for (; idx < lim; ++idx) {
27385 BYTE const* tagTable, BYTE const* base,
27386 U32 idx, U32 const hashLog,
27387 U32 const rowLog, U32 const mls,
27388 U64 const hashSalt)
27403 U32 updateStartIdx, U32 const updateEndIdx,
27404 U32 const mls, U32 const rowLog,
27405 U32 const rowMask, U32 const useCache)
27412 DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
27413 for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
27417 U32* const row = hashTable + relRow;
27418 BYTE* tagRow = tagTable + relRow;
27423 row[pos] = updateStartIdx;
27432 U32 const mls, U32 const rowLog,
27433 U32 const rowMask, U32 const useCache)
27438 const U32 kSkipThreshold = 384;
27439 const U32 kMaxMatchStartPositionsToUpdate = 96;
27440 const U32 kMaxMatchEndPositionsToUpdate = 32;
27448 if (UNLIKELY(target - idx > kSkipThreshold)) {
27449 U32 const bound = idx + kMaxMatchStartPositionsToUpdate;
27451 idx = target - kMaxMatchEndPositionsToUpdate;
27466 const U32 rowMask = (1u << rowLog) - 1;
27469 DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
27480 assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
27483 #if defined(ZSTD_ARCH_ARM_NEON)
27488 if (rowEntries == 16) {
27491 if (rowEntries == 32) {
27494 if (rowEntries == 64) {
27501 #if defined(ZSTD_ARCH_X86_SSE2)
27503 ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head)
27505 const __m128i comparisonMask = _mm_set1_epi8((char)tag);
27506 int matches[4] = {0};
27508 assert(nbChunks == 1 || nbChunks == 2 || nbChunks == 4);
27509 for (i=0; i<nbChunks; i++) {
27510 const __m128i chunk = _mm_loadu_si128((const __m128i*)(const void*)(src + 16*i));
27511 const __m128i equalMask = _mm_cmpeq_epi8(chunk, comparisonMask);
27512 matches[i] = _mm_movemask_epi8(equalMask);
27521 #if defined(ZSTD_ARCH_ARM_NEON)
27523 ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped)
27525 assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
27526 if (rowEntries == 16) {
27532 const uint8x16_t chunk = vld1q_u8(src);
27533 const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
27534 const uint8x8_t res = vshrn_n_u16(equalMask, 4);
27535 const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
27537 } else if (rowEntries == 32) {
27541 const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src);
27542 const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
27543 const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
27544 const uint8x16_t dup = vdupq_n_u8(tag);
27545 const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6);
27546 const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6);
27547 const uint8x8_t res = vsli_n_u8(t0, t1, 4);
27548 const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
27551 const uint8x16x4_t chunk = vld4q_u8(src);
27552 const uint8x16_t dup = vdupq_n_u8(tag);
27553 const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
27554 const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
27555 const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
27556 const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
27558 const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
27559 const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
27560 const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
27561 const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
27562 const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
27563 const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
27578 const BYTE* const src = tagRow;
27579 assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
27583 #if defined(ZSTD_ARCH_X86_SSE2)
27585 return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped);
27589 # if defined(ZSTD_ARCH_ARM_NEON)
27592 return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
27596 {
const int chunkSize =
sizeof(
size_t);
27597 const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
27598 const size_t xFF = ~((
size_t)0);
27599 const size_t x01 = xFF / 0xFF;
27600 const size_t x80 = x01 << 7;
27601 const size_t splatChar = tag * x01;
27603 int i = rowEntries - chunkSize;
27604 assert((
sizeof(
size_t) == 4) || (
sizeof(
size_t) == 8));
27606 const size_t extractMagic = (xFF / 0x7F) >> chunkSize;
27609 chunk ^= splatChar;
27610 chunk = (((chunk | x80) - x01) | chunk) & x80;
27611 matches <<= chunkSize;
27612 matches |= (chunk * extractMagic) >> shiftAmount;
27616 const size_t msb = xFF ^ (xFF >> 1);
27617 const size_t extractMagic = (msb / 0x1FF) | msb;
27620 chunk ^= splatChar;
27621 chunk = (((chunk | x80) - x01) | chunk) & x80;
27622 matches <<= chunkSize;
27623 matches |= ((chunk >> 7) * extractMagic) >> shiftAmount;
27627 matches = ~matches;
27628 if (rowEntries == 16) {
27630 }
else if (rowEntries == 32) {
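/*
 * Editorial sketch: the scalar path above is a classic SWAR byte-equality
 * test.  `chunk ^= splatChar` zeroes every byte equal to the tag; the
 * `(((chunk | x80) - x01) | chunk) & x80` step leaves the high bit of a byte
 * CLEAR exactly when that byte was zero; a multiply by extractMagic gathers
 * the high bits into the top byte; and the final `matches = ~matches` turns
 * the cleared bits into set bits.  Self-contained 64-bit core of that test
 * (illustrative name):
 *
 *     #include <stdint.h>
 *     static uint64_t swar_eq_bytes(uint64_t chunk, uint8_t tag)
 *     {
 *         const uint64_t x01 = 0x0101010101010101ULL;
 *         const uint64_t x80 = 0x8080808080808080ULL;
 *         chunk ^= tag * x01;                            // equal bytes become 0
 *         chunk = (((chunk | x80) - x01) | chunk) & x80; // high bit clear iff byte was 0
 *         return ~chunk & x80;                           // high bit set iff byte matched
 *     }
 */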
27657 const BYTE* const ip, const BYTE* const iLimit,
27666 const ZSTD_compressionParameters* const cParams = &ms->cParams;
27670 const BYTE* const prefixStart = base + dictLimit;
27671 const BYTE* const dictEnd = dictBase + dictLimit;
27673 const U32 maxDistance = 1U << cParams->windowLog;
27675 const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
27677 const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
27678 const U32 rowEntries = (1U << rowLog);
27679 const U32 rowMask = rowEntries - 1;
27680 const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog);
27683 U32 nbAttempts = 1U << cappedSearchLog;
27692 U32 ddsExtraAttempts = 0;
27694 U32* dmsRow = NULL;
27695 BYTE* dmsTagRow = NULL;
27703 ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0;
27713 dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
27714 dmsRow = dmsHashTable + dmsRelRow;
27734 U32* const row = hashTable + relRow;
27735 BYTE* tagRow = (BYTE*)(tagTable + relRow);
27736 U32 const headGrouped = (*tagRow & rowMask) * groupWidth;
27738 size_t numMatches = 0;
27739 size_t currMatch = 0;
27743 for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
27745 U32 const matchIndex = row[matchPos];
27746 if (matchPos == 0) continue;
27747 assert(numMatches < rowEntries);
27748 if (matchIndex < lowLimit)
27750 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
27755 matchBuffer[numMatches++] = matchIndex;
27763 tagRow[pos] = (BYTE)tag;
27768 for (; currMatch < numMatches; ++currMatch) {
27769 U32 const matchIndex = matchBuffer[currMatch];
27770 size_t currentMl=0;
27771 assert(matchIndex < curr);
27772 assert(matchIndex >= lowLimit);
27774 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
27775 const BYTE* const match = base + matchIndex;
27776 assert(matchIndex >= dictLimit);
27781 const BYTE* const match = dictBase + matchIndex;
27788 if (currentMl > ml) {
27791 if (ip+currentMl == iLimit) break;
27796 assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX));
27799 ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
27805 const U32 dmsSize = (U32)(dmsEnd - dmsBase);
27806 const U32 dmsIndexDelta = dictLimit - dmsSize;
27808 { U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth;
27810 size_t numMatches = 0;
27811 size_t currMatch = 0;
27814 for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
27816 U32 const matchIndex = dmsRow[matchPos];
27817 if (matchPos == 0) continue;
27818 if (matchIndex < dmsLowestIndex)
27821 matchBuffer[numMatches++] = matchIndex;
27826 for (; currMatch < numMatches; ++currMatch) {
27827 U32 const matchIndex = matchBuffer[currMatch];
27828 size_t currentMl=0;
27829 assert(matchIndex >= dmsLowestIndex);
27830 assert(matchIndex < curr);
27832 { const BYTE* const match = dmsBase + matchIndex;
27838 if (currentMl > ml) {
27840 assert(curr > matchIndex + dmsIndexDelta);
27842 if (ip+currentMl == iLimit) break;
27873 #define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls
27874 #define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls
27875 #define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog
27877 #define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE
27879 #define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \
27880 ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \
27881 ZSTD_matchState_t* ms, \
27882 const BYTE* ip, const BYTE* const iLimit, \
27883 size_t* offBasePtr) \
27885 assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
27886 return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \
27889 #define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \
27890 ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \
27891 ZSTD_matchState_t* ms, \
27892 const BYTE* ip, const BYTE* const iLimit, \
27893 size_t* offsetPtr) \
27895 assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
27896 return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
27899 #define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \
27900 ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \
27901 ZSTD_matchState_t* ms, \
27902 const BYTE* ip, const BYTE* const iLimit, \
27903 size_t* offsetPtr) \
27905 assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
27906 assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \
27907 return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \
27910 #define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \
27911 X(dictMode, mls, 4) \
27912 X(dictMode, mls, 5) \
27913 X(dictMode, mls, 6)
27915 #define ZSTD_FOR_EACH_MLS_ROWLOG(X, dictMode) \
27916 ZSTD_FOR_EACH_ROWLOG(X, dictMode, 4) \
27917 ZSTD_FOR_EACH_ROWLOG(X, dictMode, 5) \
27918 ZSTD_FOR_EACH_ROWLOG(X, dictMode, 6)
27920 #define ZSTD_FOR_EACH_MLS(X, dictMode) \
27925 #define ZSTD_FOR_EACH_DICT_MODE(X, ...) \
27926 X(__VA_ARGS__, noDict) \
27927 X(__VA_ARGS__, extDict) \
27928 X(__VA_ARGS__, dictMatchState) \
27929 X(__VA_ARGS__, dedicatedDictSearch)
27940 #define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls) \
27942 return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
27943 #define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls) \
27945 return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
27946 #define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog) \
27948 return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr);
27950 #define ZSTD_SWITCH_MLS(X, dictMode) \
27952 ZSTD_FOR_EACH_MLS(X, dictMode) \
27955 #define ZSTD_SWITCH_ROWLOG(dictMode, mls) \
27957 switch (rowLog) { \
27958 ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \
27960 ZSTD_UNREACHABLE; \
27963 #define ZSTD_SWITCH_SEARCH_METHOD(dictMode) \
27964 switch (searchMethod) { \
27965 case search_hashChain: \
27966 ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \
27968 case search_binaryTree: \
27969 ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \
27971 case search_rowHash: \
27972 ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \
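/*
 * Editorial sketch: the X-macro machinery above stamps out one specialized
 * search function per (searchMethod, dictMode, mls[, rowLog]) combination
 * and selects it with nested switches, so every variant is compiled with its
 * parameters as constants.  ZSTD_SWITCH_SEARCH_METHOD(noDict) expands,
 * schematically, to:
 *
 *     switch (searchMethod) {
 *     case search_hashChain:
 *         switch (mls) { case 4: return ZSTD_HcFindBestMatch_noDict_4(ms, ip, iend, offsetPtr); ... }
 *     case search_binaryTree:
 *         switch (mls) { case 4: return ZSTD_BtFindBestMatch_noDict_4(ms, ip, iend, offsetPtr); ... }
 *     case search_rowHash:
 *         switch (mls) { case 4:
 *             switch (rowLog) { case 4: return ZSTD_RowFindBestMatch_noDict_4_4(ms, ip, iend, offsetPtr); ... } }
 *     }
 */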
28032 const void* src, size_t srcSize,
28036 const BYTE* const istart = (const BYTE*)src;
28037 const BYTE* ip = istart;
28043 const BYTE* const prefixLowest = base + prefixLowestIndex;
28047 U32 offset_1 = rep[0], offset_2 = rep[1];
28048 U32 offsetSaved1 = 0, offsetSaved2 = 0;
28052 const int isDxS = isDMS || isDDS;
28056 const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
28058 const U32 dictIndexDelta = isDxS ?
28059 prefixLowestIndex - (U32)(dictEnd - dictBase) :
28061 const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
28063 DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
28064 ip += (dictAndPrefixLength == 0);
28066 U32 const curr = (U32)(ip - base);
28068 U32 const maxRep = curr - windowLow;
28069 if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
28070 if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
28075 assert(offset_1 <= dictAndPrefixLength);
28076 assert(offset_2 <= dictAndPrefixLength);
28087 #if defined(__GNUC__) && defined(__x86_64__)
28091 __asm__(".p2align 5");
28093 while (ip < ilimit) {
28094 size_t matchLength=0;
28097 DEBUGLOG(7, "search baseline (depth 0)");
28101 const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
28103 && repIndex < prefixLowestIndex) ?
28104 dictBase + (repIndex - dictIndexDelta) :
28106 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 )
28108 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
28110 if (depth==0) goto _storeSequence;
28116 if (depth==0) goto _storeSequence;
28120 { size_t offbaseFound = 999999999;
28121 size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode);
28122 if (ml2 > matchLength)
28123 matchLength = ml2, start = ip, offBase = offbaseFound;
28126 if (matchLength < 4) {
28142 while (ip<ilimit) {
28148 int const gain2 = (int)(mlRep * 3);
28150 if ((mlRep >= 4) && (gain2 > gain1))
28154 const U32 repIndex = (U32)(ip - base) - offset_1;
28155 const BYTE* repMatch = repIndex < prefixLowestIndex ?
28156 dictBase + (repIndex - dictIndexDelta) :
28158 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 )
28160 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
28162 int const gain2 = (int)(mlRep * 3);
28164 if ((mlRep >= 4) && (gain2 > gain1))
28168 { size_t ofbCandidate=999999999;
28169 size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
28172 if ((ml2 >= 4) && (gain2 > gain1)) {
28173 matchLength = ml2, offBase = ofbCandidate, start = ip;
28178 if ((depth==2) && (ip<ilimit)) {
28184 int const gain2 = (int)(mlRep * 4);
28186 if ((mlRep >= 4) && (gain2 > gain1))
28190 const U32 repIndex = (U32)(ip - base) - offset_1;
28191 const BYTE* repMatch = repIndex < prefixLowestIndex ?
28192 dictBase + (repIndex - dictIndexDelta) :
28194 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 )
28196 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
28198 int const gain2 = (int)(mlRep * 4);
28200 if ((mlRep >= 4) && (gain2 > gain1))
28204 { size_t ofbCandidate=999999999;
28205 size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
28208 if ((ml2 >= 4) && (gain2 > gain1)) {
28209 matchLength = ml2, offBase = ofbCandidate, start = ip;
28224 { start--; matchLength++; }
28228 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
28229 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
28250 while (ip <= ilimit) {
28251 U32 const current2 = (U32)(ip-base);
28252 U32 const repIndex = current2 - offset_2;
28253 const BYTE* repMatch = repIndex < prefixLowestIndex ?
28254 dictBase - dictIndexDelta + repIndex :
28256 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 )
28258 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
28260 offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;
28271 while ( ((ip <= ilimit) & (offset_2>0))
28275 offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;
28284 offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
28287 rep[0] = offset_1 ? offset_1 : offsetSaved1;
28288 rep[1] = offset_2 ? offset_2 : offsetSaved2;
28291 return (size_t)(iend - anchor);
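/*
 * Editorial sketch: the depth-1/depth-2 ladders above only trade the current
 * candidate for a match found one position later when the newcomer's "gain"
 * wins; length is scaled by 3 (depth 1) or 4 (depth 2) and, in the elided
 * gain1/gain2 expressions, an offset-cost term of roughly log2(offBase) bits
 * is subtracted.  A standalone sketch of that comparison (names and the exact
 * bonus terms are illustrative, not the zstd formula):
 *
 *     #include <stdint.h>
 *     static int lz_highbit(uint32_t v) { int n = 0; while (v >>= 1) n++; return n; }
 *     static int lazy_gain(size_t matchLength, size_t offBase, int scale)
 *     { return (int)(matchLength * scale) - lz_highbit((uint32_t)offBase); }
 *     // switch candidate when lazy_gain(ml2, ofbCandidate, 4) > lazy_gain(ml, offBase, 4) + bonus
 */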
28297 void const* src, size_t srcSize)
28304 void const* src, size_t srcSize)
28311 void const* src, size_t srcSize)
28318 void const* src, size_t srcSize)
28325 void const* src, size_t srcSize)
28332 void const* src, size_t srcSize)
28339 void const* src, size_t srcSize)
28354 void const* src, size_t srcSize)
28361 void const* src, size_t srcSize)
28376 void const* src, size_t srcSize)
28383 void const* src, size_t srcSize)
28390 void const* src, size_t srcSize)
28397 void const* src, size_t srcSize)
28404 void const* src, size_t srcSize)
28419 void const* src, size_t srcSize)
28426 void const* src, size_t srcSize)
28442 const void* src, size_t srcSize,
28445 const BYTE* const istart = (const BYTE*)src;
28446 const BYTE* ip = istart;
28452 const BYTE* const prefixStart = base + dictLimit;
28454 const BYTE* const dictEnd = dictBase + dictLimit;
28456 const U32 windowLog = ms->cParams.windowLog;
28460 U32 offset_1 = rep[0], offset_2 = rep[1];
28462 DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
28468 ip += (ip == prefixStart);
28474 #if defined(__GNUC__) && defined(__x86_64__)
28478 __asm__(".p2align 5");
28480 while (ip < ilimit) {
28481 size_t matchLength=0;
28488 const U32 repIndex = (U32)(curr+1 - offset_1);
28489 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
28490 const BYTE* const repMatch = repBase + repIndex;
28491 if ( ((U32)((dictLimit-1) - repIndex) >= 3)
28492 & (offset_1 <= curr+1 - windowLow) )
28495 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
28497 if (depth==0) goto _storeSequence;
28501 { size_t ofbCandidate = 999999999;
28503 if (ml2 > matchLength)
28504 matchLength = ml2, start = ip, offBase = ofbCandidate;
28507 if (matchLength < 4) {
28523 while (ip<ilimit) {
28529 const U32 repIndex = (U32)(curr - offset_1);
28530 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
28531 const BYTE* const repMatch = repBase + repIndex;
28532 if ( ((U32)((dictLimit-1) - repIndex) >= 3)
28533 & (offset_1 <= curr - windowLow) )
28536 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
28538 int const gain2 = (int)(repLength * 3);
28540 if ((repLength >= 4) && (gain2 > gain1))
28545 { size_t ofbCandidate = 999999999;
28549 if ((ml2 >= 4) && (gain2 > gain1)) {
28550 matchLength = ml2, offBase = ofbCandidate, start = ip;
28555 if ((depth==2) && (ip<ilimit)) {
28561 const U32 repIndex = (U32)(curr - offset_1);
28562 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
28563 const BYTE* const repMatch = repBase + repIndex;
28564 if ( ((U32)((dictLimit-1) - repIndex) >= 3)
28565 & (offset_1 <= curr - windowLow) )
28568 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
28570 int const gain2 = (int)(repLength * 4);
28572 if ((repLength >= 4) && (gain2 > gain1))
28577 { size_t ofbCandidate = 999999999;
28581 if ((ml2 >= 4) && (gain2 > gain1)) {
28582 matchLength = ml2, offBase = ofbCandidate, start = ip;
28591 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
28592 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
28612 while (ip <= ilimit) {
28613 const U32 repCurrent = (U32)(ip-base);
28615 const U32 repIndex = repCurrent - offset_2;
28616 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
28617 const BYTE* const repMatch = repBase + repIndex;
28618 if ( ((U32)((dictLimit-1) - repIndex) >= 3)
28619 & (offset_2 <= repCurrent - windowLow) )
28622 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
28624 offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;
28638 return (size_t)(iend - anchor);
28644 void const* src, size_t srcSize)
28675 void const* src, size_t srcSize)
28690 void const* src, size_t srcSize)
28723 #ifndef ZSTD_LDM_GEARTAB_H
28724 #define ZSTD_LDM_GEARTAB_H
28730 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
28731 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
28732 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
28733 0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889,
28734 0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e,
28735 0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140,
28736 0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e,
28737 0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f,
28738 0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391,
28739 0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210,
28740 0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be,
28741 0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a,
28742 0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b,
28743 0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4,
28744 0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb,
28745 0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312,
28746 0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01,
28747 0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc,
28748 0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967,
28749 0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553,
28750 0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f,
28751 0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2,
28752 0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d,
28753 0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a,
28754 0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74,
28755 0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3,
28756 0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1,
28757 0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b,
28758 0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568,
28759 0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a,
28760 0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1,
28761 0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9,
28762 0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463,
28763 0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba,
28764 0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9,
28765 0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61,
28766 0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec,
28767 0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6,
28768 0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab,
28769 0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5,
28770 0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59,
28771 0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7,
28772 0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc,
28773 0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb,
28774 0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be,
28775 0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312,
28776 0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1,
28777 0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc,
28778 0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d,
28779 0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445,
28780 0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c,
28781 0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5,
28782 0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5,
28783 0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28,
28784 0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a,
28785 0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9,
28786 0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15,
28787 0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef,
28788 0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2,
28789 0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375,
28790 0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3,
28791 0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595,
28792 0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389,
28793 0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4,
28794 0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756,
28795 0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc,
28796 0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45,
28797 0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea,
28798 0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f,
28799 0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc,
28800 0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c,
28801 0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a,
28802 0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17,
28803 0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3,
28804 0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4,
28805 0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91,
28806 0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40,
28807 0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741,
28808 0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f,
28809 0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4,
28810 0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad,
28811 0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047,
28812 0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2,
28813 0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e,
28814 0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b,
28821 #define LDM_BUCKET_SIZE_LOG 3
28822 #define LDM_MIN_MATCH_LENGTH 64
28823 #define LDM_HASH_RLOG 7
28854 if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
28855 state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
28858 state->stopMask = ((U64)1 << hashRateLog) - 1;
28868 BYTE const* data, size_t minMatchLength)
28873 #define GEAR_ITER_ONCE() do { \
28874 hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
28877 while (n + 3 < minMatchLength) {
28883 while (n < minMatchLength) {
28886 #undef GEAR_ITER_ONCE
28900 size_t* splits, unsigned* numSplits)
28906 mask = state->stopMask;
28909 #define GEAR_ITER_ONCE() do { \
28910 hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
28912 if (UNLIKELY((hash & mask) == 0)) { \
28913 splits[*numSplits] = n; \
28915 if (*numSplits == LDM_BATCH_SIZE) \
28920 while (n + 3 < size) {
28930 #undef GEAR_ITER_ONCE
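/*
 * Editorial sketch: the "gear" hash above is a rolling hash in which each
 * step shifts the state left by one and adds a 64-bit random constant
 * selected by the incoming byte, so a byte's influence expires after 64
 * steps.  A split point is declared whenever (hash & stopMask) == 0, which
 * on random input happens with probability about 2^-hashRateLog.  Standalone
 * step function (illustrative name):
 *
 *     #include <stdint.h>
 *     static uint64_t gear_step(uint64_t hash, uint8_t next, const uint64_t gearTab[256])
 *     { return (hash << 1) + gearTab[next]; }
 *     // split test: (hash & stopMask) == 0
 */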
28938 ZSTD_compressionParameters const* cParams)
28940 params->windowLog = cParams->windowLog;
28942 DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
28964 return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
28987 unsigned const offset = *pOffset;
28999 const BYTE* pIn, const BYTE* pAnchor,
29000 const BYTE* pMatch, const BYTE* pMatchBase)
29002 size_t matchLength = 0;
29003 while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
29008 return matchLength;
29017 const BYTE* pIn, const BYTE* pAnchor,
29018 const BYTE* pMatch, const BYTE* pMatchBase,
29019 const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
29022 if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
29024 return matchLength;
29026 DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
29028 DEBUGLOG(7, "final backwards match length = %zu", matchLength);
29029 return matchLength;
29042 const BYTE* const iend = (const BYTE*)end;
29076 BYTE const* const istart = ip;
29079 unsigned numSplits;
29081 DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
29084 while (ip < iend) {
29091 for (n = 0; n < numSplits; n++) {
29092 if (ip + splits[n] >= istart + minMatchLength) {
29093 BYTE const* const split = ip + splits[n] - minMatchLength;
29094 U64 const xxhash = XXH64(split, minMatchLength, 0);
29137 BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
29138 BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
29139 BYTE const* const lowPrefixPtr = base + dictLimit;
29141 BYTE const* const istart = (BYTE const*)src;
29146 BYTE const* ip = istart;
29152 unsigned numSplits;
29154 if (srcSize < minMatchLength)
29160 ip += minMatchLength;
29162 while (ip < ilimit) {
29168 splits, &numSplits);
29170 for (n = 0; n < numSplits; n++) {
29171 BYTE const* const split = ip + splits[n] - minMatchLength;
29172 U64 const xxhash = XXH64(split, minMatchLength, 0);
29175 candidates[n].split = split;
29182 for (n = 0; n < numSplits; n++) {
29183 size_t forwardMatchLength = 0, backwardMatchLength = 0,
29184 bestMatchLength = 0, mLength;
29186 BYTE const* const split = candidates[n].split;
29205 for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
29206 size_t curForwardMatchLength, curBackwardMatchLength,
29207 curTotalMatchLength;
29212 BYTE const* const curMatchBase =
29214 BYTE const* const pMatch = curMatchBase + cur->offset;
29215 BYTE const* const matchEnd =
29216 cur->offset < dictLimit ? dictEnd : iend;
29217 BYTE const* const lowMatchPtr =
29218 cur->offset < dictLimit ? dictStart : lowPrefixPtr;
29219 curForwardMatchLength =
29221 if (curForwardMatchLength < minMatchLength) {
29225 split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
29228 curForwardMatchLength = ZSTD_count(split, pMatch, iend);
29229 if (curForwardMatchLength < minMatchLength) {
29232 curBackwardMatchLength =
29235 curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
29237 if (curTotalMatchLength > bestMatchLength) {
29238 bestMatchLength = curTotalMatchLength;
29239 forwardMatchLength = curForwardMatchLength;
29240 backwardMatchLength = curBackwardMatchLength;
29247 if (bestEntry == NULL) {
29253 offset = (U32)(split - base) - bestEntry->offset;
29254 mLength = forwardMatchLength + backwardMatchLength;
29260 return ERROR(dstSize_tooSmall);
29264 rawSeqStore->size++;
29271 anchor = split + forwardMatchLength;
29298 U32 const reducerValue)
29302 if (table[u].offset < reducerValue) table[u].offset = 0;
29303 else table[u].offset -= reducerValue;
29312 BYTE const* const istart = (BYTE const*)src;
29314 size_t const kMaxChunkSize = 1 << 20;
29315 size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
29317 size_t leftoverSize = 0;
29329 for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
29330 BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
29331 size_t const remaining = (size_t)(iend - chunkStart);
29332 BYTE const* const chunkEnd =
29333 (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
29334 size_t const chunkSize = chunkEnd - chunkStart;
29335 size_t newLeftoverSize;
29336 size_t const prevSize = sequences->size;
29338 assert(chunkStart < iend);
29343 &ldmState->window, 0, maxDist, chunkStart);
29365 ldmState, sequences, params, chunkStart, chunkSize);
29367 return newLeftoverSize;
29373 if (prevSize < sequences->size) {
29375 leftoverSize = newLeftoverSize;
29377 assert(newLeftoverSize == chunkSize);
29378 leftoverSize += chunkSize;
29389 if (srcSize <= seq->litLength) {
29396 if (srcSize < seq->matchLength) {
29401 if (rawSeqStore->pos + 1 < rawSeqStore->size) {
29404 rawSeqStore->pos++;
29410 rawSeqStore->pos++;
29422 U32 const remaining, U32 const minMatch)
29428 rawSeqStore->pos++;
29432 if (remaining <= sequence.litLength) {
29436 if (sequence.matchLength < minMatch) {
29447 while (currPos && rawSeqStore->pos < rawSeqStore->size) {
29451 rawSeqStore->pos++;
29457 if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
29464 ZSTD_paramSwitch_e useRowMatchFinder,
29465 void const* src, size_t srcSize)
29467 const ZSTD_compressionParameters* const cParams = &ms->cParams;
29468 unsigned const minMatch = cParams->minMatch;
29472 BYTE const* const istart = (BYTE const*)src;
29475 BYTE const* ip = istart;
29482 lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
29490 while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
29493 (U32)(iend - ip), minMatch);
29505 DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
29507 size_t const newLitLength =
29508 blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
29525 return blockCompressor(ms, seqStore, rep, ip, iend - ip);
29544 #define ZSTD_LITFREQ_ADD 2
29545 #define ZSTD_MAX_PRICE (1<<30)
29547 #define ZSTD_PREDEF_THRESHOLD 8
29555 # define BITCOST_ACCURACY 0
29556 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29557 # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
29559 # define BITCOST_ACCURACY 8
29560 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29561 # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
29563 # define BITCOST_ACCURACY 8
29564 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29565 # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
29580 U32 const stat = rawStat + 1;
29586 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
29587 U32 const weight = BWeight + FWeight;
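/*
 * Editorial sketch: WEIGHT() maps a symbol's frequency statistic to an
 * approximate bit cost.  ZSTD_bitWeight is a plain log2 scaled by
 * BITCOST_MULTIPLIER; ZSTD_fracWeight keeps BITCOST_ACCURACY fractional
 * bits, matching the BWeight/FWeight split above.  A self-contained version
 * consistent with those expressions (illustrative names):
 *
 *     #include <stdint.h>
 *     #define ACC 8  // fractional bits, mirrors BITCOST_ACCURACY
 *     static uint32_t hb32(uint32_t v) { uint32_t n = 0; while (v >>= 1) n++; return n; }
 *     static uint32_t fracWeight(uint32_t rawStat)
 *     {
 *         uint32_t const stat = rawStat + 1;            // avoid log2(0)
 *         uint32_t const hb = hb32(stat);
 *         uint32_t const BWeight = hb << ACC;           // integer part of log2
 *         uint32_t const FWeight = (stat << ACC) >> hb; // linear fractional part
 *         return BWeight + FWeight;                     // ~log2(stat) in 1/256-bit units
 *     }
 */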
29592 #if (DEBUGLEVEL>=2)
29621 for (n=0; n<nbElts; n++) {
29633 DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
29634 (unsigned)lastEltIndex+1, (unsigned)shift );
29636 for (s=0; s<lastEltIndex+1; s++) {
29637 unsigned const base = base1 ? 1 : (table[s]>0);
29638 unsigned const newStat = base + (table[s] >> shift);
29651 U32 const factor = prevsum >> logTarget;
29652 DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
29654 if (factor <= 1) return prevsum;
29668 int const optLevel)
29688 if (compressedLiterals) {
29693 for (lit=0; lit<=MaxLit; lit++) {
29694 U32 const scaleLog = 11;
29696 assert(bitCost <= scaleLog);
29697 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1;
29705 for (ll=0; ll<=MaxLL; ll++) {
29706 U32 const scaleLog = 10;
29708 assert(bitCost < scaleLog);
29709 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1;
29717 for (ml=0; ml<=MaxML; ml++) {
29718 U32 const scaleLog = 10;
29720 assert(bitCost < scaleLog);
29721 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1;
29729 for (of=0; of<=MaxOff; of++) {
29730 U32 const scaleLog = 10;
29732 assert(bitCost < scaleLog);
29733 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1;
29740 if (compressedLiterals) {
29747 { unsigned const baseLLfreqs[MaxLL+1] = {
29748 4, 2, 1, 1, 1, 1, 1, 1,
29749 1, 1, 1, 1, 1, 1, 1, 1,
29750 1, 1, 1, 1, 1, 1, 1, 1,
29751 1, 1, 1, 1, 1, 1, 1, 1,
29759 for (ml=0; ml<=MaxML; ml++)
29764 { unsigned const baseOFCfreqs[MaxOff+1] = {
29765 6, 2, 1, 1, 2, 3, 4, 4,
29766 4, 3, 2, 1, 1, 1, 1, 1,
29767 1, 1, 1, 1, 1, 1, 1, 1,
29768 1, 1, 1, 1, 1, 1, 1, 1
29778 if (compressedLiterals)
29795 if (litLength == 0) return 0;
29808 for (u=0; u < litLength; u++) {
29810 if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
29823 return WEIGHT(litLength, optLevel);
29849 U32 const matchLength,
29851 int const optLevel)
29859 return WEIGHT(mlBase, optLevel)
29864 if ((optLevel<2) && offCode >= 20)
29874 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
29882 U32 offBase, U32 matchLength)
29887 for (u=0; u < litLength; u++)
29934 U32* nextToUpdate3,
29940 U32 idx = *nextToUpdate3;
29945 while(idx < target) {
29950 *nextToUpdate3 = target;
29951 return hashTable3[hash3];
29964 const BYTE* const ip, const BYTE* const iend,
29966 U32 const mls, const int extDict)
29968 const ZSTD_compressionParameters* const cParams = &ms->cParams;
29970 U32 const hashLog = cParams->hashLog;
29973 U32 const btLog = cParams->chainLog - 1;
29974 U32 const btMask = (1 << btLog) - 1;
29975 U32 matchIndex = hashTable[h];
29976 size_t commonLengthSmaller=0, commonLengthLarger=0;
29980 const BYTE* const dictEnd = dictBase + dictLimit;
29981 const BYTE* const prefixStart = base + dictLimit;
29984 const U32 btLow = btMask >= curr ? 0 : curr - btMask;
29985 U32* smallerPtr = bt + 2*(curr&btMask);
29986 U32* largerPtr = smallerPtr + 1;
29992 U32 matchEndIdx = curr+8+1;
29993 size_t bestLength = 8;
29994 U32 nbCompares = 1U << cParams->searchLog;
29995 #ifdef ZSTD_C_PREDICT
29996 U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
29997 U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
29998 predictedSmall += (predictedSmall>0);
29999 predictedLarge += (predictedLarge>0);
30002 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
30006 hashTable[h] = curr;
30009 for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
30010 U32* const nextPtr = bt + 2*(matchIndex & btMask);
30011 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);
30012 assert(matchIndex < curr);
30014 #ifdef ZSTD_C_PREDICT
30015 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);
30016 if (matchIndex == predictedSmall) {
30018 *smallerPtr = matchIndex;
30019 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }
30020 smallerPtr = nextPtr+1;
30021 matchIndex = nextPtr[1];
30022 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
30025 if (matchIndex == predictedLarge) {
30026 *largerPtr = matchIndex;
30027 if (matchIndex <= btLow) { largerPtr=&dummy32; break; }
30028 largerPtr = nextPtr;
30029 matchIndex = nextPtr[0];
30030 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
30035 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
30036 assert(matchIndex+matchLength >= dictLimit);
30037 match = base + matchIndex;
30040 match = dictBase + matchIndex;
30042 if (matchIndex+matchLength >= dictLimit)
30043 match = base + matchIndex;
30046 if (matchLength > bestLength) {
30047 bestLength = matchLength;
30048 if (matchLength > matchEndIdx - matchIndex)
30049 matchEndIdx = matchIndex + (U32)matchLength;
30052 if (ip+matchLength == iend) {
30056 if (match[matchLength] < ip[matchLength]) {
30058 *smallerPtr = matchIndex;
30059 commonLengthSmaller = matchLength;
30060 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }
30061 smallerPtr = nextPtr+1;
30062 matchIndex = nextPtr[1];
30065 *largerPtr = matchIndex;
30066 commonLengthLarger = matchLength;
30067 if (matchIndex <= btLow) { largerPtr=&dummy32; break; }
30068 largerPtr = nextPtr;
30069 matchIndex = nextPtr[0];
30072 *smallerPtr = *largerPtr = 0;
30073 { U32 positions = 0;
30074 if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));
30076 return MAX(positions, matchEndIdx - (curr + 8));
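/*
 * Editorial sketch: ZSTD_insertBt1 returns how many extra positions the
 * caller may skip: once a very long match is found, inserting every interior
 * position adds little information but costs a full tree descent each time.
 * The elided caller loop in ZSTD_updateTree_internal advances by that
 * amount, schematically:
 *
 *     while (idx < target)
 *         idx += (U32)ZSTD_insertBt1(ms, base + idx, iend, target, mls, extDict);
 */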
30083 const BYTE* const ip, const BYTE* const iend,
30087 U32 const target = (U32)(ip - base);
30089 DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
30090 idx, target, dictMode);
30092 while(idx < target) {
30097 assert((size_t)(ip - base) <= (size_t)(U32)(-1));
30098 assert((size_t)(iend - base) <= (size_t)(U32)(-1));
30110 U32* nextToUpdate3,
30111 const BYTE* const ip, const BYTE* const iLimit,
30115 const U32 lengthToBeat,
30118 const ZSTD_compressionParameters* const cParams = &ms->cParams;
30122 U32 const hashLog = cParams->hashLog;
30123 U32 const minMatch = (mls==3) ? 3 : 4;
30126 U32 matchIndex = hashTable[h];
30128 U32 const btLog = cParams->chainLog - 1;
30129 U32 const btMask= (1U << btLog) - 1;
30130 size_t commonLengthSmaller=0, commonLengthLarger=0;
30133 const BYTE* const dictEnd = dictBase + dictLimit;
30134 const BYTE* const prefixStart = base + dictLimit;
30135 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
30137 U32 const matchLow = windowLow ? windowLow : 1;
30138 U32* smallerPtr = bt + 2*(curr&btMask);
30139 U32* largerPtr = bt + 2*(curr&btMask) + 1;
30140 U32 matchEndIdx = curr+8+1;
30143 U32 nbCompares = 1U << cParams->searchLog;
30146 const ZSTD_compressionParameters* const dmsCParams =
30156 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
30158 size_t bestLength = lengthToBeat-1;
30159 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
30165 for (repCode = ll0; repCode < lastR; repCode++) {
30166 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
30167 U32 const repIndex = curr - repOffset;
30169 assert(curr >= dictLimit);
30170 if (repOffset-1 < curr-dictLimit) {
30175 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
30179 dmsBase + repIndex - dmsIndexDelta :
30180 dictBase + repIndex;
30181 assert(curr >= windowLow);
30183 && ( ((repOffset-1) < curr - windowLow)
30184 & (((U32)((dictLimit-1) - repIndex) >= 3) ) )
30189 && ( ((repOffset-1) < curr - (dmsLowLimit + dmsIndexDelta))
30190 & ((U32)((dictLimit-1) - repIndex) >= 3) )
30195 if (repLen > bestLength) {
30196 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
30197 repCode, ll0, repOffset, repLen);
30198 bestLength = repLen;
30200 matches[mnum].len = (U32)repLen;
30202 if ( (repLen > sufficient_len)
30203 | (ip+repLen == iLimit) ) {
30208 if ((mls == 3) && (bestLength < mls)) {
30210 if ((matchIndex3 >= matchLow)
30211 & (curr - matchIndex3 < (1<<18)) ) {
30217 const BYTE* const match = dictBase + matchIndex3;
30222 if (mlen >= mls) {
30223 DEBUGLOG(8, "found small match with hlog3, of length %u",
30226 assert(curr > matchIndex3);
30229 matches[0].len = (U32)mlen;
30231 if ( (mlen > sufficient_len) |
30232 (ip+mlen == iLimit) ) {
30239 hashTable[h] = curr;
30241 for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
30242 U32* const nextPtr = bt + 2*(matchIndex & btMask);
30244 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);
30245 assert(curr > matchIndex);
30248 assert(matchIndex+matchLength >= dictLimit);
30250 if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);
30253 match = dictBase + matchIndex;
30256 if (matchIndex+matchLength >= dictLimit)
30260 if (matchLength > bestLength) {
30261 DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
30263 assert(matchEndIdx > matchIndex);
30264 if (matchLength > matchEndIdx - matchIndex)
30265 matchEndIdx = matchIndex + (U32)matchLength;
30266 bestLength = matchLength;
30268 matches[mnum].len = (U32)matchLength;
30271 | (ip+matchLength == iLimit) ) {
30276 if (match[matchLength] < ip[matchLength]) {
30278 *smallerPtr = matchIndex;
30279 commonLengthSmaller = matchLength;
30280 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }
30281 smallerPtr = nextPtr+1;
30282 matchIndex = nextPtr[1];
30284 *largerPtr = matchIndex;
30285 commonLengthLarger = matchLength;
30286 if (matchIndex <= btLow) { largerPtr=&dummy32; break; }
30287 largerPtr = nextPtr;
30288 matchIndex = nextPtr[0];
30291 *smallerPtr = *largerPtr = 0;
30293 assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX));
30298 commonLengthSmaller = commonLengthLarger = 0;
30299 for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
30300 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
30301 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);
30302 const BYTE* match = dmsBase + dictMatchIndex;
30304 if (dictMatchIndex+matchLength >= dmsHighLimit)
30305 match = base + dictMatchIndex + dmsIndexDelta;
30307 if (matchLength > bestLength) {
30308 matchIndex = dictMatchIndex + dmsIndexDelta;
30309 DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
30311 if (matchLength > matchEndIdx - matchIndex)
30312 matchEndIdx = matchIndex + (U32)matchLength;
30313 bestLength = matchLength;
30315 matches[mnum].len = (U32)matchLength;
30318 | (ip+matchLength == iLimit) ) {
30322 if (dictMatchIndex <= dmsBtLow) { break; }
30323 if (match[matchLength] < ip[matchLength]) {
30324 commonLengthSmaller = matchLength;
30325 dictMatchIndex = nextPtr[1];
30328 commonLengthLarger = matchLength;
30329 dictMatchIndex = nextPtr[0];
30332 assert(matchEndIdx > curr+8);
30345 U32 const lengthToBeat);
30350 U32* nextToUpdate3,
30352 const BYTE* const iHighLimit,
30355 U32 const lengthToBeat,
30360 DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
30367 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
30369 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
30370 static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
30371 ZSTD_match_t* matches, \
30372 ZSTD_matchState_t* ms, \
30373 U32* nextToUpdate3, \
30375 const BYTE* const iHighLimit, \
30376 const U32 rep[ZSTD_REP_NUM], \
30378 U32 const lengthToBeat) \
30380 return ZSTD_btGetAllMatches_internal( \
30381 matches, ms, nextToUpdate3, ip, iHighLimit, \
30382 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
30385 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
30386 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
30387 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
30388 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
30389 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
30395 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
30397 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
30398 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
30399 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
30400 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
30414 return getAllMatchesFns[(int)dictMode][mls - 3];
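/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * The macros above stamp out one specialized ZSTD_btGetAllMatches_* function
 * per (dictMode, mls) pair, then select one through a 2-D function-pointer
 * table indexed by (dictMode, mls - 3). A minimal sketch of that pattern,
 * with hypothetical demo_* names: */
typedef unsigned (*demo_fn)(unsigned input);
#define DEMO_GEN(mode, mls) static unsigned demo_##mode##_##mls(unsigned input) { return input * (mls) + (mode); }
DEMO_GEN(0, 3) DEMO_GEN(0, 4) DEMO_GEN(1, 3) DEMO_GEN(1, 4)
static unsigned demo_dispatch(int mode, unsigned mls, unsigned input)
{
    /* one row per dict mode, one column per mls in [3,4] */
    static const demo_fn table[2][2] = {
        { demo_0_3, demo_0_4 },
        { demo_1_3, demo_1_4 },
    };
    return table[mode][mls - 3](input);   /* same "mls - 3" rebasing as above */
}
/* ---- end of sketch ---- */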
30424 U32 startPosInBlock;
30436 while (currPos && rawSeqStore->pos < rawSeqStore->size) {
30440 rawSeqStore->pos++;
30446 if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
30457 U32 blockBytesRemaining)
30460 U32 currBlockEndPos;
30461 U32 literalsBytesRemaining;
30462 U32 matchBytesRemaining;
30474 currBlockEndPos = currPosInBlock + blockBytesRemaining;
30478 matchBytesRemaining = (literalsBytesRemaining == 0) ?
30483 if (literalsBytesRemaining >= blockBytesRemaining) {
30519 if (currPosInBlock < optLdm->startPosInBlock
30521    || candidateMatchLength < MINMATCH) {
30525 if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
30527 DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
30528 candidateOffBase, candidateMatchLength, currPosInBlock);
30529 matches[*nbMatches].len = candidateMatchLength;
30530 matches[*nbMatches].off = candidateOffBase;
30541 U32 currPosInBlock, U32 remainingBytes)
30568 return sol.litlen + sol.mlen;
30574 listStats(const U32* table, int lastEltID)
30576 int const nbElts = lastEltID + 1;
30578 for (enb=0; enb < nbElts; enb++) {
30592 const void* src, size_t srcSize,
30593 const int optLevel,
30597 const BYTE* const istart = (const BYTE*)src;
30598 const BYTE* ip = istart;
30601 const BYTE* const ilimit = iend - 8;
30604 const ZSTD_compressionParameters* const cParams = &ms->cParams;
30609 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
30624 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
30628 ip += (ip==prefixStart);
30631 while (ip < ilimit) {
30632 U32 cur, last_pos = 0;
30636 U32 const ll0 = !litlen;
30637 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
30640 if (!nbMatches) { ip++; continue; }
30654 {   U32 const maxML = matches[nbMatches-1].len;
30655 U32 const maxOffBase = matches[nbMatches-1].off;
30656 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
30657 nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
30659 if (maxML > sufficient_len) {
30660 lastSequence.litlen = litlen;
30661 lastSequence.mlen = maxML;
30662 lastSequence.off = maxOffBase;
30663 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
30664 maxML, sufficient_len);
30667 goto _shortestPath;
30671 assert(opt[0].price >= 0);
30675 for (pos = 1; pos < minMatch; pos++) {
30678 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
30679 U32 const offBase = matches[matchNb].off;
30680 U32 const end = matches[matchNb].len;
30681 for ( ; pos <= end ; pos++ ) {
30683 U32 const sequencePrice = literalsPrice + matchPrice;
30684 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
30685 pos, ZSTD_fCost((int)sequencePrice));
30686 opt[pos].mlen = pos;
30687 opt[pos].off = offBase;
30688 opt[pos].litlen = litlen;
30689 opt[pos].price = (int)sequencePrice;
30696 for (cur = 1; cur <= last_pos; cur++) {
30697 const BYTE* const inr = ip + cur;
30699 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
30702 {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
30703 int const price = opt[cur-1].price
30707 assert(price < 1000000000);
30708 if (price <= opt[cur].price) {
30709 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
30710 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
30711 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
30714 opt[cur].litlen = litlen;
30715 opt[cur].price = price;
30717 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
30718 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
30719 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
30729 assert(cur >= opt[cur].mlen);
30730 if (opt[cur].mlen != 0) {
30731 U32 const prev = cur - opt[cur].mlen;
30739 if (inr > ilimit) continue;
30741 if (cur == last_pos) break;
30745 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
30749 assert(opt[cur].price >= 0);
30750 {   U32 const ll0 = (opt[cur].mlen != 0);
30751 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
30752 U32 const previousPrice = (U32)opt[cur].price;
30754 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
30758 (U32)(inr-istart), (U32)(iend-inr));
30761 DEBUGLOG(7, "rPos:%u : no match found", cur);
30765 {   U32 const maxML = matches[nbMatches-1].len;
30766 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
30767 inr-istart, cur, nbMatches, maxML);
30769 if ( (maxML > sufficient_len)
30771 lastSequence.mlen = maxML;
30772 lastSequence.off = matches[nbMatches-1].off;
30773 lastSequence.litlen = litlen;
30774 cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;
30777 goto _shortestPath;
30781 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
30782 U32 const offset = matches[matchNb].off;
30783 U32 const lastML = matches[matchNb].len;
30784 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
30787 DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
30788 matchNb, matches[matchNb].off, lastML, litlen);
30790 for (mlen = lastML; mlen >= startML; mlen--) {
30791 U32 const pos = cur + mlen;
30794 if ((pos > last_pos) || (price < opt[pos].price)) {
30795 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
30796 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
30798 opt[pos].mlen = mlen;
30799 opt[pos].off = offset;
30800 opt[pos].litlen = litlen;
30801 opt[pos].price = price;
30803 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
30804 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
30805 if (optLevel==0) break;
30810 lastSequence = opt[last_pos];
30815 assert(opt[0].mlen == 0);
30821 if (lastSequence.mlen != 0) {
30828 {   U32 const storeEnd = cur + 1;
30829 U32 storeStart = storeEnd;
30832 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
30833 last_pos, cur); (void)last_pos;
30835 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
30836 storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
30837 opt[storeEnd] = lastSequence;
30838 while (seqPos > 0) {
30841 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
30842 seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
30843 opt[storeStart] = opt[seqPos];
30844 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
30848 DEBUGLOG(6, "sending selected sequences into seqStore")
30850 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
30852 U32 const mlen = opt[storePos].mlen;
30853 U32 const offBase = opt[storePos].off;
30854 U32 const advance = llen + mlen;
30855 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
30856 anchor - istart, (unsigned)llen, (unsigned)mlen);
30859 assert(storePos == storeEnd);
30875 return (size_t)(iend - anchor);
30894 const void* src, size_t srcSize)
30896 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
30912 const void* src, size_t srcSize)
30944 const void* src, size_t srcSize)
30972 const void* src, size_t srcSize)
30979 const void* src, size_t srcSize)
30986 const void* src, size_t srcSize)
30993 const void* src, size_t srcSize)
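/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * The block compressors above are reached when a btopt-family strategy is
 * selected. With the stable public API that can be requested explicitly via
 * ZSTD_c_strategy (or implicitly by a high compression level). Error handling
 * abbreviated; the demo_* name is hypothetical. */
#include "zstd.h"
static size_t demo_compress_btopt(void* dst, size_t dstCap, const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t cSize = 0;
    if (cctx == NULL) return 0;
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_btopt);  /* force the optimal parser */
    cSize = ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
    ZSTD_freeCCtx(cctx);
    return ZSTD_isError(cSize) ? 0 : cSize;
}
/* ---- end of sketch ---- */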
31002 #ifdef ZSTD_MULTITHREAD
31016 #if defined(_MSC_VER)
31017 # pragma warning(disable : 4204)
31022 #define ZSTDMT_OVERLAPLOG_DEFAULT 0
31039 #define ZSTD_RESIZE_SEQPOOL 0
31042 #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \
31043 && !defined(_MSC_VER) \
31044 && !defined(__MINGW32__)
31046 # include <stdio.h>
31047 # include <unistd.h>
31048 # include <sys/times.h>
31050 # define DEBUG_PRINTHEX(l,p,n) { \
31051 unsigned debug_u; \
31052 for (debug_u=0; debug_u<(n); debug_u++) \
31053 RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
31054 RAWLOG(l, " \n"); \
31057 static unsigned long long GetCurrentClockTimeMicroseconds(void)
31059 static clock_t _ticksPerSecond = 0;
31060 if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
31062 {   struct tms junk; clock_t newTicks = (clock_t) times(&junk);
31063 return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
31066 #define MUTEX_WAIT_TIME_DLEVEL 6
31067 #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
31068 if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
31069 unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
31070 ZSTD_pthread_mutex_lock(mutex); \
31071 { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
31072 unsigned long long const elapsedTime = (afterTime-beforeTime); \
31073 if (elapsedTime > 1000) { \
31074 DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
31075 elapsedTime, #mutex); \
31078 ZSTD_pthread_mutex_lock(mutex); \
31084 # define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
31085 # define DEBUG_PRINTHEX(l,p,n) {}
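/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * The ZSTD_PTHREAD_MUTEX_LOCK macro above times lock acquisition and reports
 * waits above 1 ms. The same idea with plain POSIX primitives, assuming a
 * platform with clock_gettime(); the demo_* name is hypothetical. */
#include <pthread.h>
#include <stdio.h>
#include <time.h>
static void demo_timed_lock(pthread_mutex_t* mutex)
{
    struct timespec t0, t1;
    clock_gettime(CLOCK_MONOTONIC, &t0);
    pthread_mutex_lock(mutex);
    clock_gettime(CLOCK_MONOTONIC, &t1);
    {   long long const elapsedUs = (t1.tv_sec - t0.tv_sec) * 1000000LL
                                  + (t1.tv_nsec - t0.tv_nsec) / 1000;
        if (elapsedUs > 1000)   /* same 1 ms threshold as the macro above */
            fprintf(stderr, "lock acquisition took %lld microseconds\n", elapsedUs);
    }
}
/* ---- end of sketch ---- */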
31105 ZSTD_customMem cMem;
31113 if (bufPool==NULL) return NULL;
31121 bufPool->cMem = cMem;
31128 DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
31129 if (!bufPool) return;
31130 for (u=0; u<bufPool->totalBuffers; u++) {
31131 DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
31141 size_t const poolSize = sizeof(*bufPool)
31144 size_t totalBufferSize = 0;
31150 return poolSize + totalBufferSize;
31168 if (srcBufPool==NULL) return NULL;
31172 { ZSTD_customMem const cMem = srcBufPool->cMem;
31173 size_t const bSize = srcBufPool->bufferSize;
31177 if (newBufPool==NULL) return newBufPool;
31194 size_t const availBufferSize = buf.capacity;
31196 if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
31198 DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
31204 DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing");
31209 DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer");
31215 DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!");
31217 DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
31223 #if ZSTD_RESIZE_SEQPOOL
31237 if (start != NULL) {
31240 DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
31243 DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!");
31252 DEBUGLOG(5, "ZSTDMT_releaseBuffer");
31253 if (buf.start == NULL) return;
31257 DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
31264 DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
31273 #define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
31277 #define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
31312 #if ZSTD_RESIZE_SEQPOOL
31332 if (seqPool == NULL) return NULL;
31355 ZSTD_customMem cMem;
31363 for (cid=0; cid<pool->totalCCtx; cid++)
31372 ZSTD_customMem cMem)
31377 if (!cctxPool) return NULL;
31382 cctxPool->cMem = cMem;
31387 DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
31394 if (srcPool==NULL) return NULL;
31395 if (nbWorkers <= srcPool->totalCCtx) return srcPool;
31397 { ZSTD_customMem const cMem = srcPool->cMem;
31407 {   unsigned const nbWorkers = cctxPool->totalCCtx;
31408 size_t const poolSize = sizeof(*cctxPool)
31411 size_t totalCCtxSize = 0;
31412 for (u=0; u<nbWorkers; u++) {
31417 return poolSize + totalCCtxSize;
31432 DEBUGLOG(5, "create one more CCtx");
31438 if (cctx==NULL) return;
31444 DEBUGLOG(4, "CCtx pool overflow : free cctx");
31461 ZSTD_CCtx_params params;
31464 unsigned nextJobID;
31476 ZSTD_CCtx_params params,
31478 const void* dict, size_t const dictSize,
31479 ZSTD_dictContentType_e dictContentType)
31482 if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
31483 DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
31485 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
31486 assert(params.ldmParams.hashRateLog < 32);
31488 ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
31491 if (params.fParams.checksumFlag)
31493 if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
31494 ZSTD_customMem cMem = params.customMem;
31495 unsigned const hashLog = params.ldmParams.hashLog;
31497 unsigned const bucketLog =
31498 params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
31499 unsigned const prevBucketLog =
31500 serialState->params.ldmParams.hashLog -
31501 serialState->params.ldmParams.bucketSizeLog;
31502 size_t const numBuckets = (size_t)1 << bucketLog;
31524 if (dictSize > 0) {
31525 if (dictContentType == ZSTD_dct_rawContent) {
31526 BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
31539 serialState->params = params;
31540 serialState->params.jobSize = (U32)jobSize;
31547 ZSTD_memset(serialState, 0, sizeof(*serialState));
31557 ZSTD_customMem cMem = serialState->params.customMem;
31572 while (serialState->nextJobID < jobID) {
31573 DEBUGLOG(5, "wait for serialState->cond");
31579 if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
31586 &serialState->ldmState, &seqStore,
31598 if (serialState->params.fParams.checksumFlag && src.size > 0)
31606 if (seqStore.size > 0) {
31608 jobCCtx, seqStore.seq, seqStore.size);
31616 unsigned jobID, size_t cSize)
31621 DEBUGLOG(5, "Skipping past job %u because of error", jobID);
31656 ZSTD_CCtx_params params;
31658 unsigned long long fullFrameSize;
31660 unsigned frameChecksumNeeded;
31663 #define JOB_ERROR(e) { \
31664 ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
31666 ZSTD_pthread_mutex_unlock(&job->job_mutex); \
31674 ZSTD_CCtx_params jobParams = job->params;
31678 size_t lastCBlockSize = 0;
31682 if (dstBuff.start == NULL) {
31687 if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
31693 if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
31695 jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
31697 jobParams.nbWorkers = 0;
31718 &jobParams, pledgedSrcSize);
31728 DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
31734 int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize);
31740 if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize);
31741 DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
31743 for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
31750 job->cSize += cSize;
31751 job->consumed = chunkSize * chunkNb;
31752 DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)",
31759 assert((chunkSize & (chunkSize - 1)) == 0);
31760 if ((nbChunks > 0) | job->lastJob ) {
31761 size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
31762 size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
31763 size_t const cSize = (job->lastJob) ?
31767 lastCBlockSize = cSize;
31788 job->cSize += lastCBlockSize;
31821 #define RSYNC_LENGTH 32
31829 #define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
31830 #define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)
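/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * The rsyncable mode below scans input with a multiplicative rolling hash over
 * the last RSYNC_LENGTH bytes and ends a job whenever the low bits of the hash
 * all match a hit mask, producing content-defined job boundaries. A simplified
 * sketch of that technique; the prime constant and demo_* names are
 * assumptions for illustration only -- zstd derives its own constants. */
#include <stddef.h>
#include <stdint.h>
#define DEMO_RSYNC_LENGTH 32
static uint64_t demo_pow(uint64_t base, unsigned exp) { uint64_t r = 1; while (exp--) r *= base; return r; }
static size_t demo_find_sync_point(const uint8_t* buf, size_t size, uint64_t hitMask)
{
    uint64_t const prime = 0x9E3779B185EBCA87ULL;                /* assumed multiplier */
    uint64_t const primePower = demo_pow(prime, DEMO_RSYNC_LENGTH - 1);
    uint64_t hash = 0;
    size_t pos;
    if (size < DEMO_RSYNC_LENGTH) return size;                   /* window never fills */
    for (pos = 0; pos < DEMO_RSYNC_LENGTH; ++pos)                /* prime the window */
        hash = hash * prime + buf[pos];
    for (; pos < size; ++pos) {
        if ((hash & hitMask) == hitMask) return pos;             /* boundary found */
        /* roll: drop the byte leaving the window, add the byte entering it
         * (all arithmetic intentionally wraps mod 2^64) */
        hash = (hash - buf[pos - DEMO_RSYNC_LENGTH] * primePower) * prime + buf[pos];
    }
    return size;
}
/* ---- end of sketch ---- */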
31869 if (jobTable == NULL) return;
31870 for (jobNb=0; jobNb<nbJobs; jobNb++) {
31883 U32 const nbJobs = 1 << nbJobsLog2;
31888 if (jobTable==NULL) return NULL;
31889 *nbJobsPtr = nbJobs;
31890 for (jobNb=0; jobNb<nbJobs; jobNb++) {
31894 if (initError != 0) {
31902 U32 nbJobs = nbWorkers + 2;
31907 if (mtctx->jobs==NULL) return ERROR(memory_allocation);
31908 assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0));
31925 U32 nbJobs = nbWorkers + 2;
31927 DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers);
31929 if (nbWorkers < 1) return NULL;
31931 if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
31936 if (!mtctx) return NULL;
31938 mtctx->cMem = cMem;
31940 if (pool != NULL) {
31949 assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0);
31960 DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers);
31966 #ifdef ZSTD_MULTITHREAD
31982 DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
31983 for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
32003 DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
32018 if (mtctx==NULL) return 0;
32036 if (mtctx == NULL) return 0;
32037 return sizeof(*mtctx)
32055 if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
32057 if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
32059 if (mtctx->seqPool == NULL) return ERROR(memory_allocation);
32070 U32 const saved_wlog = mtctx->params.cParams.windowLog;
32072 DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
32076 cParams.windowLog = saved_wlog;
32077 mtctx->params.cParams = cParams;
32087 ZSTD_frameProgression fps;
32088 DEBUGLOG(5, "ZSTDMT_getFrameProgression");
32091 fps.produced = fps.flushed = mtctx->produced;
32093 fps.nbActiveWorkers = 0;
32096 DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
32098 for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
32099 unsigned const wJobID = jobNb & mtctx->jobIDMask;
32102 {   size_t const cResult = jobPtr->cSize;
32103 size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
32105 assert(flushed <= produced);
32106 fps.ingested += jobPtr->src.size;
32108 fps.produced += produced;
32109 fps.flushed += flushed;
32122 unsigned const jobID = mtctx->doneJobID;
32123 assert(jobID <= mtctx->nextJobID);
32124 if (jobID == mtctx->nextJobID) return 0;
32127 {   unsigned const wJobID = jobID & mtctx->jobIDMask;
32130 {   size_t const cResult = jobPtr->cSize;
32131 size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
32133 assert(flushed <= produced);
32135 toFlush = produced - flushed;
32159 if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
32163 jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3);
32165 jobLog = MAX(20, params->cParams.windowLog + 2);
32193 assert(0 <= ovlog && ovlog <= 9);
32200 int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
32201 int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
32202 assert(0 <= overlapRLog && overlapRLog <= 8);
32203 if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
32211 assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
32212 DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
32213 DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
32214 return (ovLog==0) ? 0 : (size_t)1 << ovLog;
32223 const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
32224 const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
32225 unsigned long long pledgedSrcSize)
32227 DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
32232 assert(!((dict) && (cdict)));
32235 if (params.nbWorkers != mtctx->params.nbWorkers)
32241 DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
32254 ZSTD_dlm_byCopy, dictContentType,
32255 params.cParams, mtctx->cMem);
32261 mtctx->cdict = cdict;
32272 if (params.rsyncable) {
32279 DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
32290 size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
32300 size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1);
32302 size_t const capacity = MAX(windowSize, sectionsSize) + slackSize;
32309 return ERROR(memory_allocation);
32326 dict, dictSize, dictContentType))
32327 return ERROR(memory_allocation);
32361 DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full");
32368 DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
32404 mtctx->params.fParams.checksumFlag = 0;
32409 DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame");
32417 DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))",
32427 DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID);
32443 DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)",
32453 DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none",
32457 DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
32463 {   size_t cSize = mtctx->jobs[wJobID].cSize;
32464 size_t const srcConsumed = mtctx->jobs[wJobID].consumed;
32468 DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
32476 if ( (srcConsumed == srcSize)
32479 DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
32488 DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
32501 if ( (srcConsumed == srcSize)
32503 DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
32506 DEBUGLOG(5, "dstBuffer released");
32516 if (srcSize > srcConsumed) return 1;
32533 unsigned const firstJobID = mtctx->doneJobID;
32534 unsigned const lastJobID = mtctx->nextJobID;
32537 for (jobID = firstJobID; jobID < lastJobID; ++jobID) {
32538 unsigned const wJobID = jobID & mtctx->jobIDMask;
32545 if (consumed < mtctx->jobs[wJobID].src.size) {
32547 if (range.size == 0) {
32549 range = mtctx->jobs[wJobID].src;
32567 if (rangeStart == NULL || bufferStart == NULL)
32571 BYTE const* const bufferEnd = bufferStart + buffer.capacity;
32572 BYTE const* const rangeEnd = rangeStart + range.size;
32575 if (bufferStart == bufferEnd || rangeStart == rangeEnd)
32578 return bufferStart < rangeEnd && rangeStart < bufferEnd;
32587 DEBUGLOG(5, "ZSTDMT_doesOverlapWindow");
32593 DEBUGLOG(5, "extDict [0x%zx, 0x%zx)",
32594 (size_t)extDict.start,
32596 DEBUGLOG(5, "prefix [0x%zx, 0x%zx)",
32597 (size_t)prefix.start,
32606 if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
32608 DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
32609 DEBUGLOG(5, "source [0x%zx, 0x%zx)",
32610 (size_t)buffer.start,
32614 DEBUGLOG(5, "Waiting for LDM to finish...");
32617 DEBUGLOG(6, "Done waiting for LDM to finish");
32634 DEBUGLOG(5, "ZSTDMT_tryGetInputRange");
32638 if (spaceLeft < target) {
32648 DEBUGLOG(5, "Waiting for buffer...");
32660 DEBUGLOG(5, "Waiting for buffer...");
32667 DEBUGLOG(5, "Using prefix range [%zx, %zx)",
32670 DEBUGLOG(5, "Using source range [%zx, %zx)",
32671 (size_t)buffer.start,
32705 syncPoint.flush = 0;
32706 if (!mtctx->params.rsyncable)
32749 if ((hash & hitMask) == hitMask) {
32755 syncPoint.toLoad = 0;
32756 syncPoint.flush = 1;
32769 for (; pos < syncPoint.toLoad; ++pos) {
32778 if ((hash & hitMask) == hitMask) {
32779 syncPoint.toLoad = pos + 1;
32780 syncPoint.flush = 1;
32805 unsigned forwardInputProgress = 0;
32806 DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)",
32813 return ERROR(stage_wrong);
32818 && (input->size > input->pos) ) {
32825 DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed");
32828 DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
32836 DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
32841 forwardInputProgress = syncPoint.toLoad>0;
32866 if (input->pos < input->size) return MAX(remainingToFlush, 1);
32867 DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush);
32868 return remainingToFlush;
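/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * Driving the multithreaded path above through the stable public API: enabling
 * workers with ZSTD_c_nbWorkers makes ZSTD_compressStream2() dispatch to the
 * ZSTDMT_* machinery transparently. Error handling abbreviated; the demo_*
 * name is hypothetical. */
#include "zstd.h"
static size_t demo_mt_compress(void* dst, size_t dstCap, const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_inBuffer in = { src, srcSize, 0 };
    ZSTD_outBuffer out = { dst, dstCap, 0 };
    size_t ret = 0;
    if (cctx == NULL) return 0;
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);   /* 4 worker threads */
    do {
        ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
    } while (ret != 0 && !ZSTD_isError(ret));            /* 0 => frame fully flushed */
    ZSTD_freeCCtx(cctx);
    return ZSTD_isError(ret) ? 0 : out.pos;
}
/* ---- end of sketch ---- */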
32905 #define HUF_DECODER_FAST_TABLELOG 11
32915 #if defined(HUF_FORCE_DECOMPRESS_X1) && \
32916 defined(HUF_FORCE_DECOMPRESS_X2)
32917 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
32925 # define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
32927 # define HUF_FAST_BMI2_ATTRS
32931 # define HUF_EXTERN_C extern "C"
32933 # define HUF_EXTERN_C
32935 #define HUF_ASM_DECL HUF_EXTERN_C
32938 # define HUF_NEED_BMI2_FUNCTION 1
32940 # define HUF_NEED_BMI2_FUNCTION 0
32946 #define HUF_isError ERR_isError
32952 #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
32953 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
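/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * HUF_ALIGN rounds x up to the next multiple of a power-of-2 alignment by
 * adding the mask and clearing the low bits; e.g. HUF_ALIGN(13, 8) == 16 while
 * HUF_ALIGN(16, 8) == 16. A standalone equivalent, demo_* name hypothetical: */
#include <stddef.h>
static size_t demo_align_up(size_t x, size_t pow2)
{
    size_t const mask = pow2 - 1;        /* e.g. 8 -> 0x7 */
    return (x + mask) & ~mask;           /* add, then truncate to the boundary */
}
/* demo_align_up(13, 8) == 16; demo_align_up(16, 8) == 16 */
/* ---- end of sketch ---- */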
32966 #define HUF_DGEN(fn) \
32968 static size_t fn##_default( \
32969 void* dst, size_t dstSize, \
32970 const void* cSrc, size_t cSrcSize, \
32971 const HUF_DTable* DTable) \
32973 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
32976 static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
32977 void* dst, size_t dstSize, \
32978 const void* cSrc, size_t cSrcSize, \
32979 const HUF_DTable* DTable) \
32981 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
32984 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
32985 size_t cSrcSize, HUF_DTable const* DTable, int flags) \
32987 if (flags & HUF_flags_bmi2) { \
32988 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
32990 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
32995 #define HUF_DGEN(fn) \
32996 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
32997 size_t cSrcSize, HUF_DTable const* DTable, int flags) \
33000 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
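/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * When DYNAMIC_BMI2 is active, HUF_DGEN expands to the pattern sketched below:
 * the same body is compiled twice, once under a BMI2 target attribute, and a
 * thin wrapper picks the variant at runtime from a flag the caller filled in
 * after probing the CPU once. demo_* names are hypothetical. */
static int demo_body(int x) { return x * 2 + 1; }   /* zstd force-inlines the body,
                                                     * so each variant gets its own
                                                     * BMI2-enabled copy */
static int demo_default(int x) { return demo_body(x); }
#if defined(__GNUC__) && defined(__x86_64__)
__attribute__((target("bmi2")))
#endif
static int demo_bmi2(int x) { return demo_body(x); }
static int demo_dispatch_bmi2(int x, int useBmi2)
{
    return useBmi2 ? demo_bmi2(x) : demo_default(x);
}
/* ---- end of sketch ---- */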
33019 BYTE const lastByte = ip[7];
33020 size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
33022 assert(bitsConsumed <= 8);
33023 assert(sizeof(size_t) == 8);
33024 return value << bitsConsumed;
33045 BYTE const* ilimit;
33047 BYTE const* iend[4];
33060 void const* dt = DTable + 1;
33063 const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
33075 return ERROR(corruption_detected);
33086 const BYTE* const istart = (const BYTE*)src;
33090 size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
33091 args->iend[0] = istart + 6;
33092 args->iend[1] = args->iend[0] + length1;
33093 args->iend[2] = args->iend[1] + length2;
33094 args->iend[3] = args->iend[2] + length3;
33101 if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
33103 if (length4 > srcSize) return ERROR(corruption_detected);
33113 args->op[1] = args->op[0] + (dstSize+3)/4;
33114 args->op[2] = args->op[1] + (dstSize+3)/4;
33115 args->op[3] = args->op[2] + (dstSize+3)/4;
33118 if (args->op[3] >= oend)
33137 args->ilimit = ilimit;
33148 if (args->op[stream] > segmentEnd)
33149 return ERROR(corruption_detected);
33155 if (args->ip[stream] < args->iend[stream] - 8)
33156 return ERROR(corruption_detected);
33159 assert(sizeof(size_t) == 8);
33162 bit->start = (const char*)args->iend[0];
33164 bit->ptr = (const char*)args->ip[stream];
33170 #ifndef HUF_FORCE_DECOMPRESS_X2
33184 D4 = (U64)((symbol << 8) + nbBits);
33186 D4 = (U64)(symbol + (nbBits << 8));
33188 assert(D4 < (1U << 16));
33189 D4 *= 0x0001000100010001ULL;
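/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * The multiply by 0x0001000100010001ULL above broadcasts a single 16-bit table
 * entry into all four 16-bit lanes of a 64-bit word, so four adjacent DTable
 * cells can be filled with one 64-bit store. The arithmetic in isolation: */
#include <stdint.h>
static uint64_t demo_broadcast16(uint16_t v)
{
    return (uint64_t)v * 0x0001000100010001ULL;  /* v repeated in lanes 0..3 */
}
/* demo_broadcast16(0x1234) == 0x1234123412341234ULL */
/* ---- end of sketch ---- */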
33200 if (tableLog > targetTableLog)
33202 if (tableLog < targetTableLog) {
33203 U32 const scale = targetTableLog - tableLog;
33206 for (s = 0; s < nbSymbols; ++s) {
33207 huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
33213 for (s = targetTableLog; s > scale; --s) {
33214 rankVal[s] = rankVal[s - scale];
33216 for (s = scale; s > 0; --s) {
33220 return targetTableLog;
33236 void* const dtPtr = DTable + 1;
33241 if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
33274 U32 nextRankStart = 0;
33276 int const nLimit = (int)nbSymbols - unroll + 1;
33277 for (n=0; n<(int)tableLog+1; n++) {
33278 U32 const curr = nextRankStart;
33279 nextRankStart += wksp->rankVal[n];
33282 for (n=0; n < nLimit; n += unroll) {
33284 for (u=0; u < unroll; ++u) {
33289 for (; n < (int)nbSymbols; ++n) {
33302 int symbol = wksp->rankVal[0];
33304 for (w=1; w<tableLog+1; ++w) {
33305 int const symbolCount = wksp->rankVal[w];
33306 int const length = (1 << w) >> 1;
33307 int uStart = rankStart;
33308 BYTE const nbBits = (BYTE)(tableLog + 1 - w);
33313 for (s=0; s<symbolCount; ++s) {
33322 for (s=0; s<symbolCount; ++s) {
33332 for (s=0; s<symbolCount; ++s) {
33339 for (s=0; s<symbolCount; ++s) {
33347 for (s=0; s<symbolCount; ++s) {
33349 for (u=0; u < length; u += 16) {
33360 symbol += symbolCount;
33361 rankStart += symbolCount * length;
33376 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
33377 *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
33379 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
33380 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
33381 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
33383 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
33384 if (MEM_64bits()) \
33385 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
33390 BYTE* const pStart = p;
33393 if ((pEnd - p) > 3) {
33413 return (size_t)(pEnd-pStart);
33418 void* dst, size_t dstSize,
33419 const void* cSrc, size_t cSrcSize,
33423 BYTE* const oend = op + dstSize;
33424 const void* dtPtr = DTable + 1;
33445 void* dst, size_t dstSize,
33446 const void* cSrc, size_t cSrcSize,
33450 if (cSrcSize < 10) return ERROR(corruption_detected);
33452 {   const BYTE* const istart = (const BYTE*) cSrc;
33454 BYTE* const oend = ostart + dstSize;
33455 BYTE* const olimit = oend - 3;
33456 const void* const dtPtr = DTable + 1;
33467 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
33468 const BYTE* const istart1 = istart + 6;
33469 const BYTE* const istart2 = istart1 + length1;
33470 const BYTE* const istart3 = istart2 + length2;
33471 const BYTE* const istart4 = istart3 + length3;
33472 const size_t segmentSize = (dstSize+3) / 4;
33473 BYTE* const opStart2 = ostart + segmentSize;
33474 BYTE* const opStart3 = opStart2 + segmentSize;
33475 BYTE* const opStart4 = opStart3 + segmentSize;
33476 BYTE* op1 = ostart;
33477 BYTE* op2 = opStart2;
33478 BYTE* op3 = opStart3;
33479 BYTE* op4 = opStart4;
33484 if (length4 > cSrcSize) return ERROR(corruption_detected);
33485 if (opStart4 > oend) return ERROR(corruption_detected);
33486 if (dstSize < 6) return ERROR(corruption_detected);
33493 if ((size_t)(oend - op4) >= sizeof(size_t)) {
33494 for ( ; (endSignal) & (op4 < olimit) ; ) {
33521 if (op1 > opStart2) return ERROR(corruption_detected);
33522 if (op2 > opStart3) return ERROR(corruption_detected);
33523 if (op3 > opStart4) return ERROR(corruption_detected);
33534 if (!endCheck) return ERROR(corruption_detected); }
33541 #if HUF_NEED_BMI2_FUNCTION
33543 size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
33551 size_t cSrcSize, HUF_DTable const* DTable) {
33555 #if ZSTD_ENABLE_ASM_X86_64_BMI2
33567 U16 const* const dtable = (U16 const*)args->dt;
33569 BYTE const* const ilimit = args->ilimit;
33586 for (stream = 0; stream < 4; ++stream) {
33587 assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
33594 size_t const oiters = (size_t)(oend - op[3]) / 5;
33598 size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
33600 size_t const iters = MIN(oiters, iiters);
33601 size_t const symbols = iters * 5;
33607 olimit = op[3] + symbols;
33610 if (op[3] + 20 > olimit)
33617 for (stream = 1; stream < 4; ++stream) {
33618 if (ip[stream] < ip[stream - 1])
33624 for (stream = 1; stream < 4; ++stream) {
33631 for (symbol = 0; symbol < 5; ++symbol) {
33632 for (stream = 0; stream < 4; ++stream) {
33633 int const index = (int)(bits[stream] >> 53);
33634 int const entry = (int)dtable[index];
33635 bits[stream] <<= (entry & 63);
33636 op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
33640 for (stream = 0; stream < 4; ++stream) {
33642 int const nbBits = ctz & 7;
33643 int const nbBytes = ctz >> 3;
33645 ip[stream] -= nbBytes;
33647 bits[stream] <<= nbBits;
33649 } while (op[3] < olimit);
33668 void* dst, size_t dstSize,
33669 const void* cSrc, size_t cSrcSize,
33673 void const* dt = DTable + 1;
33674 const BYTE* const iend = (const BYTE*)cSrc + 6;
33697 {   size_t const segmentSize = (dstSize+3) / 4;
33700 for (i = 0; i < 4; ++i) {
33702 if (segmentSize <= (size_t)(oend - segmentEnd))
33703 segmentEnd += segmentSize;
33709 if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
33718 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
33721 size_t cSrcSize, HUF_DTable const* DTable, int flags)
33728 fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
33729 # if ZSTD_ENABLE_ASM_X86_64_BMI2
33731 loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
33735 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
33739 #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
33741 loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
33750 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
33754 const void* cSrc, size_t cSrcSize,
33755 void* workSpace, size_t wkspSize, int flags)
33761 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
33762 ip += hSize; cSrcSize -= hSize;
33770 #ifndef HUF_FORCE_DECOMPRESS_X1
33792 seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
33793 return seq + (nbBits << 16) + ((U32)level << 24);
33795 seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
33796 return (seq << 16) + (nbBits << 8) + (U32)level;
33818 return (U64)DElt + ((U64)DElt << 32);
33836 U32 nbBits, U32 tableLog,
33837 U16 baseSeq, int const level)
33839 U32 const length = 1U << ((tableLog - nbBits) & 0x1F );
33841 assert(level >= 1 && level <= 2);
33846 *DTableRank++ = DElt;
33852 DTableRank[0] = DElt;
33853 DTableRank[1] = DElt;
33860 ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
33861 ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
33868 ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
33869 ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
33870 ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
33871 ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
33879 for (; DTableRank != DTableRankEnd; DTableRank += 8) {
33880 ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
33881 ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
33882 ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
33883 ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
33893 const U32* rankVal, const int minWeight, const int maxWeight1,
33895 U32 nbBitsBaseline, U16 baseSeq)
33902 U32 const length = 1U << ((targetLog - consumedBits) & 0x1F );
33904 int const skipSize = rankVal[minWeight];
33914 ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
33915 ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
33920 for (i = 0; i < skipSize; i += 8) {
33921 ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
33922 ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
33923 ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
33924 ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
33933 for (w = minWeight; w < maxWeight1; ++w) {
33934 int const begin = rankStart[w];
33935 int const end = rankStart[w+1];
33936 U32 const nbBits = nbBitsBaseline - w;
33937 U32 const totalBits = nbBits + consumedBits;
33939 DTable + rankVal[w],
33940 sortedSymbols + begin, sortedSymbols + end,
33941 totalBits, targetLog,
33950 const U32 nbBitsBaseline)
33952 U32* const rankVal = rankValOrigin[0];
33953 const int scaleLog = nbBitsBaseline - targetLog;
33954 const U32 minBits = nbBitsBaseline - maxWeight;
33956 int const wEnd = (int)maxWeight + 1;
33959 for (w = 1; w < wEnd; ++w) {
33960 int const begin = (int)rankStart[w];
33961 int const end = (int)rankStart[w+1];
33962 U32 const nbBits = nbBitsBaseline - w;
33964 if (targetLog-nbBits >= minBits) {
33966 int start = rankVal[w];
33967 U32 const length = 1U << ((targetLog - nbBits) & 0x1F );
33968 int minWeight = nbBits + scaleLog;
33970 if (minWeight < 1) minWeight = 1;
33974 for (s = begin; s != end; ++s) {
33976 DTable + start, targetLog, nbBits,
33977 rankValOrigin[nbBits], minWeight, wEnd,
33978 sortedList, rankStart,
33979 nbBitsBaseline, sortedList[s].symbol);
33985 DTable + rankVal[w],
33986 sortedList + begin, sortedList + end,
34003 const void* src, size_t srcSize,
34004 void* workSpace, size_t wkspSize, int flags)
34006 U32 tableLog, maxW, nbSymbols;
34010 void* dtPtr = DTable+1;
34016 if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
34030 if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);
34034 for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}
34037 {   U32 w, nextRankStart = 0;
34038 for (w=1; w<maxW+1; w++) {
34039 U32 curr = nextRankStart;
34041 rankStart[w] = curr;
34043 rankStart[0] = nextRankStart;
34044 rankStart[maxW+1] = nextRankStart;
34049 for (s=0; s<nbSymbols; s++) {
34051 U32 const r = rankStart[w]++;
34059 {   int const rescale = (maxTableLog-tableLog) - 1;
34060 U32 nextRankVal = 0;
34062 for (w=1; w<maxW+1; w++) {
34063 U32 curr = nextRankVal;
34064 nextRankVal += wksp->rankStats[w] << (w+rescale);
34065 rankVal0[w] = curr;
34067 {   U32 const minBits = tableLog+1 - maxW;
34069 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
34070 U32* const rankValPtr = wksp->rankVal[consumed];
34072 for (w = 1; w < maxW+1; w++) {
34073 rankValPtr[w] = rankVal0[w] >> consumed;
34102 if (dt[val].length==1) {
34115 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
34116 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
34118 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
34119 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
34120 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
34122 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
34123 if (MEM_64bits()) \
34124 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
34130 BYTE* const pStart = p;
34133 if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
34157 if ((size_t)(pEnd - p) >= 2) {
34161 while (p <= pEnd-2)
34173 void* dst, size_t dstSize,
34174 const void* cSrc, size_t cSrcSize,
34184 BYTE* const oend = ostart + dstSize;
34185 const void* const dtPtr = DTable+1;
34204 void* dst, size_t dstSize,
34205 const void* cSrc, size_t cSrcSize,
34208 if (cSrcSize < 10) return ERROR(corruption_detected);
34210 {   const BYTE* const istart = (const BYTE*) cSrc;
34212 BYTE* const oend = ostart + dstSize;
34213 BYTE* const olimit = oend - (sizeof(size_t)-1);
34214 const void* const dtPtr = DTable+1;
34225 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
34226 const BYTE* const istart1 = istart + 6;
34227 const BYTE* const istart2 = istart1 + length1;
34228 const BYTE* const istart3 = istart2 + length2;
34229 const BYTE* const istart4 = istart3 + length3;
34230 size_t const segmentSize = (dstSize+3) / 4;
34231 BYTE* const opStart2 = ostart + segmentSize;
34232 BYTE* const opStart3 = opStart2 + segmentSize;
34233 BYTE* const opStart4 = opStart3 + segmentSize;
34234 BYTE* op1 = ostart;
34235 BYTE* op2 = opStart2;
34236 BYTE* op3 = opStart3;
34237 BYTE* op4 = opStart4;
34242 if (length4 > cSrcSize) return ERROR(corruption_detected);
34243 if (opStart4 > oend) return ERROR(corruption_detected);
34244 if (dstSize < 6) return ERROR(corruption_detected);
34251 if ((size_t)(oend - op4) >= sizeof(size_t)) {
34252 for ( ; (endSignal) & (op4 < olimit); ) {
34253 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
34301 if (op1 > opStart2) return ERROR(corruption_detected);
34302 if (op2 > opStart3) return ERROR(corruption_detected);
34303 if (op3 > opStart4) return ERROR(corruption_detected);
34314 if (!endCheck) return ERROR(corruption_detected); }
34321 #if HUF_NEED_BMI2_FUNCTION
34323 size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
34331 size_t cSrcSize, HUF_DTable const* DTable) {
34335 #if ZSTD_ENABLE_ASM_X86_64_BMI2
34349 BYTE const* const ilimit = args->ilimit;
34359 oend[3] = args->oend;
34371 for (stream = 0; stream < 4; ++stream) {
34372 assert(op[stream] <= oend[stream]);
34386 size_t iters = (size_t)(ip[0] - ilimit) / 7;
34391 for (stream = 0; stream < 4; ++stream) {
34392 size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
34393 iters = MIN(iters, oiters);
34402 olimit = op[3] + (iters * 5);
34405 if (op[3] + 10 > olimit)
34412 for (stream = 1; stream < 4; ++stream) {
34413 if (ip[stream] < ip[stream - 1])
34419 for (stream = 1; stream < 4; ++stream) {
34426 for (symbol = 0; symbol < 5; ++symbol) {
34427 for (stream = 0; stream < 3; ++stream) {
34428 int const index = (int)(bits[stream] >> 53);
34431 bits[stream] <<= (entry.nbBits);
34437 int const index = (int)(bits[3] >> 53);
34440 bits[3] <<= (entry.nbBits);
34444 for (stream = 0; stream < 4; ++stream) {
34451 int const index = (int)(bits[3] >> 53);
34454 bits[3] <<= (entry.nbBits);
34462 int const nbBits = ctz & 7;
34463 int const nbBytes = ctz >> 3;
34464 ip[stream] -= nbBytes;
34466 bits[stream] <<= nbBits;
34469 } while (op[3] < olimit);
34483 void* dst, size_t dstSize,
34484 const void* cSrc, size_t cSrcSize,
34487 void const* dt = DTable + 1;
34488 const BYTE* const iend = (const BYTE*)cSrc + 6;
34511 size_t const segmentSize = (dstSize+3) / 4;
34514 for (i = 0; i < 4; ++i) {
34516 if (segmentSize <= (size_t)(oend - segmentEnd))
34517 segmentEnd += segmentSize;
34522 if (args.op[i] != segmentEnd)
34523 return ERROR(corruption_detected);
34532 size_t cSrcSize, HUF_DTable const* DTable, int flags)
34539 fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
34540 # if ZSTD_ENABLE_ASM_X86_64_BMI2
34542 loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
34546 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
34550 #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
34552 loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
34561 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
34564 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
34567 const void* cSrc, size_t cSrcSize,
34568 void* workSpace, size_t wkspSize, int flags)
34573 workSpace, wkspSize, flags);
34575 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
34576 ip += hSize; cSrcSize -= hSize;
34578 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
34582 const void* cSrc, size_t cSrcSize,
34583 void* workSpace, size_t wkspSize, int flags)
34588 workSpace, wkspSize, flags);
34590 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
34591 ip += hSize; cSrcSize -= hSize;
34604 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
34611 {{ 150,216}, { 381,119}},
34612 {{ 170,205}, { 514,112}},
34613 {{ 177,199}, { 539,110}},
34614 {{ 197,194}, { 644,107}},
34615 {{ 221,192}, { 735,107}},
34616 {{ 256,189}, { 881,106}},
34617 {{ 359,188}, {1167,109}},
34618 {{ 582,187}, {1570,114}},
34619 {{ 688,187}, {1712,122}},
34620 {{ 825,186}, {1965,136}},
34621 {{ 976,185}, {2131,150}},
34622 {{1180,186}, {2070,175}},
34623 {{1377,185}, {1731,202}},
34624 {{1412,185}, {1695,202}},
34636 assert(dstSize <= 128*1024);
34637 #if defined(HUF_FORCE_DECOMPRESS_X1)
34641 #elif defined(HUF_FORCE_DECOMPRESS_X2)
34647 {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);
34648 U32 const D256 = (U32)(dstSize >> 8);
34651 DTime1 += DTime1 >> 5;
34652 return DTime1 < DTime0;
34658 const void* cSrc, size_t cSrcSize,
34659 void* workSpace, size_t wkspSize, int flags)
34662 if (dstSize == 0) return ERROR(dstSize_tooSmall);
34663 if (cSrcSize > dstSize) return ERROR(corruption_detected);
34664 if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }
34665 if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }
34668 #if defined(HUF_FORCE_DECOMPRESS_X1)
34672 cSrcSize, workSpace, wkspSize, flags);
34673 #elif defined(HUF_FORCE_DECOMPRESS_X2)
34677 cSrcSize, workSpace, wkspSize, flags);
34680 cSrcSize, workSpace, wkspSize, flags):
34682 cSrcSize, workSpace, wkspSize, flags);
34691 #if defined(HUF_FORCE_DECOMPRESS_X1)
34694 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
34695 #elif defined(HUF_FORCE_DECOMPRESS_X2)
34698 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
34701 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
34705 #ifndef HUF_FORCE_DECOMPRESS_X2
34712 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
34713 ip += hSize; cSrcSize -= hSize;
34715 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
34722 #if defined(HUF_FORCE_DECOMPRESS_X1)
34726 #elif defined(HUF_FORCE_DECOMPRESS_X2)
34739 if (dstSize == 0) return ERROR(dstSize_tooSmall);
34740 if (cSrcSize == 0) return ERROR(corruption_detected);
34743 #if defined(HUF_FORCE_DECOMPRESS_X1)
34747 #elif defined(HUF_FORCE_DECOMPRESS_X2)
34779 #define FSE_STATIC_LINKING_ONLY
34797 #ifndef ZSTD_DECOMPRESS_INTERNAL_H
34798 #define ZSTD_DECOMPRESS_INTERNAL_H
34813 0, 1, 2, 3, 4, 5, 6, 7,
34814 8, 9, 10, 11, 12, 13, 14, 15,
34815 16, 18, 20, 22, 24, 28, 32, 40,
34816 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
34817 0x2000, 0x4000, 0x8000, 0x10000 };
34820 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
34821 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
34822 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
34823 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
34826 0, 1, 2, 3, 4, 5, 6, 7,
34827 8, 9, 10, 11, 12, 13, 14, 15,
34828 16, 17, 18, 19, 20, 21, 22, 23,
34829 24, 25, 26, 27, 28, 29, 30, 31 };
34832 3, 4, 5, 6, 7, 8, 9, 10,
34833 11, 12, 13, 14, 15, 16, 17, 18,
34834 19, 20, 21, 22, 23, 24, 25, 26,
34835 27, 28, 29, 30, 31, 32, 33, 34,
34836 35, 37, 39, 41, 43, 47, 51, 59,
34837 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
34838 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
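/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * How the base-value / extra-bits tables above are consumed: a decoded symbol
 * selects a code, baseValue[code] gives the smallest value the code can
 * represent, and nbAdditionalBits[code] raw bits from the stream are added on
 * top. demo_* names are hypothetical. */
#include <stdint.h>
static uint32_t demo_decode_value(uint32_t code,
                                  const uint32_t* baseValue,
                                  const uint8_t* nbAdditionalBits,
                                  uint32_t rawBits /* already read, LSB-aligned */)
{
    uint32_t const mask = (1u << nbAdditionalBits[code]) - 1;
    return baseValue[code] + (rawBits & mask);
}
/* e.g. with the match-length table above: code 43 has base 0x83 and 7 extra
 * bits, so rawBits == 5 decodes to a match length of 0x83 + 5 == 136. */
/* ---- end of sketch ---- */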
34851 BYTE nbAdditionalBits;
34856 #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
34858 #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
34859 #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
34860 #define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
34888 size_t ddictPtrTableSize;
34889 size_t ddictPtrCount;
34892 #ifndef ZSTD_DECODER_INTERNAL_BUFFER
34893 # define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
34896 #define ZSTD_LBMIN 64
34897 #define ZSTD_LBMAX (128 << 10)
34900 #define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
34938 #if DYNAMIC_BMI2 != 0
34963 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
34964 void* legacyContext;
34982 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
34983 void const* dictContentBeginForFuzzing;
34984 void const* dictContentEndForFuzzing;
34989 ZSTD_TraceCtx traceCtx;
34994 #if DYNAMIC_BMI2 != 0
35010 const void* const dict, size_t const dictSize);
35034 #ifndef ZSTD_DDICT_H
35035 #define ZSTD_DDICT_H
35069 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
35070 #error Using excluded file: ../legacy/zstd_legacy.h (re-amalgamate source to fix)
35085 ZSTD_customMem cMem;
35102 DEBUGLOG(4, "ZSTD_copyDDictParameters");
35110 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
35111 dctx->dictContentBeginForFuzzing = dctx->prefixStart;
35133 ZSTD_dictContentType_e dictContentType)
35137 if (dictContentType == ZSTD_dct_rawContent) return 0;
35140 if (dictContentType == ZSTD_dct_fullDict)
35141 return ERROR(dictionary_corrupted);
35146 if (dictContentType == ZSTD_dct_fullDict)
35147 return ERROR(dictionary_corrupted);
35156 dictionary_corrupted, "");
35163 const void* dict, size_t dictSize,
35164 ZSTD_dictLoadMethod_e dictLoadMethod,
35165 ZSTD_dictContentType_e dictContentType)
35167 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
35170 if (!dict) dictSize = 0;
35175 if (!internalBuffer) return ERROR(memory_allocation);
35188 ZSTD_dictLoadMethod_e dictLoadMethod,
35189 ZSTD_dictContentType_e dictContentType,
35190 ZSTD_customMem customMem)
35192 if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
35195 if (ddict == NULL) return NULL;
35196 ddict->cMem = customMem;
35199 dictLoadMethod, dictContentType);
35214 ZSTD_customMem const allocator = { NULL, NULL, NULL };
35224 ZSTD_customMem const allocator = { NULL, NULL, NULL };
35230 void* sBuffer, size_t sBufferSize,
35231 const void* dict, size_t dictSize,
35232 ZSTD_dictLoadMethod_e dictLoadMethod,
35233 ZSTD_dictContentType_e dictContentType)
35235 size_t const neededSpace = sizeof(ZSTD_DDict)
35236 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
35238 assert(sBuffer != NULL);
35240 if ((size_t)sBuffer & 7) return NULL;
35241 if (sBufferSize < neededSpace) return NULL;
35242 if (dictLoadMethod == ZSTD_dlm_byCopy) {
35248 ZSTD_dlm_byRef, dictContentType) ))
35256 if (ddict==NULL) return 0;
35257 { ZSTD_customMem const cMem = ddict->cMem;
35269 return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
35274 if (ddict==NULL) return 0;
35284 if (ddict==NULL) return 0;
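/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * The public DDict workflow the functions above implement: digest a dictionary
 * once, then reuse it across many decompressions. Error handling abbreviated;
 * the demo_* name is hypothetical. */
#include "zstd.h"
static size_t demo_decompress_with_dict(void* dst, size_t dstCap,
                                        const void* src, size_t srcSize,
                                        const void* dictBuf, size_t dictSize)
{
    size_t dSize = 0;
    ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    if (ddict && dctx)
        dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCap, src, srcSize, ddict);
    ZSTD_freeDCtx(dctx);
    ZSTD_freeDDict(ddict);
    return ZSTD_isError(dSize) ? 0 : dSize;
}
/* ---- end of sketch ---- */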
35309 #ifndef ZSTD_HEAPMODE
35310 # define ZSTD_HEAPMODE 1
35317 #ifndef ZSTD_LEGACY_SUPPORT
35318 # define ZSTD_LEGACY_SUPPORT 0
35327 #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
35328 # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
35338 #ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
35339 # define ZSTD_NO_FORWARD_PROGRESS_MAX 16
35349 #define FSE_STATIC_LINKING_ONLY
35368 #ifndef ZSTD_DEC_BLOCK_H
35369 #define ZSTD_DEC_BLOCK_H
35405 void* dst, size_t dstCapacity,
35418 const short* normalizedCounter, unsigned maxSymbolValue,
35419 const U32* baseValue, const U8* nbAdditionalBits,
35420 unsigned tableLog, void* wksp, size_t wkspSize,
35425 void* dst, size_t dstCapacity,
35426 const void* src, size_t srcSize);
35433 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
35434 #error Using excluded file: ../legacy/zstd_legacy.h (re-amalgamate source to fix)
35443 #define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
35444 #define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3
35450 #define DDICT_HASHSET_TABLE_BASE_SIZE 64
35451 #define DDICT_HASHSET_RESIZE_FACTOR 2
35471 DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
35475 DEBUGLOG(4, "DictID already exists, replacing rather than adding");
35479 idx &= idxRangeMask;
35482 DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
35499 DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
35500 RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
35504 for (i = 0; i < oldTableSize; ++i) {
35505 if (oldTable[i] != NULL) {
35520 DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
35523 if (currDictID == dictID || currDictID == 0) {
35527 idx &= idxRangeMask;
35531 DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
35541 DEBUGLOG(4, "Allocating new hash set");
35545 if (!ret->ddictPtrTable) {
35550 ret->ddictPtrCount = 0;
35558 DEBUGLOG(4, "Freeing ddict hash set");
35584 if (dctx==NULL) return 0;
35585 return sizeof(*dctx)
35595 size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
35598 return startingInputLength;
35604 dctx->format = ZSTD_f_zstd1;
35615 dctx->ddict = NULL;
35624 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
35625 dctx->legacyContext = NULL;
35626 dctx->previousLegacyVersion = 0;
35635 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
35636 dctx->dictContentEndForFuzzing = NULL;
35644 if ((size_t)workspace & 7) return NULL;
35645 if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL;
35649 dctx->inBuff = (char*)(dctx+1);
35654 if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
35657 if (!dctx) return NULL;
35679 dctx->ddict = NULL;
35685 if (dctx==NULL) return 0;
35687 { ZSTD_customMem const cMem = dctx->customMem;
35691 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
35692 if (dctx->legacyContext)
35693 ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
35707 size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
35721 DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
35728 dctx->ddict = frameDDict;
35751 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
35752 if (ZSTD_isLegacy(buffer, size)) return 1;
35780 {   BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
35781 U32 const dictID= fhd & 3;
35782 U32 const singleSegment = (fhd >> 5) & 1;
35783 U32 const fcsId = fhd >> 6;
35784 return minInputSize + !singleSegment
35786 + (singleSegment && !fcsId);
35811 DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize);
35815 RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0");
35817 if (srcSize < minInputSize) {
35833 "first bytes don't correspond to any supported magic number");
35835 return minInputSize;
35839 if ( (format != ZSTD_f_zstd1_magicless)
35843 if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
35844 return ZSTD_SKIPPABLEHEADERSIZE;
35847 zfhPtr->frameType = ZSTD_skippableFrame;
35855 if (srcSize < fhsize) return fhsize;
35856 zfhPtr->headerSize = (U32)fhsize;
35859 {   BYTE const fhdByte = ip[minInputSize-1];
35860 size_t pos = minInputSize;
35861 U32 const dictIDSizeCode = fhdByte&3;
35862 U32 const checksumFlag = (fhdByte>>2)&1;
35863 U32 const singleSegment = (fhdByte>>5)&1;
35864 U32 const fcsID = fhdByte>>6;
35865 U64 windowSize = 0;
35869 "reserved bits, must be zero");
35871 if (!singleSegment) {
35872 BYTE const wlByte = ip[pos++];
35874 RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
35875 windowSize = (1ULL << windowLog);
35876 windowSize += (windowSize >> 3) * (wlByte&7);
35878 switch(dictIDSizeCode)
35884 case 1 : dictID = ip[pos]; pos++; break;
35893 case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
35898 if (singleSegment) windowSize = frameContentSize;
35900 zfhPtr->frameType = ZSTD_frame;
35901 zfhPtr->frameContentSize = frameContentSize;
35902 zfhPtr->windowSize = windowSize;
35904 zfhPtr->dictID = dictID;
35905 zfhPtr->checksumFlag = checksumFlag;
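/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * Inspecting a frame with ZSTD_getFrameHeader() (static-linking API), which
 * lands in the parser above: return 0 means the header was fully decoded, a
 * positive value asks for that many input bytes, an error code reports a
 * malformed header. The demo_* name is hypothetical. */
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_getFrameHeader lives in this section */
#include <stdio.h>
#include "zstd.h"
static void demo_print_frame_info(const void* src, size_t srcSize)
{
    ZSTD_frameHeader zfh;
    size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
    if (ZSTD_isError(ret)) { printf("bad header: %s\n", ZSTD_getErrorName(ret)); return; }
    if (ret > 0) { printf("need %zu input bytes to decode the header\n", ret); return; }
    printf("windowSize=%llu dictID=%u checksum=%u\n",
           (unsigned long long)zfh.windowSize, zfh.dictID, zfh.checksumFlag);
}
/* ---- end of sketch ---- */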
35928 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
35929 if (ZSTD_isLegacy(src, srcSize)) {
35930 unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
35934 { ZSTD_frameHeader zfh;
35937 if (zfh.frameType == ZSTD_skippableFrame) {
35940 return zfh.frameContentSize;
35946 size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
35953 frameParameter_unsupported, "");
35954 {   size_t const skippableSize = skippableHeaderSize + sizeU32;
35956 return skippableSize;
35972 unsigned* magicVariant,
35973 const void* src, size_t srcSize)
35979 size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
35983 RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
35984 RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
35987 if (skippableContentSize > 0 && dst != NULL)
35988 ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
35989 if (magicVariant != NULL)
35991 return skippableContentSize;
36002 unsigned long long totalDstSize = 0;
36012 src = (const BYTE *)src + skippableSize;
36020 if (totalDstSize + fcs < totalDstSize)
36022 totalDstSize += fcs;
36029 src = (const BYTE *)src + frameSrcSize;
36036 return totalDstSize;
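/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * The same walk the function above performs, restated with stable public entry
 * points: add up each frame's declared content size and advance by its
 * compressed size. Skippable frames report a content size of 0. Overflow
 * checks abbreviated; the demo_* name is hypothetical. */
#include "zstd.h"
static unsigned long long demo_total_decompressed_size(const void* src, size_t srcSize)
{
    unsigned long long total = 0;
    while (srcSize > 0) {
        unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
        size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
        if (fcs == ZSTD_CONTENTSIZE_UNKNOWN || fcs == ZSTD_CONTENTSIZE_ERROR) return fcs;
        if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
        total += fcs;
        src = (const char*)src + frameSrcSize;
        srcSize -= frameSrcSize;
    }
    return total;
}
/* ---- end of sketch ---- */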
36070 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
36075 dictionary_wrong, "");
36088 return frameSizeInfo;
36096 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
36097 if (ZSTD_isLegacy(src, srcSize))
36098 return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
36101 if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
36106 return frameSizeInfo;
36109 const BYTE* const ipstart = ip;
36110 size_t remainingSize = srcSize;
36111 size_t nbBlocks = 0;
36112 ZSTD_frameHeader zfh;
36122 ip += zfh.headerSize;
36123 remainingSize -= zfh.headerSize;
36143 if (zfh.checksumFlag) {
36144 if (remainingSize < 4)
36149 frameSizeInfo.nbBlocks = nbBlocks;
36152 ? zfh.frameContentSize
36153 : (unsigned long long)nbBlocks * zfh.blockSizeMax;
36154 return frameSizeInfo;
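/* ---- Editor's illustrative sketch (standalone, not part of zstd) ----
 * The block walk above backs the public ZSTD_findFrameCompressedSize(); using
 * it to split a buffer holding several concatenated frames. The demo_* name is
 * hypothetical. */
#include <stdio.h>
#include "zstd.h"
static void demo_list_frames(const void* src, size_t srcSize)
{
    unsigned frameNb = 0;
    while (srcSize > 0) {
        size_t const frameSize = ZSTD_findFrameCompressedSize(src, srcSize);
        if (ZSTD_isError(frameSize)) { printf("corrupt input\n"); return; }
        printf("frame %u: %zu compressed bytes\n", frameNb++, frameSize);
        src = (const char*)src + frameSize;
        srcSize -= frameSize;
    }
}
/* ---- end of sketch ---- */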
36177 unsigned long long bound = 0;
36188 bound += decompressedBound;
36196 unsigned maxBlockSize = 0;
36203 ZSTD_frameHeader zfh;
36207 return ERROR(corruption_detected);
36209 if (zfh.frameType == ZSTD_frame) {
36211 margin += zfh.headerSize;
36213 margin += zfh.checksumFlag ? 4 : 0;
36215 margin += 3 * frameSizeInfo.nbBlocks;
36218 maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax);
36220 assert(zfh.frameType == ZSTD_skippableFrame);
36231 margin += maxBlockSize;
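/* Editorial note: the margin computed above (frame header + optional 4-byte
 * checksum + 3 bytes per block + one max-size block) is what enables in-place
 * decompression. A hedged caller sketch, assuming the
 * ZSTD_decompressionMargin() API available in newer zstd releases: */
#if 0
{   size_t const margin = ZSTD_decompressionMargin(src, srcSize);
    if (!ZSTD_isError(margin)) {
        /* allocate (decompressedSize + margin) bytes, place the compressed
         * data at the end of that buffer, and decompress into its start */
    }
}
#endif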
36244 DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
36252 const void* src, size_t srcSize)
36270 if (regenSize == 0) return 0;
36280 if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) {
36284 trace.streaming = streaming;
36290 trace.uncompressedSize = (size_t)uncompressedSize;
36293 ZSTD_trace_decompress_end(dctx->traceCtx, &trace);
36297 (void)uncompressedSize;
36309 void* dst, size_t dstCapacity,
36310 const void** srcPtr, size_t *srcSizePtr)
36312 const BYTE* const istart = (const BYTE*)(*srcPtr);
36313 const BYTE* ip = istart;
36315 BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
36317 size_t remainingSrcSize = *srcSizePtr;
36319 DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
36324 srcSize_wrong, "");
36329 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
36331 srcSize_wrong, "");
36333 ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
36338 BYTE* oBlockEnd = oend;
36339 size_t decodedSize;
36348 if (ip >= op && ip < oBlockEnd) {
36362 oBlockEnd = op + (ip - op);
36379 RETURN_ERROR(corruption_detected, "invalid block type");
36385 if (decodedSize != 0)
36389 remainingSrcSize -= cBlockSize;
36395 corruption_detected, "");
36397 if (dctx->fParams.checksumFlag) {
36406 remainingSrcSize -= 4;
36410 DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr);
36412 *srcSizePtr = remainingSrcSize;
36413 return (size_t)(op-ostart);
36417 void* dst, size_t dstCapacity,
36418 const void* src, size_t srcSize,
36419 const void* dict, size_t dictSize,
36422 void* const dststart = dst;
36423 int moreThan1Frame = 0;
36425 DEBUGLOG(5, "ZSTD_decompressMultiFrame");
36426 assert(dict==NULL || ddict==NULL);
36435 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
36436 if (ZSTD_isLegacy(src, srcSize)) {
36437 size_t decodedSize;
36438 size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
36441 "legacy support is not compatible with static dctx");
36443 decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
36446 assert(decodedSize <= dstCapacity);
36448 dstCapacity -= decodedSize;
36450 src = (const BYTE*)src + frameSize;
36459 DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
36466 src = (const BYTE *)src + skippableSize;
36485 && (moreThan1Frame==1),
36487 "At least one frame successfully completed, "
36488 "but following bytes are garbage: "
36489 "it's more likely to be a srcSize error, "
36490 "specifying more input bytes than size of frame(s). "
36491 "Note: one could be unlucky, it might be a corruption error instead, "
36492 "happening right at the place where we expect zstd magic bytes. "
36493 "But this is _much_ less likely than a srcSize field error.");
36495 assert(res <= dstCapacity);
36498 dstCapacity -= res;
36500 moreThan1Frame = 1;
36509 void* dst, size_t dstCapacity,
36510 const void* src, size_t srcSize,
36511 const void* dict, size_t dictSize)
36527 return dctx->ddict;
36542 #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
36582 switch(dctx->stage)
36590 return ZSTDnit_frameHeader;
36592 return ZSTDnit_blockHeader;
36594 return ZSTDnit_block;
36596 return ZSTDnit_lastBlock;
36598 return ZSTDnit_checksum;
36602 return ZSTDnit_skippableFrame;
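/* Editorial note: ZSTD_nextInputType() above accompanies the buffer-less
 * streaming API. A minimal sketch of that protocol (illustrative only,
 * ZSTD_STATIC_LINKING_ONLY API): feed exactly the byte count the dctx
 * requests, and note that some stages legitimately produce 0 output bytes. */
#if 0
{   for (;;) {
        size_t const neededIn = ZSTD_nextSrcSizeToDecompress(dctx);
        if (neededIn == 0) break;   /* frame fully decoded */
        /* ... read exactly neededIn bytes into inBuf ... */
        {   size_t const produced = ZSTD_decompressContinue(dctx, outPtr, outCapacity, inBuf, neededIn);
            if (ZSTD_isError(produced)) break;
            outPtr = (char*)outPtr + produced;
            outCapacity -= produced;
        }
    }
}
#endif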
36614 DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
36621 switch (dctx->stage)
36625 if (dctx->format == ZSTD_f_zstd1) {
36652 RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
36662 if (dctx->fParams.checksumFlag) {
36678 DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
36680 switch(dctx->bType)
36683 DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
36688 assert(srcSize <= dctx->expected);
36700 RETURN_ERROR(corruption_detected, "invalid block type");
36703 RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
36704 DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
36715 DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
36719 corruption_detected, "");
36720 if (dctx->fParams.checksumFlag) {
36741 DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
36776 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
36788 const void* const dict, size_t const dictSize)
36790 const BYTE* dictPtr = (const BYTE*)dict;
36791 const BYTE* const dictEnd = dictPtr + dictSize;
36793 RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
36800 { void* const workspace = &entropy->LLTable;
36801 size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
36802 #ifdef HUF_FORCE_DECOMPRESS_X1
36805 dictPtr, dictEnd - dictPtr,
36806 workspace, workspaceSize, 0);
36809 dictPtr, (size_t)(dictEnd - dictPtr),
36810 workspace, workspaceSize, 0);
36816 { short offcodeNCount[MaxOff+1];
36817 unsigned offcodeMaxValue = MaxOff, offcodeLog;
36818 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
36823 offcodeNCount, offcodeMaxValue,
36828 dictPtr += offcodeHeaderSize;
36831 { short matchlengthNCount[MaxML+1];
36832 unsigned matchlengthMaxValue = MaxML, matchlengthLog;
36833 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
36838 matchlengthNCount, matchlengthMaxValue,
36843 dictPtr += matchlengthHeaderSize;
36846 { short litlengthNCount[MaxLL+1];
36847 unsigned litlengthMaxValue = MaxLL, litlengthLog;
36848 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
36853 litlengthNCount, litlengthMaxValue,
36858 dictPtr += litlengthHeaderSize;
36863 size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
36864 for (i=0; i<3; i++) {
36867 dictionary_corrupted, "");
36868 entropy->rep[i] = rep;
36871 return (size_t)(dictPtr - (const BYTE*)dict);
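/* Editorial note: the entropy loading above is what ZSTD_createDDict() runs
 * once at dictionary-creation time. A hedged usage sketch (illustrative only;
 * these are stable public calls): */
#if 0
{   ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictSize);  /* parses entropy tables once */
    size_t const dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ddict);
    ZSTD_freeDDict(ddict);
    (void)dSize;
}
#endif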
36886 dict = (const char*)dict + eSize;
36899 dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0;
36925 if (dict && dictSize)
36928 dictionary_corrupted, "");
36937 DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
36942 const void* const dictEnd = dictStart + dictSize;
36960 if (dictSize < 8) return 0;
36980 ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
36991 void* dst, size_t dstCapacity,
36992 const void* src, size_t srcSize,
37008 DEBUGLOG(3, "ZSTD_createDStream");
37034 const void* dict, size_t dictSize,
37035 ZSTD_dictLoadMethod_e dictLoadMethod,
37036 ZSTD_dictContentType_e dictContentType)
37040 if (dict && dictSize != 0) {
37077 DEBUGLOG(4, "ZSTD_initDStream_usingDict");
37097 DEBUGLOG(4, "ZSTD_initDStream_usingDDict");
37119 dctx->ddict = ddict;
37125 RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
37142 size_t const max = (size_t)1 << bounds.upperBound;
37163 case ZSTD_d_format:
37168 case ZSTD_d_stableOutBuffer:
37172 case ZSTD_d_forceIgnoreChecksum:
37176 case ZSTD_d_refMultipleDDicts:
37180 case ZSTD_d_disableHuffmanAssembly:
37187 bounds.error = ERROR(parameter_unsupported);
37203 #define CHECK_DBOUNDS(p,v) { \
37204 RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
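/* Editorial note: CHECK_DBOUNDS validates against ZSTD_dParam_getBounds().
 * A minimal caller-side sketch (illustrative only; stable public API): */
#if 0
{   ZSTD_bounds const b = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
    if (!ZSTD_isError(b.error)) {
        /* e.g. bound decoder memory : reject frames needing a window above 2^27 */
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27);
    }
}
#endif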
37213 case ZSTD_d_format:
37216 case ZSTD_d_stableOutBuffer:
37219 case ZSTD_d_forceIgnoreChecksum:
37222 case ZSTD_d_refMultipleDDicts:
37225 case ZSTD_d_disableHuffmanAssembly:
37238 if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
37242 case ZSTD_d_format:
37244 dctx->format = (ZSTD_format_e)value;
37246 case ZSTD_d_stableOutBuffer:
37250 case ZSTD_d_forceIgnoreChecksum:
37254 case ZSTD_d_refMultipleDDicts:
37257 RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
37261 case ZSTD_d_disableHuffmanAssembly:
37297 unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
37298 size_t const minRBSize = (size_t)neededSize;
37300 frameParameter_windowTooLarge, "");
37307 size_t const inBuffSize = blockSize;
37314 U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;
37315 ZSTD_frameHeader zfh;
37320 frameParameter_windowTooLarge, "");
37360 RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
37370 void const* src, size_t srcSize) {
37377 if (!decodedSize && !isSkipFrame) {
37385 size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
37388 *op += decodedSize;
37399 const char* const src = (const char*)input->src;
37400 const char* const istart = input->pos != 0 ? src + input->pos : src;
37401 const char* const iend = input->size != 0 ? src + input->size : src;
37402 const char* ip = istart;
37407 U32 someMoreWork = 1;
37409 DEBUGLOG(5, "ZSTD_decompressStream");
37413 "forbidden. in: pos: %u vs size: %u",
37418 "forbidden. out: pos: %u vs size: %u",
37423 while (someMoreWork) {
37427 DEBUGLOG(5, "stage zdss_init => transparent reset ");
37430 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
37431 zds->legacyVersion = 0;
37438 DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
37439 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
37440 if (zds->legacyVersion) {
37442 "legacy support is incompatible with static dctx");
37443 { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
37453 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
37454 U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
37455 if (legacyVersion) {
37459 DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
37461 "legacy support is incompatible with static dctx");
37463 zds->previousLegacyVersion, legacyVersion,
37464 dict, dictSize), "");
37465 zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
37466 { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
37474 size_t const toLoad = hSize - zds->lhSize;
37475 size_t const remainingInput = (size_t)(iend-ip);
37477 if (toLoad > remainingInput) {
37478 if (remainingInput > 0) {
37480 zds->lhSize += remainingInput;
37486 "First few bytes detected incorrect" );
37497 && zds->fParams.frameType != ZSTD_skippableFrame
37498 && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
37500 if (cSize <= (size_t)(iend-istart)) {
37503 if (ZSTD_isError(decompressedSize)) return decompressedSize;
37504 DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
37506 ip = istart + cSize;
37507 op = op ? op + decompressedSize : op;
37516 && zds->fParams.frameType != ZSTD_skippableFrame
37518 && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
37519 RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small");
37536 DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
37541 frameParameter_windowTooLarge, "");
37544 { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4);
37551 { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
37554 if (tooSmall || tooLarge) {
37555 size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
37556 DEBUGLOG(4, "inBuff : from %u to %u",
37558 DEBUGLOG(4, "outBuff : from %u to %u",
37565 memory_allocation, "");
37583 DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
37584 if (neededInSize==0) {
37589 if ((size_t)(iend-ip) >= neededInSize) {
37592 ip += neededInSize;
37596 if (ip==iend) { someMoreWork = 0; break; }
37602 size_t const toLoad = neededInSize - zds->inPos;
37608 loadedSize = MIN(toLoad, (size_t)(iend-ip));
37611 corruption_detected,
37612 "should never happen");
37615 if (loadedSize != 0) {
37618 zds->inPos += loadedSize;
37620 if (loadedSize < toLoad) { someMoreWork = 0; break; }
37636 if (flushedSize == toFlushSize) {
37640 DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
37663 if ((ip==istart) && (op==ostart)) {
37674 if (!nextSrcSizeHint) {
37677 if (input->pos >= input->size) {
37694 nextSrcSizeHint -= zds->inPos;
37695 return nextSrcSizeHint;
37701 void* dst, size_t dstCapacity, size_t* dstPos,
37702 const void* src, size_t srcSize, size_t* srcPos)
37707 output.size = dstCapacity;
37711 input.pos = *srcPos;
37714 *srcPos = input.pos;
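/* Editorial note: the wrapper above adapts raw (dst,dstPos,src,srcPos)
 * arguments to the ZSTD_outBuffer/ZSTD_inBuffer interface. A minimal
 * streaming loop over that same interface (illustrative only; stable
 * public API): */
#if 0
{   ZSTD_inBuffer  in  = { src, srcSize, 0 };
    ZSTD_outBuffer out = { dst, dstCapacity, 0 };
    while (in.pos < in.size) {
        size_t const hint = ZSTD_decompressStream(zds, &out, &in);
        if (ZSTD_isError(hint)) break;  /* decoding error */
        if (hint == 0) break;           /* a frame completed exactly here */
        /* hint > 0 : suggested nb of additional input bytes for the next call */
    }
}
#endif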
37740 #define FSE_STATIC_LINKING_ONLY
37757 #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
37758 defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
37759 #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
37781 U32 const cSize = cBlockHeader >> 3;
37793 const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
37805 if (splitImmediately) {
37839 const void* src, size_t srcSize,
37842 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
37845 { const BYTE* const istart = (const BYTE*) src;
37851 DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
37857 { size_t lhSize, litSize, litCSize;
37858 U32 singleStream=0;
37859 U32 const lhlCode = (istart[0] >> 2) & 3;
37860 U32 const lhc = MEM_readLE32(istart);
37862 size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
37863 int const flags = 0
37864 | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
37865 | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
37868 case 0: case 1: default: /* note : default is impossible, since lhlCode is in [0..3] */
37869 /* 2 - 2 - 10 - 10 */
37870 singleStream = !lhlCode;
37872 litSize = (lhc >> 4) & 0x3FF;
37873 litCSize = (lhc >> 14) & 0x3FF;
37876 /* 2 - 2 - 14 - 14 */
37878 litSize = (lhc >> 4) & 0x3FFF;
37879 litCSize = lhc >> 18;
37882 /* 2 - 2 - 18 - 18 */
37884 litSize = (lhc >> 4) & 0x3FFFF;
37885 litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
37888 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
37889 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
37891 RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
37892 "Not enough literals (%zu) for the 4-streams mode (min %u)",
37893 litSize, MIN_LITERALS_FOR_4_STREAMS);
37894 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
37895 RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
37896 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
37898 /* prefetch huffman table if cold */
37899 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
37900 PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
37903 if (litEncType==set_repeat) {
37904 if (singleStream) {
37905 hufSuccess = HUF_decompress1X_usingDTable(
37906 dctx->litBuffer, litSize, istart+lhSize, litCSize,
37907 dctx->HUFptr, flags);
37909 assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
37910 hufSuccess = HUF_decompress4X_usingDTable(
37911 dctx->litBuffer, litSize, istart+lhSize, litCSize,
37912 dctx->HUFptr, flags);
37915 if (singleStream) {
37916 #if defined(HUF_FORCE_DECOMPRESS_X2)
37917 hufSuccess = HUF_decompress1X_DCtx_wksp(
37918 dctx->entropy.hufTable, dctx->litBuffer, litSize,
37919 istart+lhSize, litCSize, dctx->workspace,
37920 sizeof(dctx->workspace), flags);
37922 hufSuccess = HUF_decompress1X1_DCtx_wksp(
37923 dctx->entropy.hufTable, dctx->litBuffer, litSize,
37924 istart+lhSize, litCSize, dctx->workspace,
37925 sizeof(dctx->workspace), flags);
37928 hufSuccess = HUF_decompress4X_hufOnly_wksp(
37929 dctx->entropy.hufTable, dctx->litBuffer, litSize,
37930 istart+lhSize, litCSize, dctx->workspace,
37931 sizeof(dctx->workspace), flags);
37934 if (dctx->litBufferLocation == ZSTD_split)
37936 ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
37937 ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
37938 dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
37939 dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
37942 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
37944 dctx->litPtr = dctx->litBuffer;
37945 dctx->litSize = litSize;
37946 dctx->litEntropy = 1;
37947 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
37948 return litCSize + lhSize;
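/* Editorial note: a worked example of the header decoded above, for the
 * lhlCode==2 layout (2 - 2 - 14 - 14, lhSize==4). With hypothetical values
 * litSize=1000 and litCSize=300, the 4-byte little-endian header lhc packs :
 *   bits [0..1]   : literals block type (set_compressed)
 *   bits [2..3]   : size format == 2
 *   bits [4..17]  : litSize  -> (lhc >> 4) & 0x3FFF == 1000
 *   bits [18..31] : litCSize -> (lhc >> 18)         == 300   */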
37952 { size_t litSize, lhSize;
37953 U32 const lhlCode = ((istart[0]) >> 2) & 3;
37954 size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
37957 case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
37959 litSize = istart[0] >> 3;
37963 litSize = MEM_readLE16(istart) >> 4;
37967 RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
37968 litSize = MEM_readLE24(istart) >> 4;
37972 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
37973 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
37974 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
37975 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
37976 RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
37977 if (dctx->litBufferLocation == ZSTD_split)
37979 ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
37980 ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
37984 ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
37986 dctx->litPtr = dctx->litBuffer;
37987 dctx->litSize = litSize;
37988 return lhSize+litSize;
37990 /* direct reference into compressed stream */
37991 dctx->litPtr = istart+lhSize;
37992 dctx->litSize = litSize;
37993 dctx->litBufferEnd = dctx->litPtr + litSize;
37994 dctx->litBufferLocation = ZSTD_not_in_dst;
37995 return lhSize+litSize;
37999 { U32 const lhlCode = ((istart[0]) >> 2) & 3;
38000 size_t litSize, lhSize;
38001 size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
38004 case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
38006 litSize = istart[0] >> 3;
38010 RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
38011 litSize = MEM_readLE16(istart) >> 4;
38015 RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
38016 litSize = MEM_readLE24(istart) >> 4;
38019 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
38020 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
38021 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
38022 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
38023 if (dctx->litBufferLocation == ZSTD_split)
38025 ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
38026 ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
38030 ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
38032 dctx->litPtr = dctx->litBuffer;
38033 dctx->litSize = litSize;
38037 RETURN_ERROR(corruption_detected, "impossible");
38042 /* Default FSE distribution tables.
38043  * These are pre-calculated FSE decoding tables using default distributions as defined in the specification :
38044  * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
38045  * They were generated programmatically with the following method :
38046  * - start from the default distributions, present in /lib/common/zstd_internal.h
38047  * - generate the tables normally, using ZSTD_buildFSETable()
38048  * - print out the content of the tables
38049  * - prettify the output, reported below, and test with a fuzzer to ensure it's correct */
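/* Editorial note: a hedged sketch of the regeneration step described above,
 * assuming the default-distribution symbols from zstd_internal.h
 * (LL_defaultNorm, LL_DEFAULTNORMLOG) and the LL_base/LL_bits arrays used by
 * ZSTD_decodeSeqHeaders() further below; illustrative only: */
#if 0
{   ZSTD_seqSymbol dt[(1<<LL_DEFAULTNORMLOG)+1];
    U32 wksp[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
    ZSTD_buildFSETable(dt, LL_defaultNorm, MaxLL,
                       LL_base, LL_bits, LL_DEFAULTNORMLOG,
                       wksp, sizeof(wksp), /* bmi2 */ 0);
    /* printing dt[] in this form yields the LL_defaultDTable below */
}
#endif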
38051 /* Default FSE distribution table for Literal Lengths */
38052 static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
38053 { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
38054 /* nextState, nbAddBits, nbBits, baseVal */
38055 { 0, 0, 4, 0}, { 16, 0, 4, 0},
38056 { 32, 0, 5, 1}, { 0, 0, 5, 3},
38057 { 0, 0, 5, 4}, { 0, 0, 5, 6},
38058 { 0, 0, 5, 7}, { 0, 0, 5, 9},
38059 { 0, 0, 5, 10}, { 0, 0, 5, 12},
38060 { 0, 0, 6, 14}, { 0, 1, 5, 16},
38061 { 0, 1, 5, 20}, { 0, 1, 5, 22},
38062 { 0, 2, 5, 28}, { 0, 3, 5, 32},
38063 { 0, 4, 5, 48}, { 32, 6, 5, 64},
38064 { 0, 7, 5, 128}, { 0, 8, 6, 256},
38065 { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
38066 { 32, 0, 4, 0}, { 0, 0, 4, 1},
38067 { 0, 0, 5, 2}, { 32, 0, 5, 4},
38068 { 0, 0, 5, 5}, { 32, 0, 5, 7},
38069 { 0, 0, 5, 8}, { 32, 0, 5, 10},
38070 { 0, 0, 5, 11}, { 0, 0, 6, 13},
38071 { 32, 1, 5, 16}, { 0, 1, 5, 18},
38072 { 32, 1, 5, 22}, { 0, 2, 5, 24},
38073 { 32, 3, 5, 32}, { 0, 3, 5, 40},
38074 { 0, 6, 4, 64}, { 16, 6, 4, 64},
38075 { 32, 7, 5, 128}, { 0, 9, 6, 512},
38076 { 0, 11, 6, 2048}, { 48, 0, 4, 0},
38077 { 16, 0, 4, 1}, { 32, 0, 5, 2},
38078 { 32, 0, 5, 3}, { 32, 0, 5, 5},
38079 { 32, 0, 5, 6}, { 32, 0, 5, 8},
38080 { 32, 0, 5, 9}, { 32, 0, 5, 11},
38081 { 32, 0, 5, 12}, { 0, 0, 6, 15},
38082 { 32, 1, 5, 18}, { 32, 1, 5, 20},
38083 { 32, 2, 5, 24}, { 32, 2, 5, 28},
38084 { 32, 3, 5, 40}, { 32, 4, 5, 48},
38085 { 0, 16, 6,65536}, { 0, 15, 6,32768},
38086 { 0, 14, 6,16384}, { 0, 13, 6, 8192},
38087 }; /* LL_defaultDTable */
38089 /* Default FSE distribution table for Offset Codes */
38090 static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
38091 { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
38092 /* nextState, nbAddBits, nbBits, baseVal */
38093 { 0, 0, 5, 0}, { 0, 6, 4, 61},
38094 { 0, 9, 5, 509}, { 0, 15, 5,32765},
38095 { 0, 21, 5,2097149}, { 0, 3, 5, 5},
38096 { 0, 7, 4, 125}, { 0, 12, 5, 4093},
38097 { 0, 18, 5,262141}, { 0, 23, 5,8388605},
38098 { 0, 5, 5, 29}, { 0, 8, 4, 253},
38099 { 0, 14, 5,16381}, { 0, 20, 5,1048573},
38100 { 0, 2, 5, 1}, { 16, 7, 4, 125},
38101 { 0, 11, 5, 2045}, { 0, 17, 5,131069},
38102 { 0, 22, 5,4194301}, { 0, 4, 5, 13},
38103 { 16, 8, 4, 253}, { 0, 13, 5, 8189},
38104 { 0, 19, 5,524285}, { 0, 1, 5, 1},
38105 { 16, 6, 4, 61}, { 0, 10, 5, 1021},
38106 { 0, 16, 5,65533}, { 0, 28, 5,268435453},
38107 { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
38108 { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
38109 }; /* OF_defaultDTable */
38112 /* Default FSE distribution table for Match Lengths */
38113 static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
38114 { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
38115 /* nextState, nbAddBits, nbBits, baseVal */
38116 { 0, 0, 6, 3}, { 0, 0, 4, 4},
38117 { 32, 0, 5, 5}, { 0, 0, 5, 6},
38118 { 0, 0, 5, 8}, { 0, 0, 5, 9},
38119 { 0, 0, 5, 11}, { 0, 0, 6, 13},
38120 { 0, 0, 6, 16}, { 0, 0, 6, 19},
38121 { 0, 0, 6, 22}, { 0, 0, 6, 25},
38122 { 0, 0, 6, 28}, { 0, 0, 6, 31},
38123 { 0, 0, 6, 34}, { 0, 1, 6, 37},
38124 { 0, 1, 6, 41}, { 0, 2, 6, 47},
38125 { 0, 3, 6, 59}, { 0, 4, 6, 83},
38126 { 0, 7, 6, 131}, { 0, 9, 6, 515},
38127 { 16, 0, 4, 4}, { 0, 0, 4, 5},
38128 { 32, 0, 5, 6}, { 0, 0, 5, 7},
38129 { 32, 0, 5, 9}, { 0, 0, 5, 10},
38130 { 0, 0, 6, 12}, { 0, 0, 6, 15},
38131 { 0, 0, 6, 18}, { 0, 0, 6, 21},
38132 { 0, 0, 6, 24}, { 0, 0, 6, 27},
38133 { 0, 0, 6, 30}, { 0, 0, 6, 33},
38134 { 0, 1, 6, 35}, { 0, 1, 6, 39},
38135 { 0, 2, 6, 43}, { 0, 3, 6, 51},
38136 { 0, 4, 6, 67}, { 0, 5, 6, 99},
38137 { 0, 8, 6, 259}, { 32, 0, 4, 4},
38138 { 48, 0, 4, 4}, { 16, 0, 4, 5},
38139 { 32, 0, 5, 7}, { 32, 0, 5, 8},
38140 { 32, 0, 5, 10}, { 32, 0, 5, 11},
38141 { 0, 0, 6, 14}, { 0, 0, 6, 17},
38142 { 0, 0, 6, 20}, { 0, 0, 6, 23},
38143 { 0, 0, 6, 26}, { 0, 0, 6, 29},
38144 { 0, 0, 6, 32}, { 0, 16, 6,65539},
38145 { 0, 15, 6,32771}, { 0, 14, 6,16387},
38146 { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
38147 { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
38148 }; /* ML_defaultDTable */
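/* Editorial note on reading the three tables above: each row is
 * { nextState, nbAdditionalBits, nbBits, baseValue }. Decoding a symbol adds
 * nbAdditionalBits freshly read from the bitstream to baseValue to obtain the
 * actual length/offset, then reads nbBits and adds them to nextState to reach
 * the next FSE state (see ZSTD_decodeSequence() and
 * ZSTD_updateFseStateWithDInfo() below). For example the LL row
 * { 32, 6, 5, 64 } codes a literal length of 64 + readBits(6), with next
 * state 32 + readBits(5). */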
38151 static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
38154 ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
38155 ZSTD_seqSymbol* const cell = dt + 1;
38157 DTableH->tableLog = 0;
38158 DTableH->fastMode = 0;
38161 cell->nextState = 0;
38162 assert(nbAddBits < 255);
38163 cell->nbAdditionalBits = nbAddBits;
38164 cell->baseValue = baseValue;
38168 /* ZSTD_buildFSETable() :
38169 * generate FSE decoding table for one symbol (ll, ml or off)
38170 * cannot fail if input is valid =>
38171 * all inputs are presumed validated at this stage */
38172 FORCE_INLINE_TEMPLATE
38173 void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
38174 const short* normalizedCounter, unsigned maxSymbolValue,
38175 const U32* baseValue, const U8* nbAdditionalBits,
38176 unsigned tableLog, void* wksp, size_t wkspSize)
38178 ZSTD_seqSymbol* const tableDecode = dt+1;
38179 U32 const maxSV1 = maxSymbolValue + 1;
38180 U32 const tableSize = 1 << tableLog;
38182 U16* symbolNext = (U16*)wksp;
38183 BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
38184 U32 highThreshold = tableSize - 1;
38187 /* Sanity Checks */
38188 assert(maxSymbolValue <= MaxSeq);
38189 assert(tableLog <= MaxFSELog);
38190 assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
38192 /* Init, lay down lowprob symbols */
38193 { ZSTD_seqSymbol_header DTableH;
38194 DTableH.tableLog = tableLog;
38195 DTableH.fastMode = 1;
38196 { S16 const largeLimit= (S16)(1 << (tableLog-1));
38198 for (s=0; s<maxSV1; s++) {
38199 if (normalizedCounter[s]==-1) {
38200 tableDecode[highThreshold--].baseValue = s;
38203 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
38204 assert(normalizedCounter[s]>=0);
38205 symbolNext[s] = (U16)normalizedCounter[s];
38207 ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
38210 /* Spread symbols */
38211 assert(tableSize <= 512);
38212 /* Specialized symbol spreading for the case when there are
38213 * no low probability (-1 count) symbols. When compressing
38214 * small blocks we avoid low probability symbols to hit this
38215 * case, since header decoding speed matters more.
38217 if (highThreshold == tableSize - 1) {
38218 size_t const tableMask = tableSize-1;
38219 size_t const step = FSE_TABLESTEP(tableSize);
38220 /* First lay down the symbols in order.
38221 * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
38222 * misses since small blocks generally have small table logs, so nearly
38223 * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
38224 * our buffer to handle the over-write.
38227 U64 const add = 0x0101010101010101ull;
38231 for (s=0; s<maxSV1; ++s, sv += add) {
38233 int const n = normalizedCounter[s];
38234 MEM_write64(spread + pos, sv);
38235 for (i = 8; i < n; i += 8) {
38236 MEM_write64(spread + pos + i, sv);
38242 /* Now we spread those positions across the table.
38243 * The benefit of doing it in two stages is that we avoid the
38244 * variable size inner loop, which caused lots of branch misses.
38245 * Now we can run through all the positions without any branch misses.
38246 * We unroll the loop twice, since that is what empirically worked best.
38249 size_t position = 0;
38251 size_t const unroll = 2;
38252 assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
38253 for (s = 0; s < (size_t)tableSize; s += unroll) {
38255 for (u = 0; u < unroll; ++u) {
38256 size_t const uPosition = (position + (u * step)) & tableMask;
38257 tableDecode[uPosition].baseValue = spread[s + u];
38259 position = (position + (unroll * step)) & tableMask;
38261 assert(position == 0);
38264 U32 const tableMask = tableSize-1;
38265 U32 const step = FSE_TABLESTEP(tableSize);
38266 U32 s, position = 0;
38267 for (s=0; s<maxSV1; s++) {
38269 int const n = normalizedCounter[s];
38270 for (i=0; i<n; i++) {
38271 tableDecode[position].baseValue = s;
38272 position = (position + step) & tableMask;
38273 while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
38275 assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
38278 /* Build Decoding table */
38281 for (u=0; u<tableSize; u++) {
38282 U32 const symbol = tableDecode[u].baseValue;
38283 U32 const nextState = symbolNext[symbol]++;
38284 tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
38285 tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
38286 assert(nbAdditionalBits[symbol] < 255);
38287 tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
38288 tableDecode[u].baseValue = baseValue[symbol];
38293 /* Avoids the FORCE_INLINE of the _body() function. */
38294 static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
38295 const short* normalizedCounter, unsigned maxSymbolValue,
38296 const U32* baseValue, const U8* nbAdditionalBits,
38297 unsigned tableLog, void* wksp, size_t wkspSize)
38299 ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
38300 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
38304 BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
38305 const short* normalizedCounter, unsigned maxSymbolValue,
38306 const U32* baseValue, const U8* nbAdditionalBits,
38307 unsigned tableLog, void* wksp, size_t wkspSize)
38309 ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
38310 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
38314 void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
38315 const short* normalizedCounter, unsigned maxSymbolValue,
38316 const U32* baseValue, const U8* nbAdditionalBits,
38317 unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
38321 ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
38322 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
38327 ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
38328 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
38335 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
38336 symbolEncodingType_e type, unsigned max, U32 maxLog,
38337 const void* src, size_t srcSize,
38338 const U32* baseValue, const U8* nbAdditionalBits,
38339 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
38340 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
38346 RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
38347 RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
38348 { U32 const symbol = *(const BYTE*)src;
38349 U32 const baseline = baseValue[symbol];
38350 U8 const nbBits = nbAdditionalBits[symbol];
38351 ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
38353 *DTablePtr = DTableSpace;
38356 *DTablePtr = defaultTable;
38359 RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
38360 /* prefetch FSE table if used */
38361 if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
38362 const void* const pStart = *DTablePtr;
38363 size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
38364 PREFETCH_AREA(pStart, pSize);
38367 case set_compressed :
38368 { unsigned tableLog;
38369 S16 norm[MaxSeq+1];
38370 size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
38371 RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
38372 RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
38373 ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
38374 *DTablePtr = DTableSpace;
38379 RETURN_ERROR(GENERIC, "impossible");
38383 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
38384 const void* src, size_t srcSize)
38386 const BYTE* const istart = (const BYTE*)src;
38387 const BYTE* const iend = istart + srcSize;
38388 const BYTE* ip = istart;
38393 RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
38399 RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
38402 if (nbSeq > 0x7F) {
38403 if (nbSeq == 0xFF) {
38404 RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
38405 nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
38408 RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
38409 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
38414 /* FSE table descriptors */
38415 RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
38416 { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
38417 symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
38418 symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
38421 /* Build DTables */
38422 { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
38423 LLtype, MaxLL, LLFSELog,
38426 LL_defaultDTable, dctx->fseEntropy,
38427 dctx->ddictIsCold, nbSeq,
38428 dctx->workspace, sizeof(dctx->workspace),
38429 ZSTD_DCtx_get_bmi2(dctx));
38430 RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
38434 { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
38435 OFtype, MaxOff, OffFSELog,
38438 OF_defaultDTable, dctx->fseEntropy,
38439 dctx->ddictIsCold, nbSeq,
38440 dctx->workspace, sizeof(dctx->workspace),
38441 ZSTD_DCtx_get_bmi2(dctx));
38442 RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
38446 { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
38447 MLtype, MaxML, MLFSELog,
38450 ML_defaultDTable, dctx->fseEntropy,
38451 dctx->ddictIsCold, nbSeq,
38452 dctx->workspace, sizeof(dctx->workspace),
38453 ZSTD_DCtx_get_bmi2(dctx));
38465 size_t matchLength;
38471 const ZSTD_seqSymbol* table;
38475 BIT_DStream_t DStream;
38476 ZSTD_fseState stateLL;
38477 ZSTD_fseState stateOffb;
38478 ZSTD_fseState stateML;
38479 size_t prevOffset[ZSTD_REP_NUM];
38489 HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
38490 assert(*ip <= *op);
38492 /* close range match, overlap */
38493 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
38494 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
38495 int const sub2 = dec64table[offset];
38496 (*op)[0] = (*ip)[0];
38497 (*op)[1] = (*ip)[1];
38498 (*op)[2] = (*ip)[2];
38499 (*op)[3] = (*ip)[3];
38500 *ip += dec32table[offset];
38501 ZSTD_copy4(*op+4, *ip);
38504 ZSTD_copy8(*op, *ip);
38508 assert(*op - *ip >= 8);
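/* Editorial note: a worked pass through the offset<8 path above, for
 * offset==1 (an RLE byte b sitting at *op - 1): the four byte-wise copies
 * write b into op[0..3], each one reading a byte the previous copy just
 * produced; then ip += dec32table[1] (== 1) lands ip on op[0], so ZSTD_copy4
 * fills op[4..7] with b as well; the dec64table[1] (== 8) correction plus the
 * shared "+= 8" advance leaves op - ip == 8, the invariant asserted above,
 * which makes any follow-up 8-byte wildcopy overlap-safe. */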
38522 static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
38523 ptrdiff_t const diff = op - ip;
38524 BYTE* const oend = op + length;
38526 assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
38527 (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
38530 /* Handle short lengths. */
38531 while (op < oend) *op++ = *ip++;
38534 if (ovtype == ZSTD_overlap_src_before_dst) {
38535 /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
38536 assert(length >= 8);
38537 ZSTD_overlapCopy8(&op, &ip, diff);
38539 assert(op - ip >= 8);
38540 assert(op <= oend);
38543 if (oend <= oend_w) {
38544 /* No risk of overwrite. */
38545 ZSTD_wildcopy(op, ip, length, ovtype);
38548 if (op <= oend_w) {
38549 /* Wildcopy until we get close to the end. */
38550 assert(oend > oend_w);
38551 ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
38555 /* Handle the leftovers. */
38556 while (op < oend) *op++ = *ip++;
38559 /* ZSTD_safecopyDstBeforeSrc():
38560 * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
38561 * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
38562 static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
38563 ptrdiff_t const diff = op - ip;
38564 BYTE* const oend = op + length;
38566 if (length < 8 || diff > -8) {
38567 /* Handle short lengths, close overlaps, and dst not before src. */
38568 while (op < oend) *op++ = *ip++;
38572 if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
38573 ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
38574 ip += oend - WILDCOPY_OVERLENGTH - op;
38575 op += oend - WILDCOPY_OVERLENGTH - op;
38578 /* Handle the leftovers. */
38579 while (op < oend) *op++ = *ip++;
38582 /* ZSTD_execSequenceEnd():
38583 * This version handles cases that are near the end of the output buffer. It requires
38584 * more careful checks to make sure there is no overflow. By separating out these hard
38585 * and unlikely cases, we can speed up the common cases.
38587 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
38588 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
38591 size_t ZSTD_execSequenceEnd(BYTE* op,
38592 BYTE* const oend, seq_t sequence,
38593 const BYTE** litPtr, const BYTE* const litLimit,
38594 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
38596 BYTE* const oLitEnd = op + sequence.litLength;
38597 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
38598 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
38599 const BYTE* match = oLitEnd - sequence.offset;
38600 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
38602 /* bounds checks : careful of address space overflow in 32-bit mode */
38603 RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
38604 RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
38605 assert(op < op + sequenceLength);
38606 assert(oLitEnd < op + sequenceLength);
38608 /* copy literals */
38609 ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
38614 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
38615 /* offset beyond prefix */
38616 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
38617 match = dictEnd - (prefixStart - match);
38618 if (match + sequence.matchLength <= dictEnd) {
38619 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
38620 return sequenceLength;
38622 /* span extDict & currentPrefixSegment */
38623 { size_t const length1 = dictEnd - match;
38624 ZSTD_memmove(oLitEnd, match, length1);
38625 op = oLitEnd + length1;
38626 sequence.matchLength -= length1;
38627 match = prefixStart;
38630 ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
38631 return sequenceLength;
38634 /* ZSTD_execSequenceEndSplitLitBuffer():
38635 * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
38638 size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
38639 BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
38640 const BYTE** litPtr, const BYTE* const litLimit,
38641 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
38643 BYTE* const oLitEnd = op + sequence.litLength;
38644 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
38645 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
38646 const BYTE* match = oLitEnd - sequence.offset;
38649 /* bounds checks : careful of address space overflow in 32-bit mode */
38650 RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
38651 RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
38652 assert(op < op + sequenceLength);
38653 assert(oLitEnd < op + sequenceLength);
38655 /* copy literals */
38656 RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
38657 ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
38662 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
38663 /* offset beyond prefix */
38664 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
38665 match = dictEnd - (prefixStart - match);
38666 if (match + sequence.matchLength <= dictEnd) {
38667 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
38668 return sequenceLength;
38670 /* span extDict & currentPrefixSegment */
38671 { size_t const length1 = dictEnd - match;
38672 ZSTD_memmove(oLitEnd, match, length1);
38673 op = oLitEnd + length1;
38674 sequence.matchLength -= length1;
38675 match = prefixStart;
38678 ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
38679 return sequenceLength;
38683 size_t ZSTD_execSequence(BYTE* op,
38684 BYTE* const oend, seq_t sequence,
38685 const BYTE** litPtr, const BYTE* const litLimit,
38686 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
38688 BYTE* const oLitEnd = op + sequence.litLength;
38689 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
38690 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
38691 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
38692 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
38693 const BYTE* match = oLitEnd - sequence.offset;
38695 assert(op != NULL /* Precondition */);
38696 assert(oend_w < oend /* No underflow */);
38698 #if defined(__aarch64__)
38699 /* prefetch sequence starting from match that will be used for copy later */
38700 PREFETCH_L1(match);
38702 /* Handle edge cases in a slow path:
38703 * - Read beyond end of literals
38704 * - Match end is within WILDCOPY_OVERLENGTH of oend
38705 * - 32-bit mode and the match length overflows
38708 iLitEnd > litLimit ||
38709 oMatchEnd > oend_w ||
38710 (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
38711 return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
38713 /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
38714 assert(op <= oLitEnd /* No overflow */);
38715 assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
38716 assert(oMatchEnd <= oend /* No underflow */);
38717 assert(iLitEnd <= litLimit /* Literal length is in bounds */);
38718 assert(oLitEnd <= oend_w /* Can wildcopy literals */);
38719 assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
38722 * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
38723 * We likely don't need the full 32-byte wildcopy.
38725 assert(WILDCOPY_OVERLENGTH >= 16);
38726 ZSTD_copy16(op, (*litPtr));
38727 if (UNLIKELY(sequence.litLength > 16)) {
38728 ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
38731 *litPtr = iLitEnd; /* update for next sequence */
38734 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
38735 /* offset beyond prefix -> go into extDict */
38736 RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
38737 match = dictEnd + (match - prefixStart);
38738 if (match + sequence.matchLength <= dictEnd) {
38739 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
38740 return sequenceLength;
38742 /* span extDict & currentPrefixSegment */
38743 { size_t const length1 = dictEnd - match;
38744 ZSTD_memmove(oLitEnd, match, length1);
38745 op = oLitEnd + length1;
38746 sequence.matchLength -= length1;
38747 match = prefixStart;
38750 /* Match within prefix of 1 or more bytes */
38751 assert(op <= oMatchEnd);
38752 assert(oMatchEnd <= oend_w);
38753 assert(match >= prefixStart);
38754 assert(sequence.matchLength >= 1);
38756 /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
38757 * without overlap checking.
38759 if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
38760 /* We bet on a full wildcopy for matches, since we expect matches to be
38761 * longer than literals (in general). In silesia, ~10% of matches are longer
38764 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
38765 return sequenceLength;
38767 assert(sequence.offset < WILDCOPY_VECLEN);
38769 /* Copy 8 bytes and spread the offset to be >= 8. */
38770 ZSTD_overlapCopy8(&op, &match, sequence.offset);
38772 /* If the match length is > 8 bytes, then continue with the wildcopy. */
38773 if (sequence.matchLength > 8) {
38774 assert(op < oMatchEnd);
38775 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
38777 return sequenceLength;
38781 size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
38782 BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
38783 const BYTE** litPtr, const BYTE* const litLimit,
38784 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
38786 BYTE* const oLitEnd = op + sequence.litLength;
38787 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
38788 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
38789 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
38790 const BYTE* match = oLitEnd - sequence.offset;
38792 assert(op != NULL /* Precondition */);
38793 assert(oend_w < oend /* No underflow */);
38794 /* Handle edge cases in a slow path:
38795 * - Read beyond end of literals
38796 * - Match end is within WILDCOPY_OVERLENGTH of oend
38797 * - 32-bit mode and the match length overflows
38800 iLitEnd > litLimit ||
38801 oMatchEnd > oend_w ||
38802 (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
38803 return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
38805 /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
38806 assert(op <= oLitEnd /* No overflow */);
38807 assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
38808 assert(oMatchEnd <= oend /* No underflow */);
38809 assert(iLitEnd <= litLimit /* Literal length is in bounds */);
38810 assert(oLitEnd <= oend_w /* Can wildcopy literals */);
38811 assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
38814 * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
38815 * We likely don't need the full 32-byte wildcopy.
38817 assert(WILDCOPY_OVERLENGTH >= 16);
38818 ZSTD_copy16(op, (*litPtr));
38819 if (UNLIKELY(sequence.litLength > 16)) {
38820 ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
38823 *litPtr = iLitEnd; /* update for next sequence */
38826 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
38827 /* offset beyond prefix -> go into extDict */
38828 RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
38829 match = dictEnd + (match - prefixStart);
38830 if (match + sequence.matchLength <= dictEnd) {
38831 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
38832 return sequenceLength;
38834 /* span extDict & currentPrefixSegment */
38835 { size_t const length1 = dictEnd - match;
38836 ZSTD_memmove(oLitEnd, match, length1);
38837 op = oLitEnd + length1;
38838 sequence.matchLength -= length1;
38839 match = prefixStart;
38841 /* Match within prefix of 1 or more bytes */
38842 assert(op <= oMatchEnd);
38843 assert(oMatchEnd <= oend_w);
38844 assert(match >= prefixStart);
38845 assert(sequence.matchLength >= 1);
38847 /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
38848 * without overlap checking.
38850 if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
38851 /* We bet on a full wildcopy for matches, since we expect matches to be
38852 * longer than literals (in general). In silesia, ~10% of matches are longer
38855 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
38856 return sequenceLength;
38858 assert(sequence.offset < WILDCOPY_VECLEN);
38860 /* Copy 8 bytes and spread the offset to be >= 8. */
38861 ZSTD_overlapCopy8(&op, &match, sequence.offset);
38863 /* If the match length is > 8 bytes, then continue with the wildcopy. */
38864 if (sequence.matchLength > 8) {
38865 assert(op < oMatchEnd);
38866 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
38868 return sequenceLength;
38873 ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
38875 const void* ptr = dt;
38876 const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
38877 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
38879 (U32)DStatePtr->state, DTableH->tableLog);
38880 BIT_reloadDStream(bitD);
38881 DStatePtr->table = dt + 1;
38884 FORCE_INLINE_TEMPLATE void
38885 ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
38887 size_t const lowBits = BIT_readBits(bitD, nbBits);
38888 DStatePtr->state = nextState + lowBits;
38891 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
38892 * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
38893 * bits before reloading. This value is the maximum number of extra bits we read
38894 * after reloading when we are decoding long offsets.
38896 #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
38897 (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
38898 ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
38901 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
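/* Editorial note: with ZSTD_WINDOWLOG_MAX_32 == 30 and
 * STREAM_ACCUMULATOR_MIN_32 == 25, the macro above evaluates to 30 - 25 == 5,
 * which is exactly the value pinned down by the
 * ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5) further below. */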
38903 FORCE_INLINE_TEMPLATE seq_t
38904 ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
38908 * ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be
38909 * loaded in one operation and extracted its fields by simply shifting or
38910 * bit-extracting on aarch64.
38911 * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
38912 * operations that cause performance drop. This can be avoided by using this
38913 * ZSTD_memcpy hack.
38915 #if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
38916 ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
38917 ZSTD_seqSymbol* const llDInfo = &llDInfoS;
38918 ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
38919 ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
38920 ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
38921 ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
38922 ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
38924 const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
38925 const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
38926 const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
38928 seq.matchLength = mlDInfo->baseValue;
38929 seq.litLength = llDInfo->baseValue;
38930 { U32 const ofBase = ofDInfo->baseValue;
38931 BYTE const llBits = llDInfo->nbAdditionalBits;
38932 BYTE const mlBits = mlDInfo->nbAdditionalBits;
38933 BYTE const ofBits = ofDInfo->nbAdditionalBits;
38934 BYTE const totalBits = llBits+mlBits+ofBits;
38936 U16 const llNext = llDInfo->nextState;
38937 U16 const mlNext = mlDInfo->nextState;
38938 U16 const ofNext = ofDInfo->nextState;
38939 U32 const llnbBits = llDInfo->nbBits;
38940 U32 const mlnbBits = mlDInfo->nbBits;
38941 U32 const ofnbBits = ofDInfo->nbBits;
38943 assert(llBits <= MaxLLBits);
38944 assert(mlBits <= MaxMLBits);
38945 assert(ofBits <= MaxOff);
38947 * As gcc has better branch and block analyzers, sometimes it is only
38948 * valuable to mark likeliness for clang, it gives around 3-4% of
38955 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
38956 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
38957 ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
38958 ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
38959 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
38960 /* Always read extra bits, this keeps the logic simple,
38961 * avoids branches, and avoids accidentally reading 0 bits.
38963 U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
38964 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
38965 BIT_reloadDStream(&seqState->DStream);
38966 offset += BIT_readBitsFast(&seqState->DStream, extraBits);
38968 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
38969 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
38971 seqState->prevOffset[2] = seqState->prevOffset[1];
38972 seqState->prevOffset[1] = seqState->prevOffset[0];
38973 seqState->prevOffset[0] = offset;
38975 U32 const ll0 = (llDInfo->baseValue == 0);
38976 if (LIKELY((ofBits == 0))) {
38977 offset = seqState->prevOffset[ll0];
38978 seqState->prevOffset[1] = seqState->prevOffset[!ll0];
38979 seqState->prevOffset[0] = offset;
38981 offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
38982 { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
38983 temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
38984 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
38985 seqState->prevOffset[1] = seqState->prevOffset[0];
38986 seqState->prevOffset[0] = offset = temp;
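/* Editorial note: the prevOffset[] updates above implement the format's
 * 3-entry repeat-offset history as a small LRU. Worked example with a
 * hypothetical history {8, 16, 32}: selecting prevOffset[1] yields
 * temp == 16, and the history is reordered to {16, 8, 32}; the
 * `temp += !temp` guard forces a corrupted input's invalid offset 0 up to 1. */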
38988 seq.offset = offset;
38992 seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
38994 if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
38995 BIT_reloadDStream(&seqState->DStream);
38996 if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
38997 BIT_reloadDStream(&seqState->DStream);
38998 /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
38999 ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
39002 seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
39005 BIT_reloadDStream(&seqState->DStream);
39007 DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
39008 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
39010 ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
39011 ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
39012 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
39013 ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
39019 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
39020 MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
39022 size_t const windowSize = dctx->fParams.windowSize;
39023 /* No dictionary used. */
39024 if (dctx->dictContentEndForFuzzing == NULL) return 0;
39025 /* Dictionary is our prefix. */
39026 if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
39027 /* Dictionary is not our ext-dict. */
39028 if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
39029 /* Dictionary is not within our window size. */
39030 if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
39031 /* Dictionary is active. */
39035 MEM_STATIC void ZSTD_assertValidSequence(
39036 ZSTD_DCtx const* dctx,
39037 BYTE const* op, BYTE const* oend,
39039 BYTE const* prefixStart, BYTE const* virtualStart)
39041 #if DEBUGLEVEL >= 1
39042 size_t const windowSize = dctx->fParams.windowSize;
39043 size_t const sequenceSize = seq.litLength + seq.matchLength;
39044 BYTE const* const oLitEnd = op + seq.litLength;
39045 DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
39046 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
39047 assert(op <= oend);
39048 assert((size_t)(oend - op) >= sequenceSize);
39049 assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
39050 if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
39051 size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
39052 /* Offset must be within the dictionary. */
39053 assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
39054 assert(seq.offset <= windowSize + dictSize);
39056 /* Offset must be within our window. */
39057 assert(seq.offset <= windowSize);
39060 (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
39065 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
39068 FORCE_INLINE_TEMPLATE size_t
39070 ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
39071 void* dst, size_t maxDstSize,
39072 const void* seqStart, size_t seqSize, int nbSeq,
39073 const ZSTD_longOffset_e isLongOffset,
39076 const BYTE* ip = (const BYTE*)seqStart;
39077 const BYTE* const iend = ip + seqSize;
39078 BYTE* const ostart = (BYTE*)dst;
39079 BYTE* const oend = ostart + maxDstSize;
39081 const BYTE* litPtr = dctx->litPtr;
39082 const BYTE* litBufferEnd = dctx->litBufferEnd;
39083 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
39084 const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
39085 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
39089 /* Regen sequences */
39091 seqState_t seqState;
39092 dctx->fseEntropy = 1;
39093 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
39095 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
39096 corruption_detected, "");
39097 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
39098 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
39099 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
39100 assert(dst != NULL);
39102 ZSTD_STATIC_ASSERT(
39103 BIT_DStream_unfinished < BIT_DStream_completed &&
39104 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
39105 BIT_DStream_completed < BIT_DStream_overflow);
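/* This ordering lets the decode loops read "BIT_reloadDStream(...) <= BIT_DStream_completed"
 * as "bitstream still valid": unfinished and endOfBuffer sort below completed,
 * while overflow (corruption) sorts above it. */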
39107 /* decompress without overrunning litPtr begins */
39109 seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
39110 /* Align the decompression loop to 32 + 16 bytes.
39112 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
39113 * speed swings based on the alignment of the decompression loop. This
39114 * performance swing is caused by parts of the decompression loop falling
39115 * out of the DSB. The entire decompression loop should fit in the DSB;
39116 * when it can't, we get much worse performance. You can measure if you've
39117 * hit the good case or the bad case with this perf command for some
39118 * compressed file test.zst:
39120 * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
39121 * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
39123 * If you see most cycles served out of the MITE you've hit the bad case.
39124 * If you see most cycles served out of the DSB you've hit the good case.
39125 * If it is pretty even then you may be in an okay case.
39127 * This issue has been reproduced on the following CPUs:
39128 * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
39129 * Use Instruments->Counters to get DSB/MITE cycles.
39130 * I never got performance swings, but I was able to
39131 * go from the good case of mostly DSB to half of the
39132 * cycles served from MITE.
39133 * - Coffeelake: Intel i9-9900k
39134 * - Coffeelake: Intel i7-9700k
39136 * I haven't been able to reproduce the instability or DSB misses on any
39137 * of the following CPUs:
39139 * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
39142 * Alignment is done for each of the three major decompression loops:
39143 * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
39144 * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
39145 * - ZSTD_decompressSequences_body
39146 * Alignment choices are made to minimize large swings on bad cases and influence on performance
39147 * from changes external to this code, rather than to overoptimize on the current commit.
39149 * If you are seeing performance instability, this script can help test.
39150 * It tests on 4 commits in zstd where I saw performance change.
39152 * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
39154 #if defined(__GNUC__) && defined(__x86_64__)
39155 __asm__(".p2align 6");
39157 /* good for gcc-7, gcc-9, and gcc-11 */
39159 __asm__(".p2align 5");
39161 __asm__(".p2align 4");
39162 # if __GNUC__ == 8 || __GNUC__ == 10
39163 /* good for gcc-8 and gcc-10 */
39165 __asm__(".p2align 3");
39170 /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
39171 for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
39172 size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
39173 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
39174 assert(!ZSTD_isError(oneSeqSize));
39175 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
39177 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
39179 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
39181 if (UNLIKELY(!--nbSeq))
39183 BIT_reloadDStream(&(seqState.DStream));
39184 sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
39187 /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
39189 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
39192 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
39193 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
39194 sequence.litLength -= leftoverLit;
39197 litPtr = dctx->litExtraBuffer;
39198 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
39199 dctx->litBufferLocation = ZSTD_not_in_dst;
39201 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
39202 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
39203 assert(!ZSTD_isError(oneSeqSize));
39204 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
39206 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
39208 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
39211 BIT_reloadDStream(&(seqState.DStream));
39216 if (nbSeq > 0) /* there is remaining lit from extra buffer */
39219 #if defined(__GNUC__) && defined(__x86_64__)
39220 __asm__(".p2align 6");
39223 /* worse for gcc-7, better for gcc-8, gcc-9, gcc-10, and clang */
39224 __asm__(".p2align 4");
39226 __asm__(".p2align 3");
39227 # elif __GNUC__ >= 11
39228 __asm__(".p2align 3");
39230 __asm__(".p2align 5");
39232 __asm__(".p2align 3");
39237 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
39238 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
39239 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
39240 assert(!ZSTD_isError(oneSeqSize));
39241 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
39243 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
39245 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
39247 if (UNLIKELY(!--nbSeq))
39249 BIT_reloadDStream(&(seqState.DStream));
39253 /* check if reached exact end */
39255 RETURN_ERROR_IF(nbSeq, corruption_detected, "");
39256 RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
39257 /* save reps for next block */
39258 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
39261 /* last literal segment */
39262 if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
39264 size_t const lastLLSize = litBufferEnd - litPtr;
39265 RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
39267 ZSTD_memmove(op, litPtr, lastLLSize);
39270 litPtr = dctx->litExtraBuffer;
39271 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
39272 dctx->litBufferLocation = ZSTD_not_in_dst;
39274 { size_t const lastLLSize = litBufferEnd - litPtr;
39275 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
39277 ZSTD_memcpy(op, litPtr, lastLLSize);
39285 FORCE_INLINE_TEMPLATE size_t
39287 ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
39288 void* dst, size_t maxDstSize,
39289 const void* seqStart, size_t seqSize, int nbSeq,
39290 const ZSTD_longOffset_e isLongOffset,
39293 const BYTE* ip = (const BYTE*)seqStart;
39294 const BYTE* const iend = ip + seqSize;
39295 BYTE* const ostart = (BYTE*)dst;
39296 BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
39298 const BYTE* litPtr = dctx->litPtr;
39299 const BYTE* const litEnd = litPtr + dctx->litSize;
39300 const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
39301 const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
39302 const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
39306 /* Regen sequences */
39308 seqState_t seqState;
39309 dctx->fseEntropy = 1;
39310 { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
39312 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
39313 corruption_detected, "");
39314 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
39315 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
39316 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
39317 assert(dst != NULL);
39319 ZSTD_STATIC_ASSERT(
39320 BIT_DStream_unfinished < BIT_DStream_completed &&
39321 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
39322 BIT_DStream_completed < BIT_DStream_overflow);
39324 #if defined(__GNUC__) && defined(__x86_64__)
39325 __asm__(".p2align 6");
39328 __asm__(".p2align 5");
39330 __asm__(".p2align 3");
39332 __asm__(".p2align 4");
39334 __asm__(".p2align 3");
39339 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
39340 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
39341 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
39342 assert(!ZSTD_isError(oneSeqSize));
39343 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
39345 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
39347 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
39349 if (UNLIKELY(!--nbSeq))
39351 BIT_reloadDStream(&(seqState.DStream));
39354 /* check if reached exact end */
39356 RETURN_ERROR_IF(nbSeq, corruption_detected, "");
39357 RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
39358 /* save reps for next block */
39359 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
39362 /* last literal segment */
39363 { size_t const lastLLSize = litEnd - litPtr;
39364 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
39366 ZSTD_memcpy(op, litPtr, lastLLSize);
39375 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
39376 void* dst, size_t maxDstSize,
39377 const void* seqStart, size_t seqSize, int nbSeq,
39378 const ZSTD_longOffset_e isLongOffset,
39381 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39385 ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
39386 void* dst, size_t maxDstSize,
39387 const void* seqStart, size_t seqSize, int nbSeq,
39388 const ZSTD_longOffset_e isLongOffset,
39391 return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39393 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
39395 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
39397 FORCE_INLINE_TEMPLATE size_t
39398 ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
39399 const BYTE* const prefixStart, const BYTE* const dictEnd)
39401 prefetchPos += sequence.litLength;
39402 { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
39403 const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
39404 * No consequence though : memory address is only used for prefetching, not for dereferencing */
39405 PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
39407 return prefetchPos + sequence.matchLength;
39410 /* This decoding function employs prefetching
39411 * to reduce latency impact of cache misses.
39412 * It's generally employed when a block contains a significant portion of long-distance matches
39413 * or when coupled with a "cold" dictionary */
39414 FORCE_INLINE_TEMPLATE size_t
39415 ZSTD_decompressSequencesLong_body(
39417 void* dst, size_t maxDstSize,
39418 const void* seqStart, size_t seqSize, int nbSeq,
39419 const ZSTD_longOffset_e isLongOffset,
39422 const BYTE* ip = (const BYTE*)seqStart;
39423 const BYTE* const iend = ip + seqSize;
39424 BYTE* const ostart = (BYTE*)dst;
39425 BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
39427 const BYTE* litPtr = dctx->litPtr;
39428 const BYTE* litBufferEnd = dctx->litBufferEnd;
39429 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
39430 const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
39431 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
39434 /* Regen sequences */
39436 #define STORED_SEQS 8
39437 #define STORED_SEQS_MASK (STORED_SEQS-1)
39438 #define ADVANCED_SEQS STORED_SEQS
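/* The decode loop below is software-pipelined: sequence seqNb is decoded and its
 * match prefetched, then executed ADVANCED_SEQS (== 8) iterations later, read back
 * from sequences[seqNb & STORED_SEQS_MASK]. STORED_SEQS is a power of two, so the
 * mask turns the array into a cheap ring buffer. */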
39439 seq_t sequences[STORED_SEQS];
39440 int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
39441 seqState_t seqState;
39443 size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
39445 dctx->fseEntropy = 1;
39446 { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
39447 assert(dst != NULL);
39448 assert(iend >= ip);
39450 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
39451 corruption_detected, "");
39452 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
39453 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
39454 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
39456 /* prepare in advance */
39457 for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
39458 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
39459 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
39460 sequences[seqNb] = sequence;
39462 RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
39464 /* decompress without stomping litBuffer */
39465 for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
39466 seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
39469 if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
39471 /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
39472 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
39475 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
39476 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
39477 sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
39480 litPtr = dctx->litExtraBuffer;
39481 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
39482 dctx->litBufferLocation = ZSTD_not_in_dst;
39483 oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
39484 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
39485 assert(!ZSTD_isError(oneSeqSize));
39486 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
39488 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
39490 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
39491 sequences[seqNb & STORED_SEQS_MASK] = sequence;
39496 /* lit buffer is either wholly contained in first or second split, or not split at all */
39497 oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
39498 ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
39499 ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
39500 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
39501 assert(!ZSTD_isError(oneSeqSize));
39502 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
39504 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
39506 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
39507 sequences[seqNb & STORED_SEQS_MASK] = sequence;
39511 RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
39514 seqNb -= seqAdvance;
39515 for ( ; seqNb<nbSeq ; seqNb++) {
39516 seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
39517 if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
39519 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
39522 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
39523 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
39524 sequence->litLength -= leftoverLit;
39527 litPtr = dctx->litExtraBuffer;
39528 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
39529 dctx->litBufferLocation = ZSTD_not_in_dst;
39531 size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
39532 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
39533 assert(!ZSTD_isError(oneSeqSize));
39534 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
39536 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
39542 size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
39543 ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
39544 ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
39545 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
39546 assert(!ZSTD_isError(oneSeqSize));
39547 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
39549 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
39554 /* save reps for next block */
39555 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
39558 /* last literal segment */
39559 if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
39561 size_t const lastLLSize = litBufferEnd - litPtr;
39562 RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
39564 ZSTD_memmove(op, litPtr, lastLLSize);
39567 litPtr = dctx->litExtraBuffer;
39568 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
39570 { size_t const lastLLSize = litBufferEnd - litPtr;
39571 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
39573 ZSTD_memmove(op, litPtr, lastLLSize);
39582 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
39583 void* dst, size_t maxDstSize,
39584 const void* seqStart, size_t seqSize, int nbSeq,
39585 const ZSTD_longOffset_e isLongOffset,
39588 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39590 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
39596 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
39597 static BMI2_TARGET_ATTRIBUTE size_t
39599 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
39600 void* dst, size_t maxDstSize,
39601 const void* seqStart, size_t seqSize, int nbSeq,
39602 const ZSTD_longOffset_e isLongOffset,
39605 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39607 static BMI2_TARGET_ATTRIBUTE size_t
39609 ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
39610 void* dst, size_t maxDstSize,
39611 const void* seqStart, size_t seqSize, int nbSeq,
39612 const ZSTD_longOffset_e isLongOffset,
39615 return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39617 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
39619 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
39620 static BMI2_TARGET_ATTRIBUTE size_t
39621 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
39622 void* dst, size_t maxDstSize,
39623 const void* seqStart, size_t seqSize, int nbSeq,
39624 const ZSTD_longOffset_e isLongOffset,
39627 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39629 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
39631 #endif /* DYNAMIC_BMI2 */
39633 typedef size_t (*ZSTD_decompressSequences_t)(
39635 void* dst, size_t maxDstSize,
39636 const void* seqStart, size_t seqSize, int nbSeq,
39637 const ZSTD_longOffset_e isLongOffset,
39640 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
39642 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
39643 const void* seqStart, size_t seqSize, int nbSeq,
39644 const ZSTD_longOffset_e isLongOffset,
39649 if (ZSTD_DCtx_get_bmi2(dctx)) {
39650 return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39653 return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39656 ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
39657 const void* seqStart, size_t seqSize, int nbSeq,
39658 const ZSTD_longOffset_e isLongOffset,
39663 if (ZSTD_DCtx_get_bmi2(dctx)) {
39664 return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39667 return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39669 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
39672 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
39673 /* ZSTD_decompressSequencesLong() :
39674 * decompression function triggered when a minimum share of offsets is considered "long",
39675 * aka out of cache.
39676 * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream
register", and sometimes meaning "farther than memory cache distance
".
39677 * This function will try to mitigate main memory latency through the use of prefetching */
39679 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
39680 void* dst, size_t maxDstSize,
39681 const void* seqStart, size_t seqSize, int nbSeq,
39682 const ZSTD_longOffset_e isLongOffset,
39687 if (ZSTD_DCtx_get_bmi2(dctx)) {
39688 return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39691 return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
39693 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
39701 static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
39703 return (size_t)(op - virtualStart);
39707 unsigned longOffsetShare;
39708 unsigned maxNbAdditionalBits;
39711 /* ZSTD_getOffsetInfo() :
39712 * condition : offTable must be valid
39713 * @return : "share
" of long offsets (arbitrarily defined as > (1<<23))
39714 * compared to maximum possible of (1<<OffFSELog),
39715 * as well as the maximum number additional bits required.
39717 static ZSTD_OffsetInfo
39718 ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
39720 ZSTD_OffsetInfo info = {0, 0};
39721 /* If nbSeq == 0, then the offTable is uninitialized, but we have
39722 * no sequences, so both values should be 0.
39725 const void* ptr = offTable;
39726 U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
39727 const ZSTD_seqSymbol* table = offTable + 1;
39728 U32 const max = 1 << tableLog;
39730 DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
39732 assert(max <= (1 << OffFSELog)); /* max not too large */
39733 for (u=0; u<max; u++) {
39734 info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
39735 if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
39738 assert(tableLog <= OffFSELog);
39739 info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
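/* Scaling example: with tableLog == 5, each scanned cell stands for
 * 1 << (8 - 5) == 8 cells of the full (1 << OffFSELog) == 256-cell scale,
 * keeping the share comparable across table sizes. */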
39750 static size_t ZSTD_maxShortOffset(void)
39752 if (MEM_64bits()) {
39753 /* We can decode any offset without reloading bits.
39754 * This might change if the max window size grows.
39756 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
39759 /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
39760 * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
39761 * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
39763 size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
39764 size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
39765 assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
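/* Worked out for 32-bit mode: STREAM_ACCUMULATOR_MIN == 25, so
 * maxOffbase == (1 << 26) - 1, and the largest short offset is
 * ((1 << 26) - 1) - ZSTD_REP_NUM, with ZSTD_REP_NUM == 3. */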
39771 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
39772 void* dst, size_t dstCapacity,
39773 const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
39774 { /* blockType == blockCompressed */
39775 const BYTE* ip = (const BYTE*)src;
39778 /* Note : the wording of the specification
39779 * allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
39780 * This generally does not happen, as it makes little sense,
39781 * since an uncompressed block would feature the same size and have no decompression cost.
39782 * Also, note that decoders from reference libzstd prior to v1.5.4
39783 * would consider this edge case as an error.
39784 * As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
39785 * for broader compatibility with the deployed ecosystem of zstd decoders */
39786 RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
39788 /* Decode literals section */
39789 { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
39791 if (ZSTD_isError(litCSize)) return litCSize;
39793 srcSize -= litCSize;
39796 /* Build Decoding Tables */
39798 /* Compute the maximum block size, which must also work when !frame and fParams are unset.
39799 * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
39801 size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
39802 size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
39803 /* isLongOffset must be true if there are long offsets.
39804 * Offsets are long if they are larger than ZSTD_maxShortOffset().
39805 * We don't expect that to be the case in 64-bit mode.
39807 * We check here to see if our history is large enough to allow long offsets.
39808 * If it isn't, then we can't possibly have (valid) long offsets. If the offset
39809 * is invalid, then it is okay to read it incorrectly.
39811 * If isLongOffset is true, then we will later check our decoding table to see
39812 * if it is even possible to generate long offsets.
39814 ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
39815 /* These macros control at build-time which decompressor implementation
39816 * we use. If neither is defined, we do some inspection and dispatch at
39819 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39820 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
39821 int usePrefetchDecoder = dctx->ddictIsCold;
39823 /* Set to 1 to avoid computing offset info if we don't need to.
39824 * Otherwise this value is ignored.
39826 int usePrefetchDecoder = 1;
39829 size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
39830 if (ZSTD_isError(seqHSize)) return seqHSize;
39832 srcSize -= seqHSize;
39834 RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
39835 RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
39838 /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
39839 * compute information about the share of long offsets, and the maximum nbAdditionalBits.
39840 * NOTE: could probably use a larger nbSeq limit
39842 if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
39843 ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
39844 if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
39845 /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
39846 * enough, then we know it is impossible to have too long an offset in this block, so we can
39847 * use the regular offset decoder.
39849 isLongOffset = ZSTD_lo_isRegularOffset;
39851 if (!usePrefetchDecoder) {
39852 U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
39853 usePrefetchDecoder = (info.longOffsetShare >= minShare);
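/* Checking the arithmetic: shares are measured against 1 << OffFSELog == 256,
 * so 7/256 ~= 2.73% and 20/256 ~= 7.81%, matching the comment above. */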
39857 dctx->ddictIsCold = 0;
39859 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39860 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
39861 if (usePrefetchDecoder) {
39863 (void)usePrefetchDecoder;
39866 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
39867 return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
39871 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
39873 if (dctx->litBufferLocation == ZSTD_split)
39874 return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
39876 return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
39882 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
39884 if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
39885 dctx->dictEnd = dctx->previousDstEnd;
39886 dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
39887 dctx->prefixStart = dst;
39888 dctx->previousDstEnd = dst;
39893 size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
39894 void* dst, size_t dstCapacity,
39895 const void* src, size_t srcSize)
39898 ZSTD_checkContinuity(dctx, dst, dstCapacity);
39899 dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
39900 dctx->previousDstEnd = (char*)dst + dSize;
39905 /* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
39906 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
39907 void* dst, size_t dstCapacity,
39908 const void* src, size_t srcSize)
39910 return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
39912 /**** ended inlining decompress/zstd_decompress_block.c ****/
39914 /**** start inlining dictBuilder/cover.c ****/
39916 * Copyright (c) Meta Platforms, Inc. and affiliates.
39917 * All rights reserved.
39919 * This source code is licensed under both the BSD-style license (found in the
39920 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
39921 * in the COPYING file in the root directory of this source tree).
39922 * You may select, at your option, one of the above-listed licenses.
39925 /* *****************************************************************************
39926 * Constructs a dictionary using a heuristic based on the following paper:
39928 * Liao, Petri, Moffat, Wirth
39929 * Effective Construction of Relative Lempel-Ziv Dictionaries
39930 * Published in WWW 2016.
39932 * Adapted from code originally written by @ot (Giuseppe Ottaviano).
39933 ******************************************************************************/
39935 /*-*************************************
39937 ***************************************/
39938 #include <stdio.h> /* fprintf */
39939 #include <stdlib.h> /* malloc, free, qsort */
39940 #include <string.h> /* memset */
39941 #include <time.h> /* clock */
39943 #ifndef ZDICT_STATIC_LINKING_ONLY
39944 # define ZDICT_STATIC_LINKING_ONLY
39947 /**** skipping file: ../common/mem.h ****/
39948 /**** skipping file: ../common/pool.h ****/
39949 /**** skipping file: ../common/threading.h ****/
39950 /**** skipping file: ../common/zstd_internal.h ****/
39951 /**** skipping file: ../common/bits.h ****/
39952 /**** start inlining ../zdict.h ****/
39954 * Copyright (c) Meta Platforms, Inc. and affiliates.
39955 * All rights reserved.
39957 * This source code is licensed under both the BSD-style license (found in the
39958 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
39959 * in the COPYING file in the root directory of this source tree).
39960 * You may select, at your option, one of the above-listed licenses.
39963 #if defined (__cplusplus)
39967 #ifndef ZSTD_ZDICT_H
39968 #define ZSTD_ZDICT_H
39970 /*====== Dependencies ======*/
39971 #include <stddef.h> /* size_t */
39974 /* ===== ZDICTLIB_API : control library symbols visibility ===== */
39975 #ifndef ZDICTLIB_VISIBLE
39976 /* Backwards compatibility with old macro name */
39977 # ifdef ZDICTLIB_VISIBILITY
39978 # define ZDICTLIB_VISIBLE ZDICTLIB_VISIBILITY
39979 # elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
39980 # define ZDICTLIB_VISIBLE __attribute__ ((visibility ("default")))
39982 # define ZDICTLIB_VISIBLE
39986 #ifndef ZDICTLIB_HIDDEN
39987 # if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
39988 # define ZDICTLIB_HIDDEN __attribute__ ((visibility ("hidden")))
39990 # define ZDICTLIB_HIDDEN
39994 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
39995 # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBLE
39996 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
39997 # define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBLE /* It isn't required but allows generating better code, saving a function pointer load from the IAT and an indirect jump. */
39999 # define ZDICTLIB_API ZDICTLIB_VISIBLE
40162 ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
40163 const void* samplesBuffer,
40164 const size_t* samplesSizes, unsigned nbSamples);
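/* Illustrative call (buffer names are placeholders, not part of the API):
 *   size_t const dictSize = ZDICT_trainFromBuffer(dictBuf, dictBufCap,
 *                                                 samplesBuf, sampleSizes, nbSamples);
 *   if (ZDICT_isError(dictSize)) { handle the error }
 * samplesBuf holds all samples concatenated back to back; sampleSizes[i] gives the
 * length of sample i. */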
40167 int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */
40168 unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
40169 unsigned dictID; /**< force dictID value; 0 means auto mode (32-bit random value)
40170 * NOTE: The zstd format reserves some dictionary IDs for future use.
40171 * You may use them in private settings, but be warned that they
40172 * may be used by zstd in a public dictionary registry in the future.
40173 * These dictionary IDs are:
40174 * - low range : <= 32767
40175 * - high range : >= (2^31)
40214 ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
40215 const void* dictContent, size_t dictContentSize,
40216 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
40217 ZDICT_params_t parameters);
40220 /*====== Helper functions ======*/
40221 ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
40222 ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
40223 ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
40224 ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
40226 #endif /* ZSTD_ZDICT_H */
40228 #if defined(ZDICT_STATIC_LINKING_ONLY) && !defined(ZSTD_ZDICT_H_STATIC)
40229 #define ZSTD_ZDICT_H_STATIC
40231 /* This can be overridden externally to hide static symbols. */
40232 #ifndef ZDICTLIB_STATIC_API
40233 # if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
40234 # define ZDICTLIB_STATIC_API __declspec(dllexport) ZDICTLIB_VISIBLE
40235 # elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
40236 # define ZDICTLIB_STATIC_API __declspec(dllimport) ZDICTLIB_VISIBLE
40238 # define ZDICTLIB_STATIC_API ZDICTLIB_VISIBLE
40242 /* ====================================================================================
40243 * The definitions in this section are considered experimental.
40244 * They should never be used with a dynamic library, as they may change in the future.
40245 * They are provided for advanced usages.
40246 * Use them only in association with static linking.
40247 * ==================================================================================== */
40249 #define ZDICT_DICTSIZE_MIN 256
40250 /* Deprecated: Remove in v1.6.0 */
40251 #define ZDICT_CONTENTSIZE_MIN 128
40258 unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
40259 unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
40260 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
40261 unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
40262 double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
40263 unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
40264 unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
40265 ZDICT_params_t zParams;
40266 } ZDICT_cover_params_t;
40269 unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
40270 unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
40271 unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default (20) */
40272 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
40273 unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
40274 double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
40275 unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
40276 unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
40277 unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
40279 ZDICT_params_t zParams;
40280 } ZDICT_fastCover_params_t;
40296 ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
40297 void *dictBuffer, size_t dictBufferCapacity,
40298 const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
40299 ZDICT_cover_params_t parameters);
40318 ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
40319 void* dictBuffer, size_t dictBufferCapacity,
40320 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
40321 ZDICT_cover_params_t* parameters);
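/* Illustrative call (names are placeholders): leaving d, k, and steps at 0 makes
 * the optimizer search the parameter space and write the winners back:
 *   ZDICT_cover_params_t params;
 *   memset(&params, 0, sizeof(params));
 *   size_t const dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuf, dictBufCap,
 *                                 samplesBuf, sampleSizes, nbSamples, &params);
 */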
40339 ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
40340 size_t dictBufferCapacity, const void *samplesBuffer,
40341 const size_t *samplesSizes, unsigned nbSamples,
40342 ZDICT_fastCover_params_t parameters);
40362 ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
40363 size_t dictBufferCapacity, const void* samplesBuffer,
40364 const size_t* samplesSizes, unsigned nbSamples,
40365 ZDICT_fastCover_params_t* parameters);
40368 unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
40369 ZDICT_params_t zParams;
40370 } ZDICT_legacy_params_t;
40387 ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_legacy(
40388 void* dictBuffer, size_t dictBufferCapacity,
40389 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
40390 ZDICT_legacy_params_t parameters);
40393 /* Deprecation warnings */
40394 /* It is generally possible to disable deprecation warnings from the compiler,
40395 for example with -Wno-deprecated-declarations for gcc
40396 or _CRT_SECURE_NO_WARNINGS in Visual Studio.
40397 Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
40398 #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
40399 # define ZDICT_DEPRECATED(message) /* disable deprecation warnings */
40401 # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
40402 # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
40403 # define ZDICT_DEPRECATED(message) [[deprecated(message)]]
40404 # elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
40405 # define ZDICT_DEPRECATED(message) __attribute__((deprecated(message)))
40406 # elif (ZDICT_GCC_VERSION >= 301)
40407 # define ZDICT_DEPRECATED(message) __attribute__((deprecated))
40408 # elif defined(_MSC_VER)
40409 # define ZDICT_DEPRECATED(message) __declspec(deprecated(message))
40412 # define ZDICT_DEPRECATED(message)
40414 #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
40419 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
40424 #if defined (__cplusplus)
40439 #ifndef ZDICT_STATIC_LINKING_ONLY
40440 # define ZDICT_STATIC_LINKING_ONLY
40444 #include <stdlib.h>
40445 #include <string.h>
40522 const size_t *samplesSizes, const BYTE *samples,
40524 size_t nbTrainSamples, size_t nbSamples,
40525 BYTE *const dict, size_t dictBufferCapacity);
40530 size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples);
40585 size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
40586 size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
40599 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
40600 #define COVER_DEFAULT_SPLITPOINT 1.0
40605 #ifndef LOCALDISPLAYLEVEL
40609 #define DISPLAY(...) \
40611 fprintf(stderr, __VA_ARGS__); \
40614 #undef LOCALDISPLAYLEVEL
40615 #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
40616 if (displayLevel >= l) { \
40617 DISPLAY(__VA_ARGS__); \
40619 #undef DISPLAYLEVEL
40620 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
40622 #ifndef LOCALDISPLAYUPDATE
40623 static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
40624 static clock_t g_time = 0;
40626 #undef LOCALDISPLAYUPDATE
40627 #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
40628 if (displayLevel >= l) { \
40629 if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
40630 g_time = clock(); \
40631 DISPLAY(__VA_ARGS__); \
40634 #undef DISPLAYUPDATE
40635 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
40646 #define MAP_EMPTY_VALUE ((U32)-1)
40675 map->sizeMask =
map->size - 1;
40700 for (i =
hash;; i = (i + 1) &
map->sizeMask) {
40705 if (pos->
key == key) {
40722 return &pos->
value;
40735 for (i = (i + 1) &
map->sizeMask;; i = (i + 1) &
map->sizeMask) {
40772 const size_t *samplesSizes;
40774 size_t nbTrainSamples;
40775 size_t nbTestSamples;
40793 size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
40796 for (i = 0; i < nbSamples; ++i) {
40797 sum += samplesSizes[i];
40808 U32 const lhs = *(U32 const *)lp;
40809 U32 const rhs = *(U32 const *)rp;
40816 U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
40822 return (lhs > rhs);
40833 result = lp < rp ? -1 : 1;
40843 result = lp < rp ? -1 : 1;
40855 while (count != 0) {
40856 size_t step = count / 2;
40859 if (*ptr < value) {
40876 int (*cmp)(COVER_ctx_t *, const void *, const void *),
40877 void (*grp)(COVER_ctx_t *, const void *, const void *)) {
40880 while (num <
count) {
40883 while (num <
count && cmp(ctx,
ptr, grpEnd) == 0) {
40887 grp(ctx,
ptr, grpEnd);
40902 const void *groupEnd) {
40904 const U32 *grpPtr = (const U32 *)group;
40905 const U32 *grpEnd = (const U32 *)groupEnd;
40914 const size_t *curOffsetPtr = ctx->
offsets;
40919 size_t curSampleEnd = ctx->
offsets[0];
40920 for (; grpPtr != grpEnd; ++grpPtr) {
40922 ctx->
dmerAt[*grpPtr] = dmerId;
40927 if (*grpPtr < curSampleEnd) {
40935 if (grpPtr + 1 != grpEnd) {
40936 const size_t *sampleEndPtr =
40938 curSampleEnd = *sampleEndPtr;
40939 curOffsetPtr = sampleEndPtr + 1;
40947 ctx->
suffix[dmerId] = freq;
40967 const U32 k = parameters.k;
40968 const U32 d = parameters.d;
40969 const U32 dmersInK = k - d + 1;
40976 activeSegment.begin = begin;
40977 activeSegment.end = begin;
40978 activeSegment.score = 0;
40982 while (activeSegment.end < end) {
40988 if (*newDmerOcc == 0) {
40992 activeSegment.score += freqs[newDmer];
40995 activeSegment.end += 1;
40999 if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
41002 activeSegment.begin += 1;
41005 if (*delDmerOcc == 0) {
41007 activeSegment.score -= freqs[delDmer];
41012 if (activeSegment.score > bestSegment.score) {
41013 bestSegment = activeSegment;
41018 U32 newBegin = bestSegment.end;
41021 for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
41024 newBegin = MIN(newBegin, pos);
41028 bestSegment.begin = newBegin;
41029 bestSegment.end = newEnd;
41034 for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
41038 return bestSegment;
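/* Summary of the selection above: slide a window of dmersInK == k - d + 1 dmers across
 * the epoch; a dmer's frequency is added only on its first occurrence inside the window
 * and subtracted when its last occurrence leaves, so the score counts each distinct
 * dmer once. The best-scoring window becomes the chosen segment. */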
41046 size_t maxDictSize) {
41048 if (parameters.d == 0 || parameters.k == 0) {
41052 if (parameters.k > maxDictSize) {
41056 if (parameters.d > parameters.k) {
41099 const size_t *samplesSizes, unsigned nbSamples,
41100 unsigned d, double splitPoint) {
41101 const BYTE *const samples = (const BYTE *)samplesBuffer;
41102 const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
41104 const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
41105 const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
41106 const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
41107 const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
41109 if (totalSamplesSize <
MAX(
d,
sizeof(
U64)) ||
41111 DISPLAYLEVEL(1,
"Total samples size is too large (%u MB), maximum size is %u MB\n",
41113 return ERROR(srcSize_wrong);
41116 if (nbTrainSamples < 5) {
41117 DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
41118 return ERROR(srcSize_wrong);
41121 if (nbTestSamples < 1) {
41122 DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
41123 return ERROR(srcSize_wrong);
41126 memset(ctx, 0, sizeof(*ctx));
41127 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
41128 (unsigned)trainingSamplesSize);
41129 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
41130 (unsigned)testSamplesSize);
41142 ctx->
offsets = (
size_t *)malloc((nbSamples + 1) *
sizeof(
size_t));
41144 DISPLAYLEVEL(1,
"Failed to allocate scratch buffers\n");
41146 return ERROR(memory_allocation);
41155 for (i = 1; i <= nbSamples; ++i) {
41159 DISPLAYLEVEL(2, "Constructing partial suffix array\n");
41173 #if defined(__OpenBSD__)
41198 const double ratio = (double)nbDmers / (double)maxDictSize;
41203 "WARNING: The maximum dictionary size %u is too large "
41204 "compared to the source size %u! "
41205 "size(source)/size(dictionary) = %f, but it should be >= "
41206 "10! This may lead to a subpar dictionary! We recommend "
41207 "training on sources at least 10x, and preferably 100x "
41208 "the size of the dictionary! \n", (U32)maxDictSize,
41209 (U32)nbDmers, ratio);
41215 const U32 minEpochSize = k * 10;
41217 epochs.num = MAX(1, maxDictSize / k / passes);
41218 epochs.size = nbDmers / epochs.num;
41219 if (epochs.size >= minEpochSize) {
41223 epochs.size = MIN(minEpochSize, nbDmers);
41224 epochs.num = nbDmers / epochs.size;
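/* Worked example of the formulas above (illustrative numbers): maxDictSize == 64 KB,
 * k == 1024, and passes == 10 give epochs.num == MAX(1, 65536/1024/10) == 6; with
 * nbDmers == 1,000,000, epochs.size == 166,666, well above minEpochSize == k * 10. */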
41234 size_t dictBufferCapacity,
41236 BYTE *const dict = (BYTE *)dictBuffer;
41237 size_t tail = dictBufferCapacity;
41241 const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3));
41242 size_t zeroScoreRun = 0;
41244 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
41249 for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
41250 const U32 epochBegin = (U32)(epoch * epochs.size);
41251 const U32 epochEnd = epochBegin + epochs.size;
41252 size_t segmentSize;
41255 ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
41259 if (segment.score == 0) {
41260 if (++zeroScoreRun >= maxZeroScoreRun) {
41267 segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
41268 if (segmentSize < parameters.d) {
41274 tail -= segmentSize;
41275 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
41278 (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
41285 void *dictBuffer, size_t dictBufferCapacity,
41286 const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
41289 BYTE* const dict = (BYTE*)dictBuffer;
41298 return ERROR(parameter_outOfBound);
41300 if (nbSamples == 0) {
41301 DISPLAYLEVEL(1, "Cover must have at least one input file\n");
41302 return ERROR(srcSize_wrong);
41305 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
41307 return ERROR(dstSize_tooSmall);
41311 size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
41319 DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
41321 return ERROR(memory_allocation);
41326 const size_t tail =
41328 dictBufferCapacity, parameters);
41330 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
41331 samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
41333 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
41334 (unsigned)dictionarySize);
41338 return dictionarySize;
41345 const size_t *samplesSizes, const BYTE *samples,
41347 size_t nbTrainSamples, size_t nbSamples,
41348 BYTE *const dict, size_t dictBufferCapacity) {
41349 size_t totalCompressedSize = ERROR(GENERIC);
41355 size_t dstCapacity;
41359 size_t maxSampleSize = 0;
41360 i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
41361 for (; i < nbSamples; ++i) {
41362 maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
41365 dst = malloc(dstCapacity);
41371 if (!dst || !cctx || !cdict) {
41372 goto _compressCleanup;
41375 totalCompressedSize = dictBufferCapacity;
41376 i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
41377 for (; i < nbSamples; ++i) {
41379 cctx, dst, dstCapacity, samples + offsets[i],
41380 samplesSizes[i], cdict);
41382 totalCompressedSize = size;
41383 goto _compressCleanup;
41385 totalCompressedSize += size;
41393 return totalCompressedSize;
41401 if (best==NULL) return;
41462 size_t dictSize = selection.dictSize;
41478 best->dict = malloc(dictSize);
41489 memcpy(best->dict, dict, dictSize);
41495 if (liveJobs == 0) {
41524 size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
41525 size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
41527 size_t largestDict = 0;
41528 size_t largestCompressed = 0;
41529 BYTE* customDictContentEnd = customDictContent + dictContentSize;
41531 BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
41532 BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
41535 if (!largestDictbuffer || !candidateDictBuffer) {
41536 free(largestDictbuffer);
41537 free(candidateDictBuffer);
41542 memcpy(largestDictbuffer, customDictContent, dictContentSize);
41544 largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
41545 samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
41548 free(largestDictbuffer);
41549 free(candidateDictBuffer);
41554 samplesBuffer, offsets,
41555 nbCheckSamples, nbSamples,
41556 largestDictbuffer, dictContentSize);
41559 free(largestDictbuffer);
41560 free(candidateDictBuffer);
41565 free(candidateDictBuffer);
41566 return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
41569 largestDict = dictContentSize;
41570 largestCompressed = totalCompressedSize;
41574 while (dictContentSize < largestDict) {
41575 memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
41577 candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
41578 samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
41581 free(largestDictbuffer);
41582 free(candidateDictBuffer);
41588 samplesBuffer, offsets,
41589 nbCheckSamples, nbSamples,
41590 candidateDictBuffer, dictContentSize);
41593 free(largestDictbuffer);
41594 free(candidateDictBuffer);
41598 if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
41599 free(largestDictbuffer);
41600 return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
41602 dictContentSize *= 2;
41604 dictContentSize = largestDict;
41605 totalCompressedSize = largestCompressed;
41606 free(candidateDictBuffer);
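/* Shrinking strategy above: keep the largest dictionary as the baseline, then retry
 * from the smallest size upward, doubling dictContentSize each round, and return the
 * first candidate whose total compressed size stays within regressionTolerance of the
 * baseline; if none qualifies, fall back to the largest dictionary. */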
41631 size_t dictBufferCapacity = data->dictBufferCapacity;
41632 size_t totalCompressedSize = ERROR(GENERIC);
41635 BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
41639 DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
41642 if (!dict || !freqs) {
41643 DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
41651 dictBufferCapacity, parameters);
41652 selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
41654 totalCompressedSize);
41671 void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
41672 const size_t* samplesSizes, unsigned nbSamples,
41676 const unsigned nbThreads = parameters->
nbThreads;
41677 const double splitPoint =
41679 const unsigned kMinD = parameters->
d == 0 ? 6 : parameters->
d;
41680 const unsigned kMaxD = parameters->
d == 0 ? 8 : parameters->
d;
41681 const unsigned kMinK = parameters->
k == 0 ? 50 : parameters->
k;
41682 const unsigned kMaxK = parameters->
k == 0 ? 2000 : parameters->
k;
41683 const unsigned kSteps = parameters->
steps == 0 ? 40 : parameters->
steps;
41684 const unsigned kStepSize =
MAX((kMaxK - kMinK) / kSteps, 1);
41685 const unsigned kIterations =
41686 (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
41687 const unsigned shrinkDict = 0;
41690 unsigned iteration = 1;
41698 if (splitPoint <= 0 || splitPoint > 1) {
41700 return ERROR(parameter_outOfBound);
41702 if (kMinK < kMaxD || kMaxK < kMinK) {
41704 return ERROR(parameter_outOfBound);
41706 if (nbSamples == 0) {
41707 DISPLAYLEVEL(1, "Cover must have at least one input file\n");
41708 return ERROR(srcSize_wrong);
41711 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
41713 return ERROR(dstSize_tooSmall);
41715 if (nbThreads > 1) {
41718 return ERROR(memory_allocation);
41728 for (d = kMinD; d <= kMaxD; d += 2) {
41733 const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
41746 for (k = kMinK; k <= kMaxK; k += kStepSize) {
41756 return ERROR(memory_allocation);
41759 data->best = &best;
41760 data->dictBufferCapacity = dictBufferCapacity;
41761 data->parameters = *parameters;
41762 data->parameters.k = k;
41763 data->parameters.d = d;
41764 data->parameters.splitPoint = splitPoint;
41765 data->parameters.steps = kSteps;
41766 data->parameters.shrinkDict = shrinkDict;
41783 (unsigned)((iteration * 100) / kIterations));
41792 const size_t dictSize = best.dictSize;
41800 memcpy(dictBuffer, best.dict, dictSize);
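/* ---- start of the embedded divsufsort library (Yuta Mori's suffix-array
 * construction), used by the legacy ZDICT trainer to locate repeated segments ---- */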
41836 #pragma clang diagnostic ignored "-Wshorten-64-to-32"
41839 #if defined(_MSC_VER)
41840 # pragma warning(disable : 4244)
41841 # pragma warning(disable : 4127)
41848 #include <stdlib.h>
41877 #ifndef _DIVSUFSORT_H
41878 #define _DIVSUFSORT_H 1
41896 divsufsort(const unsigned char *T, int *SA, int n, int openMP);
41910 divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char *num_indexes, int *indexes, int openMP);
41921 #if defined(INLINE)
41924 #if !defined(INLINE)
41925 # define INLINE __inline
41927 #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
41928 # undef ALPHABET_SIZE
41930 #if !defined(ALPHABET_SIZE)
41931 # define ALPHABET_SIZE (256)
41933 #define BUCKET_A_SIZE (ALPHABET_SIZE)
41934 #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
41935 #if defined(SS_INSERTIONSORT_THRESHOLD)
41936 # if SS_INSERTIONSORT_THRESHOLD < 1
41937 # undef SS_INSERTIONSORT_THRESHOLD
41938 # define SS_INSERTIONSORT_THRESHOLD (1)
41941 # define SS_INSERTIONSORT_THRESHOLD (8)
41943 #if defined(SS_BLOCKSIZE)
41944 # if SS_BLOCKSIZE < 0
41945 # undef SS_BLOCKSIZE
41946 # define SS_BLOCKSIZE (0)
41947 # elif 32768 <= SS_BLOCKSIZE
41948 # undef SS_BLOCKSIZE
41949 # define SS_BLOCKSIZE (32767)
41952 # define SS_BLOCKSIZE (1024)
41955 #if SS_BLOCKSIZE == 0
41956 # define SS_MISORT_STACKSIZE (96)
41957 #elif SS_BLOCKSIZE <= 4096
41958 # define SS_MISORT_STACKSIZE (16)
41960 # define SS_MISORT_STACKSIZE (24)
41962 #define SS_SMERGE_STACKSIZE (32)
41963 #define TR_INSERTIONSORT_THRESHOLD (8)
41964 #define TR_STACKSIZE (64)
41969 # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
41972 # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
41975 # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
41977 #define STACK_PUSH(_a, _b, _c, _d)\
41979 assert(ssize < STACK_SIZE);\
41980 stack[ssize].a = (_a), stack[ssize].b = (_b),\
41981 stack[ssize].c = (_c), stack[ssize++].d = (_d);\
41983 #define STACK_PUSH5(_a, _b, _c, _d, _e)\
41985 assert(ssize < STACK_SIZE);\
41986 stack[ssize].a = (_a), stack[ssize].b = (_b),\
41987 stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
41989 #define STACK_POP(_a, _b, _c, _d)\
41991 assert(0 <= ssize);\
41992 if(ssize == 0) { return; }\
41993 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
41994 (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
41996 #define STACK_POP5(_a, _b, _c, _d, _e)\
41998 assert(0 <= ssize);\
41999 if(ssize == 0) { return; }\
42000 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
42001 (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
42003 #define BUCKET_A(_c0) bucket_A[(_c0)]
42004 #if ALPHABET_SIZE == 256
42005 #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
42006 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
42008 #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
42009 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
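/* lg_table: 256-entry lookup giving floor(log2(v)) for each byte value v
 * (with lg_table[0] = -1); combined with shifts below to compute ss_ilg(n). */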
42016 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
42017 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
42018 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
42019 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
42020 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
42021 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
42022 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
42023 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
42026 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
42031 #if SS_BLOCKSIZE == 0
42032 return (n & 0xffff0000) ?
42033 ((n & 0xff000000) ?
42035 16 + lg_table[(n >> 16) & 0xff]) :
42036 ((n & 0x0000ff00) ?
42039 #elif SS_BLOCKSIZE < 256
42042 return (n & 0xff00) ?
42050 #if SS_BLOCKSIZE != 0
42053 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
42054 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
42055 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
42056 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
42057 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
42058 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
42059 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
42060 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
42061 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
42062 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
42063 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
42064 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
42065 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
42066 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
42067 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
42068 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
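/* ss_isqrt: integer square root, seeded from sqq_table and refined with
 * Newton iterations y = (y + 1 + x/y) >> 1 for larger inputs. */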
42077 e = (x & 0xffff0000) ?
42078 ((x & 0xff000000) ?
42081 ((x & 0x0000ff00) ?
42086 y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
42087 if(e >= 24) { y = (y + 1 + x / y) >> 1; }
42088 y = (y + 1 + x / y) >> 1;
42089 } else if(e >= 8) {
42090 y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
42095 return (x < (y * y)) ? y - 1 : y;
42107 const int *p1, const int *p2,
42109 const unsigned char *U1, *U2, *U1n, *U2n;
42111 for(U1 = T + depth + *p1,
42112 U2 = T + depth + *p2,
42113 U1n = T + *(p1 + 1) + 2,
42114 U2n = T + *(p2 + 1) + 2;
42115 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
42120 (U2 < U2n ? *U1 - *U2 : 1) :
42121 (U2 < U2n ? -1 : 0);
42127 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
42133 int *first, int *last, int depth) {
42138 for(i = last - 2; first <= i; --i) {
42139 for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
42140 do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
42141 if(last <= j) { break; }
42143 if(r == 0) { *j = ~*j; }
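/* The helpers below implement the heapsort fallback used by ss_mintrosort
 * once its recursion-depth budget (limit) is exhausted. */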
42153 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
42157 ss_fixdown(const unsigned char *Td, const int *PA,
42158 int *SA, int i, int size) {
42163 for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
42164 d = Td[PA[SA[k = j++]]];
42165 if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
42166 if(d <= c) { break; }
42174 ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
42179 if((size % 2) == 0) {
42181 if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
42184 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
42185 if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
42186 for(i = m - 1; 0 < i; --i) {
42187 t = SA[0], SA[0] = SA[i];
42188 ss_fixdown(Td, PA, SA, 0, i);
42199 ss_median3(const unsigned char *Td, const int *PA,
42200 int *v1, int *v2, int *v3) {
42202 if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
42203 if(Td[PA[*v2]] > Td[PA[*v3]]) {
42204 if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
42205 else { return v3; }
42213 ss_median5(const unsigned char *Td, const int *PA,
42214 int *v1, int *v2, int *v3, int *v4, int *v5) {
42216 if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
42217 if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
42218 if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
42219 if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
42220 if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
42221 if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
42228 ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
42233 middle = first + t / 2;
42237 return ss_median3(Td, PA, first, middle, last - 1);
42240 return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
42245 middle = ss_median3(Td, PA, middle - t, middle, middle + t);
42246 last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
42247 return ss_median3(Td, PA, first, middle, last);
42256 ss_partition(const int *PA,
42257 int *first, int *last, int depth) {
42260 for(a = first - 1, b = last;;) {
42261 for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
42262 for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
42263 if(b <= a) { break; }
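/* ss_mintrosort: multikey introsort over one character depth at a time;
 * median-of-3/5/9 pivot selection, three-way partition, heapsort fallback. */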
42275 ss_mintrosort(const unsigned char *T, const int *PA,
42276 int *first, int *last,
42278 #define STACK_SIZE SS_MISORT_STACKSIZE
42279 struct { int *a, *b, c; int d; } stack[STACK_SIZE];
42280 const unsigned char *Td;
42281 int *a, *b, *c, *d, *e, *f;
42287 for(ssize = 0, limit = ss_ilg(last - first);;) {
42290 #if 1 < SS_INSERTIONSORT_THRESHOLD
42298 if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
42300 for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
42301 if((x = Td[PA[*a]]) != v) {
42302 if(1 < (a - first)) { break; }
42307 if(Td[PA[*first] - 1] < v) {
42310 if((a - first) <= (last - a)) {
42311 if(1 < (a - first)) {
42313 last = a, depth += 1, limit = ss_ilg(a - first);
42315 first = a, limit = -1;
42318 if(1 < (last - a)) {
42320 first = a, limit = -1;
42322 last = a, depth += 1, limit = ss_ilg(a - first);
42329 a = ss_pivot(Td, PA, first, last);
42334 for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
42335 if(((a = b) < last) && (x < v)) {
42336 for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
42337 if(x == v) { SWAP(*b, *a); ++a; }
42340 for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
42341 if((b < (d = c)) && (x > v)) {
42342 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
42343 if(x == v) { SWAP(*c, *d); --d; }
42348 for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
42349 if(x == v) { SWAP(*b, *a); ++a; }
42351 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
42352 if(x == v) { SWAP(*c, *d); --d; }
42359 if((s = a - first) > (t = b - a)) { s = t; }
42360 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
42361 if((s = d - c) > (t = last - d - 1)) { s = t; }
42362 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
42364 a = first + (b - a), c = last - (d - c);
42365 b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
42367 if((a - first) <= (last - c)) {
42368 if((last - c) <= (c - b)) {
42372 } else if((a - first) <= (c - b)) {
42379 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
42382 if((a - first) <= (c - b)) {
42386 } else if((last - c) <= (c - b)) {
42393 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
42398 if(Td[PA[*first] - 1] < v) {
42399 first = ss_partition(PA, first, last, depth);
42400 limit = ss_ilg(last - first);
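/* The routines below (block swap, rotate, in-place/forward/backward/swap
 * merge) combine sorted blocks of at most SS_BLOCKSIZE suffixes. */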
42413 #if SS_BLOCKSIZE != 0
42419 for(; 0 < n; --n, ++a, ++b) {
42420 t = *a, *a = *b, *b = t;
42429 l = middle - first, r = last - middle;
42430 for(; (0 < l) && (0 < r);) {
42433 a = last - 1, b = middle - 1;
42436 *a-- = *b, *b-- = *a;
42440 if((r -= l + 1) <= l) { break; }
42441 a -= 1, b = middle - 1;
42446 a = first, b = middle;
42449 *a++ = *b, *b++ = *a;
42453 if((l -= r + 1) <= r) { break; }
42454 a += 1, b = middle;
42468 int *first, int *middle, int *last,
42477 if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
42478 else { x = 0; p = PA + *(last - 1); }
42479 for(a = first, len = middle - first, half = len >> 1, r = -1;
42481 len = half, half >>= 1) {
42483 q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
42486 half -= (len & 1) ^ 1;
42492 if(r == 0) { *a = ~*a; }
42494 last -= middle - a;
42496 if(first == middle) { break; }
42499 if(x != 0) { while(*--last < 0) { } }
42500 if(middle == last) { break; }
42511 int *first, int *middle, int *last,
42512 int *buf, int depth) {
42513 int *a, *b, *c, *bufend;
42517 bufend = buf + (middle - first) - 1;
42520 for(t = *(a = first), b = buf, c = middle;;) {
42525 if(bufend <= b) { *bufend = t; return; }
42530 *a++ = *c, *c++ = *a;
42532 while(b < bufend) { *a++ = *b, *b++ = *a; }
42541 if(bufend <= b) { *bufend = t; return; }
42546 *a++ = *c, *c++ = *a;
42548 while(b < bufend) { *a++ = *b, *b++ = *a; }
42561 int *first, int *middle, int *last,
42562 int *buf, int depth) {
42563 const int *p1, *p2;
42564 int *a, *b, *c, *bufend;
42569 bufend = buf + (last - middle) - 1;
42573 if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
42574 else { p1 = PA + *bufend; }
42575 if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
42576 else { p2 = PA + *(middle - 1); }
42577 for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
42580 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
42582 if(b <= buf) { *buf = t; break; }
42584 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
42585 else { p1 = PA + *b; }
42587 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
42588 *a-- = *c, *c-- = *a;
42590 while(buf < b) { *a-- = *b, *b-- = *a; }
42594 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
42595 else { p2 = PA + *c; }
42597 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
42599 if(b <= buf) { *buf = t; break; }
42601 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
42602 *a-- = *c, *c-- = *a;
42604 while(buf < b) { *a-- = *b, *b-- = *a; }
42608 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
42609 else { p1 = PA + *b; }
42610 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
42611 else { p2 = PA + *c; }
42620 int *first, int *middle, int *last,
42621 int *buf, int bufsize, int depth) {
42622 #define STACK_SIZE SS_SMERGE_STACKSIZE
42623 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
42624 #define MERGE_CHECK(a, b, c)\
42627 (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
42630 if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
42634 struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
42635 int *l, *r, *lm, *rm;
42641 if((last - middle) <= bufsize) {
42642 if((first < middle) && (middle < last)) {
42650 if((middle - first) <= bufsize) {
42651 if(first < middle) {
42659 for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
42661 len = half, half >>= 1) {
42663 PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
42665 half -= (len & 1) ^ 1;
42670 lm = middle - m, rm = middle + m;
42672 l = r = middle, next = 0;
42676 if(first < lm) { for(; *--l < 0;) { } next |= 4; }
42678 } else if(first < lm) {
42679 for(; *r < 0; ++r) { }
42684 if((l - first) <= (last - r)) {
42688 if((next & 2) && (r == middle)) { next ^= 6; }
42694 *middle = ~*middle;
42711 sssort(const unsigned char *T, const int *PA,
42712 int *first, int *last,
42713 int *buf, int bufsize,
42714 int depth, int n, int lastsuffix) {
42716 #if SS_BLOCKSIZE != 0
42717 int *b, *middle, *curbuf;
42718 int j, k, curbufsize, limit;
42722 if(lastsuffix != 0) { ++first; }
42724 #if SS_BLOCKSIZE == 0
42725 ss_mintrosort(T, PA, first, last, depth);
42728 (bufsize < (last - first)) &&
42731 buf = middle = last - limit, bufsize = limit;
42733 middle = last, limit = 0;
42736 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
42738 #elif 1 < SS_BLOCKSIZE
42743 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
42744 for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
42745 ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
42748 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
42749 ss_mintrosort(T, PA, a, middle, depth);
42750 #elif 1 < SS_BLOCKSIZE
42755 ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
42760 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
42761 ss_mintrosort(T, PA, middle, last, depth);
42762 #elif 1 < SS_BLOCKSIZE
42769 if(lastsuffix != 0) {
42771 int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
42773 (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
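/* trsort: second stage. Using the ranks produced by sssort as keys (ISAd),
 * an introsort with doubling depth orders the remaining suffixes; trbudget
 * bounds the work spent before deferring to a deeper pass. */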
42787 return (n & 0xffff0000) ?
42788 ((n & 0xff000000) ?
42790 16 + lg_table[(n >> 16) & 0xff]) :
42791 ((n & 0x0000ff00) ?
42806 for(a = first + 1; a < last; ++a) {
42807 for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
42808 do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
42809 if(b < first) { break; }
42811 if(r == 0) { *b = ~*b; }
42826 for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
42828 if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
42829 if(d <= c) { break; }
42842 if((size % 2) == 0) {
42844 if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
42847 for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
42849 for(i = m - 1; 0 < i; --i) {
42850 t = SA[0], SA[0] = SA[i];
42862 tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
42864 if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
42865 if(ISAd[*v2] > ISAd[*v3]) {
42866 if(ISAd[*v1] > ISAd[*v3]) { return v1; }
42867 else { return v3; }
42876 int *v1, int *v2, int *v3, int *v4, int *v5) {
42878 if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
42879 if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
42881 if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
42882 if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
42883 if(ISAd[*v3] > ISAd[*v4]) { return v4; }
42895 middle = first + t / 2;
42908 last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
42926 budget->chance = chance;
42933 if(size <= budget->remain) { budget->remain -= size; return 1; }
42946 int *first, int *middle, int *last,
42947 int **pa, int **pb, int v) {
42948 int *a, *b, *c, *d, *e, *f;
42952 for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
42953 if(((a = b) < last) && (x < v)) {
42954 for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
42955 if(x == v) { SWAP(*b, *a); ++a; }
42958 for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
42959 if((b < (d = c)) && (x > v)) {
42960 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
42961 if(x == v) { SWAP(*c, *d); --d; }
42966 for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
42967 if(x == v) { SWAP(*b, *a); ++a; }
42969 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
42970 if(x == v) { SWAP(*c, *d); --d; }
42976 if((s = a - first) > (t = b - a)) { s = t; }
42978 if((s = d - c) > (t = last - d - 1)) { s = t; }
42979 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
42980 first += (b - a), last -= (d - c);
42982 *pa = first, *pb = last;
42987 tr_copy(int *ISA, const int *SA,
42988 int *first, int *a, int *b, int *last,
42996 for(c = first, d = a - 1; c <= d; ++c) {
42997 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
43002 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
43003 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
43013 int *first, int *a, int *b, int *last,
43017 int rank, lastrank, newrank = -1;
43021 for(c = first, d = a - 1; c <= d; ++c) {
43022 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
43024 rank = ISA[s + depth];
43025 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
43031 for(e = d; first <= e; --e) {
43033 if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
43034 if(newrank != rank) { ISA[*e] = newrank; }
43038 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
43039 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
43041 rank = ISA[s + depth];
43042 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
43051 int *SA, int *first, int *last,
43053 #define STACK_SIZE TR_STACKSIZE
43054 struct { const int *a; int *b, *c; int d, e; } stack[STACK_SIZE];
43058 int incr = ISAd - ISA;
43060 int ssize, trlink = -1;
43071 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
43074 for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
43081 trlink = ssize - 2;
43083 if((a - first) <= (last - b)) {
43084 if(1 < (a - first)) {
43087 } else if(1 < (last - b)) {
43093 if(1 < (last - b)) {
43096 } else if(1 < (a - first)) {
43102 } else if(limit == -2) {
43105 if(stack[ssize].d == 0) {
43108 if(0 <= trlink) { stack[trlink].d = -1; }
43116 do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
43120 a = first; do { *a = ~*a; } while(*++a < 0);
43122 if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
43126 if((a - first) <= (last - a)) {
43128 ISAd += incr, last = a, limit = next;
43130 if(1 < (last - a)) {
43132 first = a, limit = -3;
43134 ISAd += incr, last = a, limit = next;
43138 if(0 <= trlink) { stack[trlink].d = -1; }
43139 if(1 < (last - a)) {
43140 first = a, limit = -3;
43160 for(a = last - 1; first < a; a = b) {
43161 for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
43174 if((last - first) != (b - a)) {
43178 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
43179 if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
43183 if((a - first) <= (last - b)) {
43184 if((last - b) <= (b - a)) {
43185 if(1 < (a - first)) {
43189 } else if(1 < (last - b)) {
43193 ISAd += incr, first = a, last = b, limit = next;
43195 } else if((a - first) <= (b - a)) {
43196 if(1 < (a - first)) {
43202 ISAd += incr, first = a, last = b, limit = next;
43207 ISAd += incr, first = a, last = b, limit = next;
43210 if((a - first) <= (b - a)) {
43211 if(1 < (last - b)) {
43215 } else if(1 < (a - first)) {
43219 ISAd += incr, first = a, last = b, limit = next;
43221 } else if((last - b) <= (b - a)) {
43222 if(1 < (last - b)) {
43228 ISAd += incr, first = a, last = b, limit = next;
43233 ISAd += incr, first = a, last = b, limit = next;
43237 if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
43238 if((a - first) <= (last - b)) {
43239 if(1 < (a - first)) {
43242 } else if(1 < (last - b)) {
43248 if(1 < (last - b)) {
43251 } else if(1 < (a - first)) {
43262 if(0 <= trlink) { stack[trlink].d = -1; }
43277 trsort(int *ISA, int *SA, int n, int depth) {
43281 int t, skip, unsorted;
43285 for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
43290 if((t = *first) < 0) { first -= t; skip += t; }
43292 if(skip != 0) { *(first + skip) = skip; skip = 0; }
43293 last = SA + ISA[t] + 1;
43294 if(1 < (last - first)) {
43297 if(budget.count != 0) { unsorted += budget.count; }
43298 else { skip = first - last; }
43299 } else if((last - first) == 1) {
43304 } while(first < (SA + n));
43305 if(skip != 0) { *(first + skip) = skip; }
43306 if(unsorted == 0) { break; }
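/* sort_typeBstar / construct_SA / construct_BWT: the two-stage divsufsort
 * pipeline. Type-B* suffixes are bucket-sorted and fully ordered first,
 * then the positions of all remaining suffixes are induced from them. */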
43317 int *bucket_A, int *bucket_B,
43318 int n, int openMP) {
43319 int *PAb, *ISAb, *buf;
43320 #ifdef LIBBSC_OPENMP
43324 int i, j, k, t, m, bufsize;
43326 #ifdef LIBBSC_OPENMP
43338 for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
43340 do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
43346 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
43372 PAb = SA + n - m; ISAb = SA + m;
43373 for(i = m - 2; 0 <= i; --i) {
43374 t = PAb[i], c0 = T[t], c1 = T[t + 1];
43377 t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
43381 #ifdef LIBBSC_OPENMP
43386 #pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
43388 bufsize = (n - (2 * m)) / omp_get_num_threads();
43389 curbuf = buf + omp_get_thread_num() * bufsize;
43392 #pragma omp critical(sssort_lock)
43400 if(--d0 < 0) { break; }
43402 } while(((l - k) <= 1) && (0 < (l = k)));
43403 c0 = d0, c1 = d1, j = k;
43406 if(l == 0) { break; }
43407 sssort(T, PAb, SA + k, SA + l,
43408 curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
43414 buf = SA + m, bufsize = n - (2 * m);
43419 sssort(T, PAb, SA + i, SA + j,
43420 buf, bufsize, 2, n, *(SA + i) == (m - 1));
43426 buf = SA + m, bufsize = n - (2 * m);
43431 sssort(T, PAb, SA + i, SA + j,
43432 buf, bufsize, 2, n, *(SA + i) == (m - 1));
43439 for(i = m - 1; 0 <= i; --i) {
43442 do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
43444 if(i <= 0) { break; }
43447 do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
43455 for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
43456 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
43459 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
43460 SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
43475 --i, --k) { SA[i] = SA[k]; }
43489 int *bucket_A, int *bucket_B,
43501 j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
43506 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
43510 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
43512 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
43518 assert(((s == 0) && (T[s] == c1)) || (s < 0));
43528 *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
43530 for(i = SA, j = SA + n; i < j; ++i) {
43532 assert(T[s - 1] >= T[s]);
43534 if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
43553 int *bucket_A, int *bucket_B,
43555 int *i, *j, *k, *orig;
43565 j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
43570 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
43571 assert(T[s - 1] <= T[s]);
43574 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
43576 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
43581 } else if(s != 0) {
43595 *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
43597 for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
43599 assert(T[s - 1] >= T[s]);
43602 if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
43609 } else if(s != 0) {
43624 int *bucket_A, int *bucket_B,
43626 unsigned char *num_indexes, int *indexes) {
43627 int *i, *j, *k, *orig;
43637 *num_indexes = (unsigned char)((n - 1) / (mod + 1));
43646 j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
43651 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
43652 assert(T[s - 1] <= T[s]);
43654 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
43658 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
43660 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
43665 } else if(s != 0) {
43679 if (T[n - 2] < c2) {
43680 if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
43681 *k++ = ~((int)T[n - 2]);
43688 for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
43690 assert(T[s - 1] >= T[s]);
43692 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
43701 if((0 < s) && (T[s - 1] < c0)) {
43702 if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
43703 *k++ = ~((int)T[s - 1]);
43706 } else if(s != 0) {
43722 divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
43723 int *bucket_A, *bucket_B;
43728 if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
43729 else if(n == 0) { return 0; }
43730 else if(n == 1) { SA[0] = 0; return 0; }
43731 else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
43737 if((bucket_A != NULL) && (bucket_B != NULL)) {
43751 divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char *num_indexes, int *indexes, int openMP) {
43753 int *bucket_A, *bucket_B;
43757 if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
43758 else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
43760 if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
43765 if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
43768 if (num_indexes == NULL || indexes == NULL) {
43776 for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
43777 for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
43785 if(A == NULL) { free(B); }
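/* ---- end of divsufsort; the FASTCOVER dictionary trainer follows. It
 * approximates COVER by hashing d-mers into a table of 2^f counters instead
 * of building suffix structures over the full sample set. ---- */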
43805 #include <stdlib.h>
43806 #include <string.h>
43809 #ifndef ZDICT_STATIC_LINKING_ONLY
43810 # define ZDICT_STATIC_LINKING_ONLY
43832 #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
43833 #define FASTCOVER_MAX_F 31
43834 #define FASTCOVER_MAX_ACCEL 10
43835 #define FASTCOVER_DEFAULT_SPLITPOINT 0.75
43836 #define DEFAULT_F 20
43837 #define DEFAULT_ACCEL 1
43843 #ifndef LOCALDISPLAYLEVEL
43847 #define DISPLAY(...) \
43849 fprintf(stderr, __VA_ARGS__); \
43852 #undef LOCALDISPLAYLEVEL
43853 #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
43854 if (displayLevel >= l) { \
43855 DISPLAY(__VA_ARGS__); \
43857 #undef DISPLAYLEVEL
43858 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
43860 #ifndef LOCALDISPLAYUPDATE
43861 static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
43862 static clock_t g_time = 0;
43864 #undef LOCALDISPLAYUPDATE
43865 #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
43866 if (displayLevel >= l) { \
43867 if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
43868 g_time = clock(); \
43869 DISPLAY(__VA_ARGS__); \
43872 #undef DISPLAYUPDATE
43873 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
43920 const size_t *samplesSizes;
43922 size_t nbTrainSamples;
43923 size_t nbTestSamples;
43949 U16* segmentFreqs) {
43951 const U32 k = parameters.k;
43952 const U32 d = parameters.d;
43953 const U32 f = ctx->f;
43954 const U32 dmersInK = k - d + 1;
43962 activeSegment.begin = begin;
43963 activeSegment.end = begin;
43964 activeSegment.score = 0;
43969 while (activeSegment.end < end) {
43974 if (segmentFreqs[idx] == 0) {
43975 activeSegment.score += freqs[idx];
43978 activeSegment.end += 1;
43979 segmentFreqs[idx] += 1;
43981 if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
43984 segmentFreqs[delIndex] -= 1;
43986 if (segmentFreqs[delIndex] == 0) {
43987 activeSegment.score -= freqs[delIndex];
43990 activeSegment.begin += 1;
43994 if (activeSegment.score > bestSegment.score) {
43995 bestSegment = activeSegment;
44000 while (activeSegment.begin < end) {
44002 segmentFreqs[delIndex] -= 1;
44003 activeSegment.begin += 1;
44009 for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
44015 return bestSegment;
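/* FASTCOVER_checkParameters / FASTCOVER_computeFrequency below validate
 * (d, k, f, accel) and fill the d-mer frequency table that the sliding-window
 * segment selection above consumes. */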
44020 size_t maxDictSize, unsigned f,
44023 if (parameters.d == 0 || parameters.k == 0) {
44027 if (parameters.d != 6 && parameters.d != 8) {
44031 if (parameters.k > maxDictSize) {
44035 if (parameters.d > parameters.k) {
44047 if (accel > 10 || accel == 0) {
44076 const unsigned f = ctx->f;
44077 const unsigned d = ctx->d;
44079 const unsigned readLength = MAX(d, 8);
44085 size_t const currSampleEnd = ctx->offsets[i+1];
44086 while (start + readLength <= currSampleEnd) {
44088 freqs[dmerIndex]++;
44104 const void* samplesBuffer,
44105 const size_t* samplesSizes, unsigned nbSamples,
44106 unsigned d, double splitPoint, unsigned f,
44109 const BYTE* const samples = (const BYTE*)samplesBuffer;
44110 const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
44112 const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
44113 const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
44114 const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
44115 const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
44118 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
44120 DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
44122 return ERROR(srcSize_wrong);
44126 if (nbTrainSamples < 5) {
44127 DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
44128 return ERROR(srcSize_wrong);
44132 if (nbTestSamples < 1) {
44133 DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
44134 return ERROR(srcSize_wrong);
44138 memset(ctx, 0, sizeof(*ctx));
44139 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
44140 (unsigned)trainingSamplesSize);
44141 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
44142 (unsigned)testSamplesSize);
44155 ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t));
44157 DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
44159 return ERROR(memory_allocation);
44166 for (i = 1; i <= nbSamples; ++i) {
44173 if (ctx->freqs == NULL) {
44174 DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
44176 return ERROR(memory_allocation);
44192 void* dictBuffer, size_t dictBufferCapacity,
44196 BYTE* const dict = (BYTE*)dictBuffer;
44197 size_t tail = dictBufferCapacity;
44201 const size_t maxZeroScoreRun = 10;
44202 size_t zeroScoreRun = 0;
44204 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
44209 for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
44210 const U32 epochBegin = (U32)(epoch * epochs.size);
44211 const U32 epochEnd = epochBegin + epochs.size;
44212 size_t segmentSize;
44215 ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);
44220 if (segment.score == 0) {
44221 if (++zeroScoreRun >= maxZeroScoreRun) {
44229 segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
44230 if (segmentSize < parameters.d) {
44237 tail -= segmentSize;
44238 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
44241 (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
44269 size_t dictBufferCapacity = data->dictBufferCapacity;
44270 size_t totalCompressedSize = ERROR(GENERIC);
44272 U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
44274 BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
44276 U32* freqs = (U32*)malloc(((U64)1 << ctx->f) * sizeof(U32));
44277 if (!segmentFreqs || !dict || !freqs) {
44278 DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
44282 memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
44285 parameters, segmentFreqs);
44288 selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
44290 totalCompressedSize);
44301 free(segmentFreqs);
44311 coverParams->k = fastCoverParams.k;
44312 coverParams->d = fastCoverParams.d;
44324 unsigned f, unsigned accel)
44326 fastCoverParams->k = coverParams.k;
44327 fastCoverParams->d = coverParams.d;
44331 fastCoverParams->f = f;
44332 fastCoverParams->accel = accel;
44340 const void* samplesBuffer,
44341 const size_t* samplesSizes, unsigned nbSamples,
44344 BYTE* const dict = (BYTE*)dictBuffer;
44352 parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
44355 memset(&coverParams, 0, sizeof(coverParams));
44359 parameters.accel)) {
44361 return ERROR(parameter_outOfBound);
44363 if (nbSamples == 0) {
44364 DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
44365 return ERROR(srcSize_wrong);
44368 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
44370 return ERROR(dstSize_tooSmall);
44376 size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
44389 U16* segmentFreqs = (U16*)calloc(((U64)1 << parameters.f), sizeof(U16));
44391 dictBufferCapacity, coverParams, segmentFreqs);
44394 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
44395 samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);
44397 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
44398 (unsigned)dictionarySize);
44401 free(segmentFreqs);
44402 return dictionarySize;
44409 void* dictBuffer, size_t dictBufferCapacity,
44410 const void* samplesBuffer,
44411 const size_t* samplesSizes, unsigned nbSamples,
44417 const unsigned nbThreads = parameters->nbThreads;
44418 const double splitPoint =
44420 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
44421 const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
44422 const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
44423 const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
44424 const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
44425 const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
44426 const unsigned kIterations =
44427 (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
44428 const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
44430 const unsigned shrinkDict = 0;
44433 unsigned iteration = 1;
44440 if (splitPoint <= 0 || splitPoint > 1) {
44442 return ERROR(parameter_outOfBound);
44446 return ERROR(parameter_outOfBound);
44448 if (kMinK < kMaxD || kMaxK < kMinK) {
44450 return ERROR(parameter_outOfBound);
44452 if (nbSamples == 0) {
44453 LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
44454 return ERROR(srcSize_wrong);
44459 return ERROR(dstSize_tooSmall);
44461 if (nbThreads > 1) {
44464 return ERROR(memory_allocation);
44469 memset(&coverParams, 0, sizeof(coverParams));
44477 for (d = kMinD; d <= kMaxD; d += 2) {
44482 size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
44495 for (k = kMinK; k <= kMaxK; k += kStepSize) {
44505 return ERROR(memory_allocation);
44508 data->best = &best;
44509 data->dictBufferCapacity = dictBufferCapacity;
44510 data->parameters = coverParams;
44511 data->parameters.k = k;
44512 data->parameters.d = d;
44513 data->parameters.splitPoint = splitPoint;
44514 data->parameters.steps = kSteps;
44515 data->parameters.shrinkDict = shrinkDict;
44519 data->ctx->f, accel)) {
44533 (unsigned)((iteration * 100) / kIterations));
44542 const size_t dictSize = best.dictSize;
44550 memcpy(dictBuffer, best.dict, dictSize);
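/* ---- legacy ZDICT trainer. It builds a suffix array over all samples with
 * divsufsort, extracts highly repeated segments (ZDICT_analyzePos), then adds
 * entropy tables tuned on the samples (ZDICT_analyzeEntropy). ---- */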
44574 #define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
44575 #define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
44582 #define _FILE_OFFSET_BITS 64
44583 #if (defined(__sun__) && (!defined(__LP64__)))
44584 # ifndef _LARGEFILE_SOURCE
44585 # define _LARGEFILE_SOURCE
44587 #elif ! defined(__LP64__)
44588 # ifndef _LARGEFILE64_SOURCE
44589 # define _LARGEFILE64_SOURCE
44597 #include <stdlib.h>
44598 #include <string.h>
44602 #ifndef ZDICT_STATIC_LINKING_ONLY
44603 # define ZDICT_STATIC_LINKING_ONLY
44620 #define KB *(1 <<10)
44621 #define MB *(1 <<20)
44622 #define GB *(1U<<30)
44624 #define DICTLISTSIZE_DEFAULT 10000
44626 #define NOISELENGTH 32
44635 #define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
44636 #undef DISPLAYLEVEL
44637 #define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); }
44639 static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
44645 for (u=0; u<length; u++) {
44647 if (c<32 || c>126) c = '.';
44662 if (dictSize < 8) return 0;
44674 if (!bs || !wksp) {
44675 headerSize = ERROR(memory_allocation);
44695 static size_t ZDICT_count(const void* pIn, const void* pMatch)
44697 const char* const pStart = (const char*)pIn;
44701 pIn = (const char*)pIn + sizeof(size_t);
44702 pMatch = (const char*)pMatch + sizeof(size_t);
44706 return (size_t)((const char*)pIn - pStart);
44721 d->savings = (U32)(-1);
44726 #define MINMATCHLENGTH 7
44729 const int* suffix, U32 start,
44730 const void* buffer, U32 minRatio, U32 notificationLevel)
44735 const BYTE* b = (const BYTE*)buffer;
44736 size_t maxLength = LLIMIT;
44737 size_t pos = (size_t)suffix[start];
44742 memset(&solution, 0, sizeof(solution));
44743 doneMarks[pos] = 1;
44751 U32 u, patternEnd = 6;
44752 while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2;
44753 if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
44754 for (u=1; u<patternEnd; u++)
44755 doneMarks[pos+u] = 1;
44776 if (end-start < minRatio) {
44778 for(idx=start; idx<end; idx++)
44779 doneMarks[suffix[idx]] = 1;
44786 U32 refinedEnd = end;
44793 BYTE currentChar = 0;
44794 U32 currentCount = 0;
44795 U32 currentID = refinedStart;
44797 U32 selectedCount = 0;
44798 U32 selectedID = currentID;
44799 for (id=refinedStart; id<refinedEnd; id++) {
44800 if (b[suffix[id] + mml] != currentChar) {
44801 if (currentCount > selectedCount) {
44802 selectedCount = currentCount;
44803 selectedID = currentID;
44806 currentChar = b[suffix[id] + mml];
44811 if (currentCount > selectedCount) {
44812 selectedCount = currentCount;
44813 selectedID = currentID;
44816 if (selectedCount < minRatio)
44818 refinedStart = selectedID;
44819 refinedEnd = refinedStart + selectedCount;
44823 start = refinedStart;
44824 pos = suffix[refinedStart];
44826 memset(lengthList, 0, sizeof(lengthList));
44849 memset(cumulLength, 0, sizeof(cumulLength));
44850 cumulLength[maxLength-1] = lengthList[maxLength-1];
44851 for (i=(int)(maxLength-2); i>=0; i--)
44852 cumulLength[i] = cumulLength[i+1] + lengthList[i];
44858 { U32 l = (U32)maxLength;
44859 BYTE const c = b[pos + maxLength-1];
44860 while (b[pos+l-2]==c) l--;
44868 savings[i] = savings[i-1] + (lengthList[i] * (i-3));
44870 DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
44871 (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
44873 solution.pos = (U32)pos;
44875 solution.savings = savings[maxLength];
44879 for (id=start; id<end; id++) {
44881 U32 const testedPos = (U32)suffix[id];
44882 if (testedPos == pos)
44888 pEnd = (U32)(testedPos + length);
44889 for (p=testedPos; p<pEnd; p++)
44897 static int isIncluded(const void* in, const void* container, size_t length)
44899 const char* const ip = (const char*)in;
44900 const char* const into = (const char*)container;
44903 for (u=0; u<length; u++) {
44918 const char* const buf = (const char*)buffer;
44921 U32 u; for (u=1; u<tableSize; u++) {
44922 if (u==eltNbToSkip) continue;
44923 if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) {
44926 table[u].length += addedLength;
44939 for (u=1; u<tableSize; u++) {
44940 if (u==eltNbToSkip) continue;
44944 int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
44946 if (addedLength > 0) {
44947 table[u].length += addedLength;
44960 size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
44979 for (u=id; u<max-1; u++)
44994 mergeId = newMerge;
45002 if (nextElt >= maxSize) nextElt = maxSize-1;
45003 current = nextElt-1;
45008 table[current+1] = elt;
45009 table->pos = nextElt+1;
45016 U32 u, dictSize = 0;
45017 for (u=1; u<dictList[0].pos; u++)
45018 dictSize += dictList[u].length;
45024 const void* const buffer, size_t bufferSize,
45025 const size_t* fileSizes, unsigned nbFiles,
45026 unsigned minRatio, U32 notificationLevel)
45028 int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
45029 int* const suffix = suffix0+1;
45030 U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
45031 BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks));
45032 U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
45034 clock_t displayClock = 0;
45035 clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
45037 # undef DISPLAYUPDATE
45038 # define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
45039 if (ZDICT_clockSpan(displayClock) > refreshRate) \
45040 { displayClock = clock(); DISPLAY(__VA_ARGS__); \
45041 if (notificationLevel>=4) fflush(stderr); } }
45045 if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) {
45046 result = ERROR(memory_allocation);
45050 memset(doneMarks, 0, bufferSize+16);
45057 DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20));
45058 { int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
45059 if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
45061 suffix[bufferSize] = (int)bufferSize;
45062 suffix0[0] = (int)bufferSize;
45065 for (pos=0; pos < bufferSize; pos++)
45066 reverseSuffix[suffix[pos]] = (U32)pos;
45070 for (pos=1; pos<nbFiles; pos++)
45071 filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]);
45077 { U32 cursor; for (cursor=0; cursor < bufferSize; ) {
45079 if (doneMarks[cursor]) { cursor++; continue; }
45080 solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
45081 if (solution.length==0) { cursor++; continue; }
45083 cursor += solution.length;
45084 DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
45089 free(reverseSuffix);
45098 unsigned const prime1 = 2654435761U;
45099 unsigned const prime2 = 2246822519U;
45100 unsigned acc = prime1;
45102 for (p=0; p<length; p++) {
45104 ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
45116 #define MAXREPOFFSET 1024
45119 unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
45120 const void* src, size_t srcSize,
45121 U32 notificationLevel)
45138 { const BYTE* bytePtr;
45139 for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
45140 countLit[*bytePtr]++;
45147 { const BYTE* codePtr = seqStorePtr->ofCode;
45149 for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
45152 { const BYTE* codePtr = seqStorePtr->mlCode;
45154 for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
45157 { const BYTE* codePtr = seqStorePtr->llCode;
45159 for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
45168 repOffsets[offset1] += 3;
45169 repOffsets[offset2] += 1;
45177 for (u=0; u<nbFiles; u++) total += fileSizes[u];
45204 for (u=1; u<256; u++) countLit[u] = 2;
45210 #define OFFCODE_MAX 30
45213 const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
45214 const void* dictBuffer, size_t dictBufferSize,
45215 unsigned notificationLevel)
45217 unsigned countLit[256];
45222 unsigned matchLengthCount[MaxML+1];
45223 short matchLengthNCount[MaxML+1];
45224 unsigned litLengthCount[MaxLL+1];
45225 short litLengthNCount[MaxLL+1];
45229 ZSTD_parameters params;
45231 size_t pos = 0, errorCode;
45234 size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
45239 DEBUGLOG(4, "ZDICT_analyzeEntropy");
45240 if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; }
45241 for (u=0; u<256; u++) countLit[u] = 1;
45242 for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
45243 for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
45244 for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
45245 memset(repOffset, 0, sizeof(repOffset));
45246 repOffset[1] = repOffset[4] = repOffset[8] = 1;
45247 memset(bestRepOffset, 0, sizeof(bestRepOffset));
45255 eSize = ERROR(memory_allocation);
45261 for (u=0; u<nbFiles; u++) {
45263 countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
45264 (const char*)srcBuffer + pos, fileSizes[u],
45265 notificationLevel);
45266 pos += fileSizes[u];
45269 if (notificationLevel >= 4) {
45272 for (u=0; u<=offcodeMax; u++) {
45277 { size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
45283 if (maxNbBits==8) {
45284 DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
45289 huffLog = (U32)maxNbBits;
45299 total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
45300 errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, 1);
45303 DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
45306 Offlog = (U32)errorCode;
45308 total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
45312 DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
45315 mlLog = (U32)errorCode;
45317 total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
45321 DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
45324 llLog = (U32)errorCode;
45341 DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
45352 DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
45363 DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
45372 eSize = ERROR(dstSize_tooSmall);
45373 DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
45403 U32 maxRep = reps[0];
45406 maxRep = MAX(maxRep, reps[r]);
45411 const void* customDictContent, size_t dictContentSize,
45412 const void* samplesBuffer, const size_t* samplesSizes,
45416 #define HBUFFSIZE 256
45422 size_t paddingSize;
45425 DEBUGLOG(4, "ZDICT_finalizeDictionary");
45426 if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
45431 { U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
45432 U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
45443 samplesBuffer, samplesSizes, nbSamples,
45444 customDictContent, dictContentSize,
45445 notificationLevel);
45451 if (hSize + dictContentSize > dictBufferCapacity) {
45452 dictContentSize = dictBufferCapacity - hSize;
45456 if (dictContentSize < minContentSize) {
45457 RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
45458 "dictBufferCapacity too small to fit max repcode");
45459 paddingSize = minContentSize - dictContentSize;
45465 size_t const dictSize = hSize + paddingSize + dictContentSize;
45471 BYTE* const outDictHeader = (BYTE*)dictBuffer;
45472 BYTE* const outDictPadding = outDictHeader + hSize;
45473 BYTE* const outDictContent = outDictPadding + paddingSize;
45475 assert(dictSize <= dictBufferCapacity);
45476 assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
45483 memmove(outDictContent, customDictContent, dictContentSize);
45484 memcpy(outDictHeader, header, hSize);
45485 memset(outDictPadding, 0, paddingSize);
45493 void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
45494 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
45504 { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
45506 samplesBuffer, samplesSizes, nbSamples,
45507 (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize,
45508 notificationLevel);
45515 { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
45516 U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
45521 if (hSize + dictContentSize < dictBufferCapacity)
45522 memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
45523 return MIN(dictBufferCapacity, hSize+dictContentSize);
45531 void* dictBuffer, size_t maxDictSize,
45532 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
45536 dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
45538 unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity;
45539 size_t const targetDictSize = maxDictSize;
45541 size_t dictSize = 0;
45545 if (!dictList) return ERROR(memory_allocation);
45554 samplesBuffer, samplesBuffSize,
45555 samplesSizes, nbSamples,
45556 minRep, notificationLevel);
45560 unsigned const nb = MIN(25, dictList[0].pos);
45563 DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize);
45565 for (u=1; u<nb; u++) {
45566 unsigned const pos = dictList[u].pos;
45568 U32 const printedLength = MIN(40, length);
45569 if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
45571 return ERROR(GENERIC);
45573 DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
45574 u, length, pos, (unsigned)dictList[u].savings);
45583 if (dictContentSize < targetDictSize/4) {
45584 DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize);
45585 if (samplesBuffSize < 10 * targetDictSize)
45586 DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20));
45588 DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
45589 DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
45593 if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
45594 unsigned proposedSelectivity = selectivity-1;
45595 while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
45596 DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize);
45597 DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
45598 DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
45602 { U32 const max = dictList->pos;
45603 U32 currentSize = 0;
45604 U32 n; for (n=1; n<max; n++) {
45605 currentSize += dictList[n].length;
45606 if (currentSize > targetDictSize) { currentSize -= dictList[n].length; break; }
45609 dictContentSize = currentSize;
45615 for (u=1; u<dictList->pos; u++) {
45618 if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); }
45619 memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
45623 samplesBuffer, samplesSizes, nbSamples,
45637 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
45646 if (!newBuff) return ERROR(memory_allocation);
45648 memcpy(newBuff, samplesBuffer, sBuffSize);
45653 samplesSizes, nbSamples, params);
45660 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
45663 DEBUGLOG(3, "ZDICT_trainFromBuffer");
45664 memset(&params, 0, sizeof(params));
45669 #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
45673 samplesBuffer, samplesSizes, nbSamples,
45678 const void* samplesBuffer,
const size_t* samplesSizes,
unsigned nbSamples)
45681 memset(¶ms, 0,
sizeof(params));
45683 samplesBuffer, samplesSizes, nbSamples,