zstd: Update to 1.5.7

Adds support for x86_64 assembly in Windows/MinGW builds.
Author: Jakub Marcowski
Date: 2025-03-04 20:52:56 +01:00
Committed by: Rémi Verschelde
Parent: 5904f4f2d4
Commit: 6b09f6e6d9
50 changed files with 2737 additions and 1637 deletions
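
For reference, a MinGW build exercising the new assembly path could be invoked roughly as follows (a hedged example using the usual Godot SCons options; arch=x86_64 matches the condition added in core/SCsub):

    scons platform=windows use_mingw=yes arch=x86_64

MSVC builds are excluded ("not env.msvc") and keep using the C fallback, since huf_decompress_amd64.S is GAS-syntax assembly.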


@ -1,119 +0,0 @@
From 76e9d2f867467a567cec330e19f25c763e9129a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= <rverschelde@gmail.com>
Date: Thu, 5 Jun 2025 14:10:03 +0200
Subject: [PATCH] zstd: Enable x86_64 assembly for Windows/MinGW
Fix SCons setup to compile .S files with preprocessor support with MinGW.
---
core/SCsub | 10 ++++++----
platform/windows/detect.py | 4 +++-
thirdparty/README.md | 4 ----
thirdparty/zstd/common/portability_macros.h | 2 +-
.../zstd/patches/0001-windows-turn-off-asm.patch | 13 -------------
5 files changed, 10 insertions(+), 23 deletions(-)
delete mode 100644 thirdparty/zstd/patches/0001-windows-turn-off-asm.patch
diff --git a/core/SCsub b/core/SCsub
index 5820b78b12..b4c4a691a2 100644
--- a/core/SCsub
+++ b/core/SCsub
@@ -111,9 +111,7 @@ thirdparty_minizip_sources = ["ioapi.c", "unzip.c", "zip.c"]
thirdparty_minizip_sources = [thirdparty_minizip_dir + file for file in thirdparty_minizip_sources]
env_thirdparty.add_source_files(thirdparty_obj, thirdparty_minizip_sources)
-# Zstd library, can be unbundled in theory
-# though we currently use some private symbols
-# https://github.com/godotengine/godot/issues/17374
+# Zstd library, can be unbundled
if env["builtin_zstd"]:
thirdparty_zstd_dir = "#thirdparty/zstd/"
thirdparty_zstd_sources = [
@@ -144,7 +142,11 @@ if env["builtin_zstd"]:
"decompress/zstd_decompress_block.c",
"decompress/zstd_decompress.c",
]
- if env["platform"] in ["android", "ios", "linuxbsd", "macos"] and env["arch"] == "x86_64":
+ if (
+ env["platform"] in ["android", "ios", "linuxbsd", "macos", "windows"]
+ and env["arch"] == "x86_64"
+ and not env.msvc
+ ):
# Match platforms with ZSTD_ASM_SUPPORTED in common/portability_macros.h
thirdparty_zstd_sources.append("decompress/huf_decompress_amd64.S")
thirdparty_zstd_sources = [thirdparty_zstd_dir + file for file in thirdparty_zstd_sources]
diff --git a/platform/windows/detect.py b/platform/windows/detect.py
index 3e808ac8a9..9a5ee8e467 100644
--- a/platform/windows/detect.py
+++ b/platform/windows/detect.py
@@ -707,6 +707,7 @@ def configure_mingw(env: "SConsEnvironment"):
env["CXX"] = get_detected(env, "clang++")
env["AR"] = get_detected(env, "ar")
env["RANLIB"] = get_detected(env, "ranlib")
+ env["AS"] = get_detected(env, "clang")
env.Append(ASFLAGS=["-c"])
env.extra_suffix = ".llvm" + env.extra_suffix
else:
@@ -714,6 +715,8 @@ def configure_mingw(env: "SConsEnvironment"):
env["CXX"] = get_detected(env, "g++")
env["AR"] = get_detected(env, "gcc-ar" if os.name != "nt" else "ar")
env["RANLIB"] = get_detected(env, "gcc-ranlib")
+ env["AS"] = get_detected(env, "gcc")
+ env.Append(ASFLAGS=["-c"])
env["RC"] = get_detected(env, "windres")
ARCH_TARGETS = {
@@ -724,7 +727,6 @@ def configure_mingw(env: "SConsEnvironment"):
}
env.AppendUnique(RCFLAGS=f"--target={ARCH_TARGETS[env['arch']]}")
- env["AS"] = get_detected(env, "as")
env["OBJCOPY"] = get_detected(env, "objcopy")
env["STRIP"] = get_detected(env, "strip")
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 988349b137..f5730f30bd 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -1182,7 +1182,3 @@ Files extracted from upstream source:
- `lib/{common/,compress/,decompress/,zstd.h,zstd_errors.h}`
- `LICENSE`
-
-Patches:
-
-- `0001-windows-turn-off-asm.patch` (GH-103596)
diff --git a/thirdparty/zstd/common/portability_macros.h b/thirdparty/zstd/common/portability_macros.h
index b2c028ccf1..860734141d 100644
--- a/thirdparty/zstd/common/portability_macros.h
+++ b/thirdparty/zstd/common/portability_macros.h
@@ -114,7 +114,7 @@
* 100% of code to be instrumented to work.
*/
#if defined(__GNUC__)
-# if defined(__linux__) || defined(__linux) || defined(__APPLE__)
+# if defined(__linux__) || defined(__linux) || defined(__APPLE__) || defined(_WIN32)
# if ZSTD_MEMORY_SANITIZER
# define ZSTD_ASM_SUPPORTED 0
# elif ZSTD_DATAFLOW_SANITIZER
diff --git a/thirdparty/zstd/patches/0001-windows-turn-off-asm.patch b/thirdparty/zstd/patches/0001-windows-turn-off-asm.patch
deleted file mode 100644
index 4765a17690..0000000000
--- a/thirdparty/zstd/patches/0001-windows-turn-off-asm.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/thirdparty/zstd/common/portability_macros.h b/thirdparty/zstd/common/portability_macros.h
-index 860734141d..b2c028ccf1 100644
---- a/thirdparty/zstd/common/portability_macros.h
-+++ b/thirdparty/zstd/common/portability_macros.h
-@@ -114,7 +114,7 @@
- * 100% of code to be instrumented to work.
- */
- #if defined(__GNUC__)
--# if defined(__linux__) || defined(__linux) || defined(__APPLE__) || defined(_WIN32)
-+# if defined(__linux__) || defined(__linux) || defined(__APPLE__)
- # if ZSTD_MEMORY_SANITIZER
- # define ZSTD_ASM_SUPPORTED 0
- # elif ZSTD_DATAFLOW_SANITIZER
--
2.49.0
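
A side note on the detect.py hunk above: .S files (capital S) are preprocessed assembly, so they must be assembled through the compiler driver, which runs the C preprocessor first; invoking plain "as" would feed the #include and #if lines directly to the assembler and fail. Conceptually, the setup makes SCons do the equivalent of (hypothetical cross-compiler triplet shown):

    x86_64-w64-mingw32-gcc -c decompress/huf_decompress_amd64.S -o huf_decompress_amd64.o

which also explains the added ASFLAGS=["-c"].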


@ -111,9 +111,7 @@ thirdparty_minizip_sources = ["ioapi.c", "unzip.c", "zip.c"]
thirdparty_minizip_sources = [thirdparty_minizip_dir + file for file in thirdparty_minizip_sources]
env_thirdparty.add_source_files(thirdparty_obj, thirdparty_minizip_sources)
# Zstd library, can be unbundled in theory
# though we currently use some private symbols
# https://github.com/godotengine/godot/issues/17374
# Zstd library, can be unbundled
if env["builtin_zstd"]:
thirdparty_zstd_dir = "#thirdparty/zstd/"
thirdparty_zstd_sources = [
@ -134,6 +132,7 @@ if env["builtin_zstd"]:
"compress/zstd_lazy.c",
"compress/zstd_ldm.c",
"compress/zstd_opt.c",
"compress/zstd_preSplit.c",
"compress/zstdmt_compress.c",
"compress/zstd_compress_literals.c",
"compress/zstd_compress_sequences.c",
@ -143,7 +142,11 @@ if env["builtin_zstd"]:
"decompress/zstd_decompress_block.c",
"decompress/zstd_decompress.c",
]
if env["platform"] in ["android", "ios", "linuxbsd", "macos"] and env["arch"] == "x86_64":
if (
env["platform"] in ["android", "ios", "linuxbsd", "macos", "windows"]
and env["arch"] == "x86_64"
and not env.msvc
):
# Match platforms with ZSTD_ASM_SUPPORTED in common/portability_macros.h
thirdparty_zstd_sources.append("decompress/huf_decompress_amd64.S")
thirdparty_zstd_sources = [thirdparty_zstd_dir + file for file in thirdparty_zstd_sources]


@ -1175,7 +1175,7 @@ Files extracted from upstream source:
## zstd
- Upstream: https://github.com/facebook/zstd
- Version: 1.5.6 (794ea1b0afca0f020f4e57b6732332231fb23c70, 2024)
- Version: 1.5.7 (f8745da6ff1ad1e7bab384bd1f9d742439278e99, 2025)
- License: BSD-3-Clause
Files extracted from upstream source:


@ -28,27 +28,29 @@ MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
assert(val != 0);
# if defined(_MSC_VER)
# if STATIC_BMI2 == 1
return (unsigned)_tzcnt_u32(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)__builtin_ctz(val);
# else
return ZSTD_countTrailingZeros32_fallback(val);
# endif
#if defined(_MSC_VER)
# if STATIC_BMI2
return (unsigned)_tzcnt_u32(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward(&r, val);
return (unsigned)r;
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)__builtin_ctz(val);
#elif defined(__ICCARM__)
return (unsigned)__builtin_ctz(val);
#else
return ZSTD_countTrailingZeros32_fallback(val);
#endif
}
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
{
assert(val != 0);
{
static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
@ -67,86 +69,89 @@ MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
{
assert(val != 0);
# if defined(_MSC_VER)
# if STATIC_BMI2 == 1
return (unsigned)_lzcnt_u32(val);
# else
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)(31 - r);
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)__builtin_clz(val);
# else
return ZSTD_countLeadingZeros32_fallback(val);
# endif
#if defined(_MSC_VER)
# if STATIC_BMI2
return (unsigned)_lzcnt_u32(val);
# else
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)(31 - r);
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)__builtin_clz(val);
#elif defined(__ICCARM__)
return (unsigned)__builtin_clz(val);
#else
return ZSTD_countLeadingZeros32_fallback(val);
#endif
}
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
assert(val != 0);
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2 == 1
return (unsigned)_tzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
return (unsigned)__builtin_ctzll(val);
# else
{
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (leastSignificantWord == 0) {
return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
} else {
return ZSTD_countTrailingZeros32(leastSignificantWord);
}
#if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return (unsigned)_tzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, val);
return (unsigned)r;
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
return (unsigned)__builtin_ctzll(val);
#elif defined(__ICCARM__)
return (unsigned)__builtin_ctzll(val);
#else
{
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (leastSignificantWord == 0) {
return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
} else {
return ZSTD_countTrailingZeros32(leastSignificantWord);
}
# endif
}
#endif
}
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
{
assert(val != 0);
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2 == 1
return (unsigned)_lzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanReverse64(&r, val);
return (unsigned)(63 - r);
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)(__builtin_clzll(val));
# else
{
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (mostSignificantWord == 0) {
return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
} else {
return ZSTD_countLeadingZeros32(mostSignificantWord);
}
#if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return (unsigned)_lzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanReverse64(&r, val);
return (unsigned)(63 - r);
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)(__builtin_clzll(val));
#elif defined(__ICCARM__)
return (unsigned)(__builtin_clzll(val));
#else
{
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (mostSignificantWord == 0) {
return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
} else {
return ZSTD_countLeadingZeros32(mostSignificantWord);
}
# endif
}
#endif
}
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
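
The helpers above select a per-compiler intrinsic and keep a portable fallback. A minimal standalone sanity check of the contract they share (a sketch, assuming a GCC-compatible compiler for the intrinsic branch):

    #include <assert.h>
    #include <stdint.h>

    /* portable reference: count trailing zero bits of a non-zero value */
    static unsigned ctz32_ref(uint32_t v)
    {
        unsigned n = 0;
        while ((v & 1) == 0) { v >>= 1; n++; }
        return n;
    }

    int main(void)
    {
        uint32_t v = 0x00080000u; /* only bit 19 set */
        assert(ctz32_ref(v) == 19);
    #if defined(__GNUC__)
        assert((unsigned)__builtin_ctz(v) == 19); /* trailing zeros */
        assert((unsigned)__builtin_clz(v) == 12); /* leading zeros: 31 - 19 */
    #endif
        return 0;
    }

Note that the 64-bit variants split the value into two 32-bit words in their fallback paths, exactly as shown above.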


@ -14,9 +14,6 @@
#ifndef BITSTREAM_H_MODULE
#define BITSTREAM_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/*
* This API consists of small unitary functions, which must be inlined for best performance.
* Since link-time-optimization is not available for all compilers,
@ -32,7 +29,6 @@ extern "C" {
#include "error_private.h" /* error codes and messages */
#include "bits.h" /* ZSTD_highbit32 */
/*=========================================
* Target specific
=========================================*/
@ -52,12 +48,13 @@ extern "C" {
/*-******************************************
* bitStream encoding API (write forward)
********************************************/
typedef size_t BitContainerType;
/* bitStream can mix input from multiple sources.
* A critical property of these streams is that they encode and decode in **reverse** direction.
* So the first bit sequence you add will be the last to be read, like a LIFO stack.
*/
typedef struct {
size_t bitContainer;
BitContainerType bitContainer;
unsigned bitPos;
char* startPtr;
char* ptr;
@ -65,7 +62,7 @@ typedef struct {
} BIT_CStream_t;
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
@ -74,7 +71,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
*
* bits are first added to a local register.
* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
* Local register is BitContainerType, 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
* Writing data into memory is an explicit operation, performed by the flushBits function.
* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
* After a flushBits, a maximum of 7 bits might still be stored into local register.
@ -90,7 +87,6 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
/*-********************************************
* bitStream decoding API (read backward)
**********************************************/
typedef size_t BitContainerType;
typedef struct {
BitContainerType bitContainer;
unsigned bitsConsumed;
@ -106,7 +102,7 @@ typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
} BIT_DStream_status; /* result of BIT_reloadDStream() */
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
@ -125,7 +121,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
/*-****************************************
* unsafe API
******************************************/
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
@ -163,10 +159,15 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
return 0;
}
FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
return _bzhi_u64(bitContainer, nbBits);
#if STATIC_BMI2 && !defined(ZSTD_NO_INTRINSICS)
# if (defined(__x86_64__) || defined(_M_X64)) && !defined(__ILP32__)
return _bzhi_u64(bitContainer, nbBits);
# else
DEBUG_STATIC_ASSERT(sizeof(bitContainer) == sizeof(U32));
return _bzhi_u32(bitContainer, nbBits);
# endif
#else
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
@ -177,7 +178,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbB
* can add up to 31 bits into `bitC`.
* Note : does not check for register overflow ! */
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
size_t value, unsigned nbBits)
BitContainerType value, unsigned nbBits)
{
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
assert(nbBits < BIT_MASK_SIZE);
@ -190,7 +191,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
* works only if `value` is _clean_,
* meaning all high bits above nbBits are 0 */
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
size_t value, unsigned nbBits)
BitContainerType value, unsigned nbBits)
{
assert((value>>nbBits) == 0);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
@ -237,7 +238,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
BIT_addBitsFast(bitC, 1, 1); /* endMark */
BIT_flushBits(bitC);
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
}
@ -298,12 +299,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
return srcSize;
}
FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
{
return bitContainer >> start;
}
FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
{
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
@ -313,7 +314,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U3
* such cpus old (pre-Haswell, 2013) and their performance is not of that
* importance.
*/
#if defined(__x86_64__) || defined(_M_X86)
#if defined(__x86_64__) || defined(_M_X64)
return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
#else
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
@ -326,7 +327,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U3
* On 32-bits, maxNbBits==24.
* On 64-bits, maxNbBits==56.
* @return : value extracted */
FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
/* arbitrate between double-shift and shift+mask */
#if 1
@ -342,7 +343,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits
/*! BIT_lookBitsFast() :
* unsafe version; only works if nbBits >= 1 */
MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
{
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
assert(nbBits >= 1);
@ -358,18 +359,18 @@ FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
* Read (consume) next n bits from local register and update.
* Pay attention to not read more than nbBits contained into local register.
* @return : extracted value. */
FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBits(bitD, nbBits);
BitContainerType const value = BIT_lookBits(bitD, nbBits);
BIT_skipBits(bitD, nbBits);
return value;
}
/*! BIT_readBitsFast() :
* unsafe version; only works if nbBits >= 1 */
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBitsFast(bitD, nbBits);
BitContainerType const value = BIT_lookBitsFast(bitD, nbBits);
assert(nbBits >= 1);
BIT_skipBits(bitD, nbBits);
return value;
@ -450,8 +451,4 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
}
#if defined (__cplusplus)
}
#endif
#endif /* BITSTREAM_H_MODULE */
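
The reverse-direction comment near the top of this header is the core invariant: bits are written forward but consumed LIFO. A toy illustration with a single 64-bit register (not the real BIT_* API):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t reg = 0;
        unsigned pos = 0;
        /* encode: first 3 on 2 bits, then 5 on 3 bits */
        reg |= (uint64_t)3 << pos; pos += 2;
        reg |= (uint64_t)5 << pos; pos += 3;
        /* decode: the last value written comes out first */
        pos -= 3; assert(((reg >> pos) & 0x7) == 5);
        pos -= 2; assert(((reg >> pos) & 0x3) == 3);
        return 0;
    }

The switch from size_t to BitContainerType throughout makes the register width explicit, which is what allows the 32-bit _bzhi_u32 path in BIT_getLowerBits.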


@ -27,7 +27,7 @@
# define INLINE_KEYWORD
#endif
#if defined(__GNUC__) || defined(__ICCARM__)
#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
@ -54,7 +54,7 @@
#endif
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
#if defined(__GNUC__)
#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define UNUSED_ATTR __attribute__((unused))
#else
# define UNUSED_ATTR
@ -95,6 +95,8 @@
#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */
#if defined(__GNUC__)
# define MEM_STATIC static __inline UNUSED_ATTR
#elif defined(__IAR_SYSTEMS_ICC__)
# define MEM_STATIC static inline UNUSED_ATTR
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define MEM_STATIC static inline
#elif defined(_MSC_VER)
@ -108,7 +110,7 @@
#ifdef _MSC_VER
# define FORCE_NOINLINE static __declspec(noinline)
#else
# if defined(__GNUC__) || defined(__ICCARM__)
# if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define FORCE_NOINLINE static __attribute__((__noinline__))
# else
# define FORCE_NOINLINE static
@ -117,7 +119,7 @@
/* target attribute */
#if defined(__GNUC__) || defined(__ICCARM__)
#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
# define TARGET_ATTRIBUTE(target)
@ -205,30 +207,21 @@
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
#endif
/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
#ifndef STATIC_BMI2
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
# define STATIC_BMI2 1
# endif
# elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__)
# define STATIC_BMI2 1
# endif
#endif
#ifndef STATIC_BMI2
#define STATIC_BMI2 0
#endif
/* compile time determination of SIMD support */
#if !defined(ZSTD_NO_INTRINSICS)
# if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
# if defined(__AVX2__)
# define ZSTD_ARCH_X86_AVX2
# endif
# if defined(__SSE2__) || defined(_M_X64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
# define ZSTD_ARCH_X86_SSE2
# endif
# if defined(__ARM_NEON) || defined(_M_ARM64)
# define ZSTD_ARCH_ARM_NEON
# endif
#
# if defined(ZSTD_ARCH_X86_AVX2)
# include <immintrin.h>
# endif
# if defined(ZSTD_ARCH_X86_SSE2)
# include <emmintrin.h>
# elif defined(ZSTD_ARCH_ARM_NEON)
@ -273,9 +266,15 @@
#endif
/*-**************************************************************
* Alignment check
* Alignment
*****************************************************************/
/* @return 1 if @u is a 2^n value, 0 otherwise
* useful to check a value is valid for alignment restrictions */
MEM_STATIC int ZSTD_isPower2(size_t u) {
return (u & (u-1)) == 0;
}
/* this test was initially positioned in mem.h,
* but this file is removed (or replaced) for linux kernel
* so it's now hosted in compiler.h,
@ -301,6 +300,21 @@
# endif
#endif /* ZSTD_ALIGNOF */
#ifndef ZSTD_ALIGNED
/* C90-compatible alignment macro (GCC/Clang). Adjust for other compilers if needed. */
# if defined(__GNUC__) || defined(__clang__)
# define ZSTD_ALIGNED(a) __attribute__((aligned(a)))
# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
# define ZSTD_ALIGNED(a) _Alignas(a)
#elif defined(_MSC_VER)
# define ZSTD_ALIGNED(n) __declspec(align(n))
# else
/* this compiler will require its own alignment instruction */
# define ZSTD_ALIGNED(...)
# endif
#endif /* ZSTD_ALIGNED */
/*-**************************************************************
* Sanitizer
*****************************************************************/
@ -324,7 +338,7 @@
#endif
/**
* Helper function to perform a wrapped pointer difference without trigging
* Helper function to perform a wrapped pointer difference without triggering
* UBSAN.
*
* @returns lhs - rhs with wrapping
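
The two additions in this header compose naturally; a usage sketch (assumes compiler.h is in scope; names other than the two helpers are illustrative):

    static ZSTD_ALIGNED(64) U32 table[16]; /* 64-byte-aligned storage */

    MEM_STATIC void check_table_alignment(void)
    {
        assert(ZSTD_isPower2(64));         /* alignment values must be powers of two */
        assert(((size_t)table & 63) == 0); /* address honors the requested alignment */
    }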


@ -35,7 +35,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
U32 f7b = 0;
U32 f7c = 0;
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
#if !defined(__clang__)
#if !defined(_M_X64) || !defined(__clang__) || __clang_major__ >= 16
int reg[4];
__cpuid((int*)reg, 0);
{


@ -32,10 +32,6 @@
#ifndef DEBUG_H_12987983217
#define DEBUG_H_12987983217
#if defined (__cplusplus)
extern "C" {
#endif
/* static assert is triggered at compile time, leaving no runtime artefact.
* static assert only works with compile-time constants.
@ -108,9 +104,4 @@ extern int g_debuglevel; /* the variable is only declared,
# define DEBUGLOG(l, ...) do { } while (0) /* disabled */
#endif
#if defined (__cplusplus)
}
#endif
#endif /* DEBUG_H_12987983217 */


@ -40,6 +40,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
case PREFIX(cannotProduce_uncompressedBlock): return "This mode cannot generate an uncompressed block";
case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
case PREFIX(dictionary_wrong): return "Dictionary mismatch";


@ -13,11 +13,6 @@
#ifndef ERROR_H_MODULE
#define ERROR_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/* ****************************************
* Dependencies
******************************************/
@ -26,7 +21,6 @@ extern "C" {
#include "debug.h"
#include "zstd_deps.h" /* size_t */
/* ****************************************
* Compiler-specific
******************************************/
@ -161,8 +155,4 @@ void _force_has_format_string(const char *format, ...) {
} \
} while(0)
#if defined (__cplusplus)
}
#endif
#endif /* ERROR_H_MODULE */


@ -11,11 +11,6 @@
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef FSE_H
#define FSE_H
@ -25,7 +20,6 @@ extern "C" {
******************************************/
#include "zstd_deps.h" /* size_t, ptrdiff_t */
/*-*****************************************
* FSE_PUBLIC_API : control library symbols visibility
******************************************/
@ -232,11 +226,8 @@ If there is an error, the function will return an error code, which can be teste
#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
#define FSE_H_FSE_STATIC_LINKING_ONLY
/* *** Dependency *** */
#include "bitstream.h"
/* *****************************************
* Static allocation
*******************************************/
@ -465,13 +456,13 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, un
FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
const U16* const stateTable = (const U16*)(statePtr->stateTable);
U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
BIT_addBits(bitC, (size_t)statePtr->value, nbBitsOut);
BIT_addBits(bitC, (BitContainerType)statePtr->value, nbBitsOut);
statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
}
MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
{
BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog);
BIT_addBits(bitC, (BitContainerType)statePtr->value, statePtr->stateLog);
BIT_flushBits(bitC);
}
@ -631,10 +622,4 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
#endif /* FSE_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
}
#endif


@ -190,6 +190,8 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
FSE_initDState(&state1, &bitD, dt);
FSE_initDState(&state2, &bitD, dt);
RETURN_ERROR_IF(BIT_reloadDStream(&bitD)==BIT_DStream_overflow, corruption_detected, "");
#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
/* 4 symbols per loop */


@ -12,10 +12,6 @@
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef HUF_H_298734234
#define HUF_H_298734234
@ -25,7 +21,6 @@ extern "C" {
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
/* *** Tool functions *** */
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
@ -280,7 +275,3 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
#endif
#endif /* HUF_H_298734234 */
#if defined (__cplusplus)
}
#endif


@ -11,10 +11,6 @@
#ifndef MEM_H_MODULE
#define MEM_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/*-****************************************
* Dependencies
******************************************/
@ -30,6 +26,8 @@ extern "C" {
#if defined(_MSC_VER) /* Visual Studio */
# include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */
#elif defined(__ICCARM__)
# include <intrinsics.h>
#endif
/*-**************************************************************
@ -74,7 +72,6 @@ extern "C" {
typedef signed long long S64;
#endif
/*-**************************************************************
* Memory I/O API
*****************************************************************/
@ -150,10 +147,12 @@ MEM_STATIC unsigned MEM_isLittleEndian(void)
return 1;
#elif defined(__clang__) && __BIG_ENDIAN__
return 0;
#elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86)
#elif defined(_MSC_VER) && (_M_X64 || _M_IX86)
return 1;
#elif defined(__DMC__) && defined(_M_IX86)
return 1;
#elif defined(__IAR_SYSTEMS_ICC__) && __LITTLE_ENDIAN__
return 1;
#else
const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
return one.c[0];
@ -246,6 +245,8 @@ MEM_STATIC U32 MEM_swap32(U32 in)
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|| (defined(__clang__) && __has_builtin(__builtin_bswap32))
return __builtin_bswap32(in);
#elif defined(__ICCARM__)
return __REV(in);
#else
return MEM_swap32_fallback(in);
#endif
@ -418,9 +419,4 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
/* code only tested on 32 and 64 bits systems */
MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
#if defined (__cplusplus)
}
#endif
#endif /* MEM_H_MODULE */


@ -11,10 +11,6 @@
#ifndef POOL_H
#define POOL_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_deps.h"
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
@ -82,9 +78,4 @@ void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
*/
int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
#if defined (__cplusplus)
}
#endif
#endif


@ -74,26 +74,39 @@
# define ZSTD_HIDE_ASM_FUNCTION(func)
#endif
/* Compile time determination of BMI2 support */
#ifndef STATIC_BMI2
# if defined(__BMI2__)
# define STATIC_BMI2 1
# elif defined(_MSC_VER) && defined(__AVX2__)
# define STATIC_BMI2 1 /* MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 */
# endif
#endif
#ifndef STATIC_BMI2
# define STATIC_BMI2 0
#endif
/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/
#ifndef DYNAMIC_BMI2
#if ((defined(__clang__) && __has_attribute(__target__)) \
# if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X64)) \
&& (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
#else
# define DYNAMIC_BMI2 0
#endif
# define DYNAMIC_BMI2 1
# else
# define DYNAMIC_BMI2 0
# endif
#endif
/**
* Only enable assembly for GNUC compatible compilers,
* Only enable assembly for GNU C compatible compilers,
* because other platforms may not support GAS assembly syntax.
*
* Only enable assembly for Linux / MacOS, other platforms may
* Only enable assembly for Linux / MacOS / Win32, other platforms may
* work, but they haven't been tested. This could likely be
* extended to BSD systems.
*
@ -101,7 +114,7 @@
* 100% of code to be instrumented to work.
*/
#if defined(__GNUC__)
# if defined(__linux__) || defined(__linux) || defined(__APPLE__)
# if defined(__linux__) || defined(__linux) || defined(__APPLE__) || defined(_WIN32)
# if ZSTD_MEMORY_SANITIZER
# define ZSTD_ASM_SUPPORTED 0
# elif ZSTD_DATAFLOW_SANITIZER
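
A standalone replication of the STATIC_BMI2 rule above can confirm what a given flag set selects; this sketch prints 1 under gcc/clang -mbmi2 (which defines __BMI2__) or MSVC /arch:AVX2 (which defines __AVX2__):

    #include <stdio.h>

    /* same decision logic as portability_macros.h, restated locally */
    #if defined(__BMI2__)
    # define PROBE_STATIC_BMI2 1
    #elif defined(_MSC_VER) && defined(__AVX2__)
    # define PROBE_STATIC_BMI2 1 /* MSVC has no BMI2 flag; AVX2 implies BMI2 */
    #else
    # define PROBE_STATIC_BMI2 0
    #endif

    int main(void)
    {
        printf("STATIC_BMI2 would be %d\n", PROBE_STATIC_BMI2);
        return 0;
    }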


@ -16,10 +16,6 @@
#include "debug.h"
#if defined (__cplusplus)
extern "C" {
#endif
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
/**
@ -72,7 +68,6 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread);
* add here more wrappers as required
*/
#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
/* === POSIX Systems === */
# include <pthread.h>
@ -143,8 +138,5 @@ typedef int ZSTD_pthread_cond_t;
#endif /* ZSTD_MULTITHREAD */
#if defined (__cplusplus)
}
#endif
#endif /* THREADING_H_938743 */


@ -227,10 +227,6 @@
* xxHash prototypes and implementation
*/
#if defined (__cplusplus)
extern "C" {
#endif
/* ****************************
* INLINE mode
******************************/
@ -537,6 +533,9 @@ extern "C" {
/*! @brief Version number, encoded as two digits each */
#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
#if defined (__cplusplus)
extern "C" {
#endif
/*!
* @brief Obtains the xxHash version.
*
@ -547,6 +546,9 @@ extern "C" {
*/
XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
#if defined (__cplusplus)
}
#endif
/* ****************************
* Common basic types
@ -593,6 +595,10 @@ typedef uint32_t XXH32_hash_t;
# endif
#endif
#if defined (__cplusplus)
extern "C" {
#endif
/*!
* @}
*
@ -821,6 +827,9 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canoni
#endif
/*! @endcond */
#if defined (__cplusplus)
} /* end of extern "C" */
#endif
/*!
* @}
@ -859,6 +868,9 @@ typedef uint64_t XXH64_hash_t;
# endif
#endif
#if defined (__cplusplus)
extern "C" {
#endif
/*!
* @}
*
@ -1562,6 +1574,11 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE con
#endif /* !XXH_NO_XXH3 */
#if defined (__cplusplus)
} /* extern "C" */
#endif
#endif /* XXH_NO_LONG_LONG */
/*!
@ -1748,6 +1765,10 @@ struct XXH3_state_s {
} while(0)
#if defined (__cplusplus)
extern "C" {
#endif
/*!
* @brief Calculates the 128-bit hash of @p data using XXH3.
*
@ -1963,8 +1984,13 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
XXH64_hash_t seed64);
#endif /* !XXH_NO_STREAM */
#if defined (__cplusplus)
} /* extern "C" */
#endif
#endif /* !XXH_NO_XXH3 */
#endif /* XXH_NO_LONG_LONG */
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
# define XXH_IMPLEMENTATION
#endif
@ -2263,10 +2289,12 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
* @{
*/
/* *************************************
* Includes & Memory related functions
***************************************/
#include <string.h> /* memcmp, memcpy */
#include <limits.h> /* ULLONG_MAX */
#if defined(XXH_NO_STREAM)
/* nothing */
#elif defined(XXH_NO_STDLIB)
@ -2280,9 +2308,17 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
* without access to dynamic allocation.
*/
#if defined (__cplusplus)
extern "C" {
#endif
static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
static void XXH_free(void* p) { (void)p; }
#if defined (__cplusplus)
} /* extern "C" */
#endif
#else
/*
@ -2291,6 +2327,9 @@ static void XXH_free(void* p) { (void)p; }
*/
#include <stdlib.h>
#if defined (__cplusplus)
extern "C" {
#endif
/*!
* @internal
* @brief Modify this function to use a different routine than malloc().
@ -2303,10 +2342,15 @@ static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
*/
static void XXH_free(void* p) { free(p); }
#if defined (__cplusplus)
} /* extern "C" */
#endif
#endif /* XXH_NO_STDLIB */
#include <string.h>
#if defined (__cplusplus)
extern "C" {
#endif
/*!
* @internal
* @brief Modify this function to use a different routine than memcpy().
@ -2316,8 +2360,9 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
return memcpy(dest,src,size);
}
#include <limits.h> /* ULLONG_MAX */
#if defined (__cplusplus)
} /* extern "C" */
#endif
/* *************************************
* Compiler Specific Options
@ -2452,6 +2497,10 @@ typedef XXH32_hash_t xxh_u32;
# define U32 xxh_u32
#endif
#if defined (__cplusplus)
extern "C" {
#endif
/* *** Memory access *** */
/*!
@ -3608,6 +3657,10 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can
return XXH_readBE64(src);
}
#if defined (__cplusplus)
}
#endif
#ifndef XXH_NO_XXH3
/* *********************************************************************
@ -3839,7 +3892,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
# define XXH_VECTOR XXH_AVX512
# elif defined(__AVX2__)
# define XXH_VECTOR XXH_AVX2
# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
# elif defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
# define XXH_VECTOR XXH_SSE2
# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
|| (defined(__s390x__) && defined(__VEC__)) \
@ -3928,6 +3981,10 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
# pragma GCC optimize("-O2")
#endif
#if defined (__cplusplus)
extern "C" {
#endif
#if XXH_VECTOR == XXH_NEON
/*
@ -4050,6 +4107,10 @@ XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
# endif
#endif /* XXH_VECTOR == XXH_NEON */
#if defined (__cplusplus)
} /* extern "C" */
#endif
/*
* VSX and Z Vector helpers.
*
@ -4111,6 +4172,9 @@ typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
# if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
# define XXH_vec_revb vec_revb
# else
#if defined (__cplusplus)
extern "C" {
#endif
/*!
* A polyfill for POWER9's vec_revb().
*/
@ -4120,9 +4184,15 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
return vec_perm(val, val, vByteSwap);
}
#if defined (__cplusplus)
} /* extern "C" */
#endif
# endif
# endif /* XXH_VSX_BE */
#if defined (__cplusplus)
extern "C" {
#endif
/*!
* Performs an unaligned vector load and byte swaps it on big endian.
*/
@ -4167,6 +4237,11 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
return result;
}
# endif /* XXH_vec_mulo, XXH_vec_mule */
#if defined (__cplusplus)
} /* extern "C" */
#endif
#endif /* XXH_VECTOR == XXH_VSX */
#if XXH_VECTOR == XXH_SVE
@ -4200,7 +4275,9 @@ do { \
# endif
#endif /* XXH_NO_PREFETCH */
#if defined (__cplusplus)
extern "C" {
#endif
/* ==========================================
* XXH3 default settings
* ========================================== */
@ -6877,8 +6954,6 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_
#endif /* !XXH_NO_STREAM */
/* 128-bit utility functions */
#include <string.h> /* memcmp, memcpy */
/* return : 1 is equal, 0 if different */
/*! @ingroup XXH3_family */
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
@ -7005,16 +7080,15 @@ XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
# pragma GCC pop_options
#endif
#endif /* XXH_NO_LONG_LONG */
#if defined (__cplusplus)
} /* extern "C" */
#endif
#endif /* XXH_NO_LONG_LONG */
#endif /* XXH_NO_XXH3 */
/*!
* @}
*/
#endif /* XXH_IMPLEMENTATION */
#if defined (__cplusplus)
} /* extern "C" */
#endif

View File

@ -24,6 +24,18 @@
#ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON
/* Even though we use qsort_r only for the dictionary builder, the macro
* _GNU_SOURCE has to be declared *before* the inclusion of any standard
* header and the script 'combine.sh' combines the whole zstd source code
* in a single file.
*/
#if defined(__linux) || defined(__linux__) || defined(linux) || defined(__gnu_linux__) || \
defined(__CYGWIN__) || defined(__MSYS__)
#if !defined(_GNU_SOURCE) && !defined(__ANDROID__) /* NDK doesn't ship qsort_r(). */
#define _GNU_SOURCE
#endif
#endif
#include <limits.h>
#include <stddef.h>
#include <string.h>
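
The ordering constraint described above exists because glibc only declares qsort_r when _GNU_SOURCE is defined before the first standard header is included. A minimal illustration (glibc signature; BSD's qsort_r differs):

    #define _GNU_SOURCE /* must precede any standard header */
    #include <stdlib.h>

    static int cmp_ints(const void* a, const void* b, void* ctx)
    {
        int x = *(const int*)a, y = *(const int*)b;
        (void)ctx; /* comparator context, unused here */
        return (x > y) - (x < y);
    }

    int main(void)
    {
        int v[3] = { 3, 1, 2 };
        qsort_r(v, 3, sizeof(int), cmp_ints, NULL);
        return (v[0] == 1 && v[2] == 3) ? 0 : 1;
    }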


@ -39,10 +39,6 @@
# define ZSTD_TRACE 0
#endif
#if defined (__cplusplus)
extern "C" {
#endif
/* ---- static assert (debug) --- */
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
#define ZSTD_isError ERR_isError /* for inlining */
@ -95,7 +91,7 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */
#define MIN_LITERALS_FOR_4_STREAMS 6
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
typedef enum { set_basic, set_rle, set_compressed, set_repeat } SymbolEncodingType_e;
#define LONGNBSEQ 0x7F00
@ -278,62 +274,6 @@ typedef enum {
/*-*******************************************
* Private declarations
*********************************************/
typedef struct seqDef_s {
U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
U16 litLength;
U16 mlBase; /* mlBase == matchLength - MINMATCH */
} seqDef;
/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
typedef enum {
ZSTD_llt_none = 0, /* no longLengthType */
ZSTD_llt_literalLength = 1, /* represents a long literal */
ZSTD_llt_matchLength = 2 /* represents a long match */
} ZSTD_longLengthType_e;
typedef struct {
seqDef* sequencesStart;
seqDef* sequences; /* ptr to end of sequences */
BYTE* litStart;
BYTE* lit; /* ptr to end of literals */
BYTE* llCode;
BYTE* mlCode;
BYTE* ofCode;
size_t maxNbSeq;
size_t maxNbLit;
/* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
* the existing value of the litLength or matchLength by 0x10000.
*/
ZSTD_longLengthType_e longLengthType;
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
} seqStore_t;
typedef struct {
U32 litLength;
U32 matchLength;
} ZSTD_sequenceLength;
/**
* Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
* indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
*/
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
{
ZSTD_sequenceLength seqLen;
seqLen.litLength = seq->litLength;
seqLen.matchLength = seq->mlBase + MINMATCH;
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
if (seqStore->longLengthType == ZSTD_llt_literalLength) {
seqLen.litLength += 0x10000;
}
if (seqStore->longLengthType == ZSTD_llt_matchLength) {
seqLen.matchLength += 0x10000;
}
}
return seqLen;
}
/**
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
@ -347,10 +287,6 @@ typedef struct {
unsigned long long decompressedBound;
} ZSTD_frameSizeInfo; /* decompress & legacy */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
/* ZSTD_invalidateRepCodes() :
* ensures next compression will not use repcodes from previous block.
* Note : only works with regular variant;
@ -385,8 +321,4 @@ MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
}
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_CCOMMON_H_MODULE */


@ -11,23 +11,20 @@
#ifndef ZSTD_TRACE_H
#define ZSTD_TRACE_H
#if defined (__cplusplus)
extern "C" {
#endif
#include <stddef.h>
/* weak symbol support
* For now, enable conservatively:
* - Only GNUC
* - Only ELF
* - Only x86-64, i386 and aarch64
* - Only x86-64, i386, aarch64 and risc-v.
* Also, explicitly disable on platforms known not to work so they aren't
* forgotten in the future.
*/
#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
defined(__GNUC__) && defined(__ELF__) && \
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) || defined(__aarch64__)) && \
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86) || defined(__aarch64__) || defined(__riscv)) && \
!defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
!defined(__CYGWIN__) && !defined(_AIX)
# define ZSTD_HAVE_WEAK_SYMBOLS 1
@ -64,7 +61,7 @@ typedef struct {
/**
* Non-zero if streaming (de)compression is used.
*/
unsigned streaming;
int streaming;
/**
* The dictionary ID.
*/
@ -73,7 +70,7 @@ typedef struct {
* Is the dictionary cold?
* Only set on decompression.
*/
unsigned dictionaryIsCold;
int dictionaryIsCold;
/**
* The dictionary size or zero if no dictionary.
*/
@ -156,8 +153,4 @@ ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
#endif /* ZSTD_TRACE */
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_TRACE_H */
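
The tracing hooks guarded above rely on the standard ELF weak-symbol override pattern; a generic, non-zstd illustration (GCC/ELF assumed, names are illustrative):

    /* library side: the default hook is weak and does nothing */
    __attribute__((weak)) void trace_event(int id) { (void)id; }

    /* application side, in another translation unit: an ordinary (strong)
     * definition of trace_event replaces the weak default at link time. */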


@ -26,6 +26,16 @@ unsigned HIST_isError(size_t code) { return ERR_isError(code); }
/*-**************************************************************
* Histogram functions
****************************************************************/
void HIST_add(unsigned* count, const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
const BYTE* const end = ip + srcSize;
while (ip<end) {
count[*ip++]++;
}
}
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{


@ -73,3 +73,10 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
*/
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);
/*! HIST_add() :
* Lowest level: just add nb of occurrences of characters from @src into @count.
* @count is not reset. @count array is presumed large enough (i.e. 1 KB).
@ This function does not need any additional stack memory.
*/
void HIST_add(unsigned* count, const void* src, size_t srcSize);
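
Per that contract (counts accumulate across calls, and @count must hold 256 unsigned counters, i.e. 1 KB), a minimal usage sketch:

    unsigned count[256] = {0}; /* 256 * sizeof(unsigned) == 1 KB */
    const char* msg = "abracadabra";
    HIST_add(count, msg, 11);
    /* now count['a'] == 5, count['b'] == 2, count['r'] == 2 */
    HIST_add(count, msg, 11); /* not reset: count['a'] is now 10 */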

File diff suppressed because it is too large.


@ -24,10 +24,7 @@
# include "zstdmt_compress.h"
#endif
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_preSplit.h" /* ZSTD_SLIPBLOCK_WORKSPACESIZE */
/*-*************************************
* Constants
@ -82,6 +79,70 @@ typedef struct {
ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;
/***********************************************
* Sequences *
***********************************************/
typedef struct SeqDef_s {
U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
U16 litLength;
U16 mlBase; /* mlBase == matchLength - MINMATCH */
} SeqDef;
/* Controls whether seqStore has a single "long" litLength or matchLength. See SeqStore_t. */
typedef enum {
ZSTD_llt_none = 0, /* no longLengthType */
ZSTD_llt_literalLength = 1, /* represents a long literal */
ZSTD_llt_matchLength = 2 /* represents a long match */
} ZSTD_longLengthType_e;
typedef struct {
SeqDef* sequencesStart;
SeqDef* sequences; /* ptr to end of sequences */
BYTE* litStart;
BYTE* lit; /* ptr to end of literals */
BYTE* llCode;
BYTE* mlCode;
BYTE* ofCode;
size_t maxNbSeq;
size_t maxNbLit;
/* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
* the existing value of the litLength or matchLength by 0x10000.
*/
ZSTD_longLengthType_e longLengthType;
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
} SeqStore_t;
typedef struct {
U32 litLength;
U32 matchLength;
} ZSTD_SequenceLength;
/**
* Returns the ZSTD_SequenceLength for the given sequences. It handles the decoding of long sequences
* indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
*/
MEM_STATIC ZSTD_SequenceLength ZSTD_getSequenceLength(SeqStore_t const* seqStore, SeqDef const* seq)
{
ZSTD_SequenceLength seqLen;
seqLen.litLength = seq->litLength;
seqLen.matchLength = seq->mlBase + MINMATCH;
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
if (seqStore->longLengthType == ZSTD_llt_literalLength) {
seqLen.litLength += 0x10000;
}
if (seqStore->longLengthType == ZSTD_llt_matchLength) {
seqLen.matchLength += 0x10000;
}
}
return seqLen;
}
const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
/***********************************************
* Entropy buffer statistics structs and funcs *
***********************************************/
@ -91,7 +152,7 @@ typedef struct {
* hufDesSize refers to the size of huffman tree description in bytes.
* This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
symbolEncodingType_e hType;
SymbolEncodingType_e hType;
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;
@ -102,9 +163,9 @@ typedef struct {
* fseTablesSize refers to the size of fse tables in bytes.
* This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
symbolEncodingType_e llType;
symbolEncodingType_e ofType;
symbolEncodingType_e mlType;
SymbolEncodingType_e llType;
SymbolEncodingType_e ofType;
SymbolEncodingType_e mlType;
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
size_t fseTablesSize;
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
@ -119,7 +180,7 @@ typedef struct {
* Builds entropy for the block.
* @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(
const seqStore_t* seqStorePtr,
const SeqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
@ -148,15 +209,9 @@ typedef struct {
stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
size_t size; /* The number of sequences. <= capacity. */
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
} RawSeqStore_t;
typedef struct {
U32 idx; /* Index in array of ZSTD_Sequence */
U32 posInSequence; /* Position within sequence at idx */
size_t posInSrc; /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
UNUSED_ATTR static const RawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct {
int price; /* price from beginning of segment to this position */
@ -188,7 +243,7 @@ typedef struct {
U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
ZSTD_paramSwitch_e literalCompressionMode;
ZSTD_ParamSwitch_e literalCompressionMode;
} optState_t;
typedef struct {
@ -210,11 +265,11 @@ typedef struct {
#define ZSTD_WINDOW_START_INDEX 2
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
typedef struct ZSTD_MatchState_t ZSTD_MatchState_t;
#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
struct ZSTD_matchState_t {
struct ZSTD_MatchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
* When loadedDictEnd != 0, a dictionary is in use, and still valid.
@ -236,15 +291,15 @@ struct ZSTD_matchState_t {
U32* hashTable3;
U32* chainTable;
U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
int forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
int dedicatedDictSearch; /* Indicates whether this matchState is using the
* dedicated dictionary search structure.
*/
optState_t opt; /* optimal parser state */
const ZSTD_matchState_t* dictMatchState;
const ZSTD_MatchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
const rawSeqStore_t* ldmSeqStore;
const RawSeqStore_t* ldmSeqStore;
/* Controls prefetching in some dictMatchState matchfinders.
* This behavior is controlled from the cctx ms.
@ -262,7 +317,7 @@ struct ZSTD_matchState_t {
typedef struct {
ZSTD_compressedBlockState_t* prevCBlock;
ZSTD_compressedBlockState_t* nextCBlock;
ZSTD_matchState_t matchState;
ZSTD_MatchState_t matchState;
} ZSTD_blockState_t;
typedef struct {
@ -289,7 +344,7 @@ typedef struct {
} ldmState_t;
typedef struct {
ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
ZSTD_ParamSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
U32 hashLog; /* Log size of hashTable */
U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
U32 minMatchLength; /* Minimum match length */
@ -320,7 +375,7 @@ struct ZSTD_CCtx_params_s {
* There is no guarantee that hint is close to actual source size */
ZSTD_dictAttachPref_e attachDictPref;
ZSTD_paramSwitch_e literalCompressionMode;
ZSTD_ParamSwitch_e literalCompressionMode;
/* Multithreading: used to pass parameters to mtctx */
int nbWorkers;
@ -339,14 +394,27 @@ struct ZSTD_CCtx_params_s {
ZSTD_bufferMode_e outBufferMode;
/* Sequence compression API */
ZSTD_sequenceFormat_e blockDelimiters;
ZSTD_SequenceFormat_e blockDelimiters;
int validateSequences;
/* Block splitting */
ZSTD_paramSwitch_e useBlockSplitter;
/* Block splitting
* @postBlockSplitter executes split analysis after sequences are produced,
* it's more accurate but consumes more resources.
* @preBlockSplitter_level splits before knowing sequences,
* it's more approximative but also cheaper.
* Valid @preBlockSplitter_level values range from 0 to 6 (included).
* 0 means auto, 1 means do not split,
* then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest).
* Highest @preBlockSplitter_level combines well with @postBlockSplitter.
*/
ZSTD_ParamSwitch_e postBlockSplitter;
int preBlockSplitter_level;
/* Adjust the max block size*/
size_t maxBlockSize;
/* Param for deciding whether to use row-based matchfinder */
ZSTD_paramSwitch_e useRowMatchFinder;
ZSTD_ParamSwitch_e useRowMatchFinder;
/* Always load a dictionary in ext-dict mode (not prefix mode)? */
int deterministicRefPrefix;
@ -355,7 +423,7 @@ struct ZSTD_CCtx_params_s {
ZSTD_customMem customMem;
/* Controls prefetching in some dictMatchState matchfinders */
ZSTD_paramSwitch_e prefetchCDictTables;
ZSTD_ParamSwitch_e prefetchCDictTables;
/* Controls whether zstd will fall back to an internal matchfinder
* if the external matchfinder returns an error code. */
@ -367,15 +435,13 @@ struct ZSTD_CCtx_params_s {
void* extSeqProdState;
ZSTD_sequenceProducer_F extSeqProdFunc;
/* Adjust the max block size*/
size_t maxBlockSize;
/* Controls repcode search in external sequence parsing */
ZSTD_paramSwitch_e searchForExternalRepcodes;
ZSTD_ParamSwitch_e searchForExternalRepcodes;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
#define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE))
/**
* Indicates whether this compression proceeds directly from user-provided
@ -393,11 +459,11 @@ typedef enum {
*/
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
typedef struct {
seqStore_t fullSeqStoreChunk;
seqStore_t firstHalfSeqStore;
seqStore_t secondHalfSeqStore;
seqStore_t currSeqStore;
seqStore_t nextSeqStore;
SeqStore_t fullSeqStoreChunk;
SeqStore_t firstHalfSeqStore;
SeqStore_t secondHalfSeqStore;
SeqStore_t currSeqStore;
SeqStore_t nextSeqStore;
U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
ZSTD_entropyCTablesMetadata_t entropyMetadata;
@ -414,7 +480,7 @@ struct ZSTD_CCtx_s {
size_t dictContentSize;
ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
size_t blockSize;
size_t blockSizeMax;
unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
unsigned long long consumedSrcSize;
unsigned long long producedCSize;
@ -426,13 +492,14 @@ struct ZSTD_CCtx_s {
int isFirstBlock;
int initialized;
seqStore_t seqStore; /* sequences storage ptrs */
SeqStore_t seqStore; /* sequences storage ptrs */
ldmState_t ldmState; /* long distance matching state */
rawSeq* ldmSequences; /* Storage for the ldm output sequences */
size_t maxNbLdmSequences;
rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
RawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
ZSTD_blockState_t blockState;
U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
void* tmpWorkspace; /* used as substitute of stack space - must be aligned for S64 type */
size_t tmpWkspSize;
/* Whether we are streaming or not */
ZSTD_buffered_policy_e bufferedPolicy;
@ -506,12 +573,12 @@ typedef enum {
* behavior of taking both the source size and the dict size into account
* when selecting and adjusting parameters.
*/
} ZSTD_cParamMode_e;
} ZSTD_CParamMode_e;
typedef size_t (*ZSTD_blockCompressor) (
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
typedef size_t (*ZSTD_BlockCompressor_f) (
ZSTD_MatchState_t* bs, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@ -557,6 +624,25 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
return 1;
}
/* ZSTD_selectAddr:
* @return index >= lowLimit ? candidate : backup,
* tries to force branchless codegen. */
MEM_STATIC const BYTE*
ZSTD_selectAddr(U32 index, U32 lowLimit, const BYTE* candidate, const BYTE* backup)
{
#if defined(__GNUC__) && defined(__x86_64__)
__asm__ (
"cmp %1, %2\n"
"cmova %3, %0\n"
: "+r"(candidate)
: "r"(index), "r"(lowLimit), "r"(backup)
);
return candidate;
#else
return index >= lowLimit ? candidate : backup;
#endif
}
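ZSTD_selectAddr pins a cmova via inline asm on x86-64. The same branchless selection can be sketched portably with arithmetic masking; this is an illustration, not the zstd implementation, and a compiler is free to reintroduce a branch:

#include <assert.h>
#include <stdint.h>

static const uint8_t* select_addr_portable(uint32_t index, uint32_t lowLimit,
                                           const uint8_t* candidate, const uint8_t* backup)
{
    uintptr_t const mask = (uintptr_t)0 - (uintptr_t)(index >= lowLimit); /* all-ones when valid */
    return (const uint8_t*)(((uintptr_t)candidate & mask) | ((uintptr_t)backup & ~mask));
}

int main(void)
{
    uint8_t a, b;
    assert(select_addr_portable(10, 5, &a, &b) == &a); /* index in range: candidate */
    assert(select_addr_portable(3, 5, &a, &b)  == &b); /* below limit: backup */
    return 0;
}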
/* ZSTD_noCompressBlock() :
* Writes uncompressed block to dst buffer from given src.
* Returns the size of the block */
@ -639,14 +725,55 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
#define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */
/*! ZSTD_storeSeqOnly() :
* Store a sequence (litLength, offBase and matchLength) into SeqStore_t.
* Literals themselves are neither copied nor registered here.
* @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
* @matchLength : must be >= MINMATCH
*/
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeqOnly(SeqStore_t* seqStorePtr,
size_t litLength,
U32 offBase,
size_t matchLength)
{
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
/* literal Length */
assert(litLength <= ZSTD_BLOCKSIZE_MAX);
if (UNLIKELY(litLength>0xFFFF)) {
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
seqStorePtr->longLengthType = ZSTD_llt_literalLength;
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
}
seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */
seqStorePtr->sequences[0].offBase = offBase;
/* match Length */
assert(matchLength <= ZSTD_BLOCKSIZE_MAX);
assert(matchLength >= MINMATCH);
{ size_t const mlBase = matchLength - MINMATCH;
if (UNLIKELY(mlBase>0xFFFF)) {
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
seqStorePtr->longLengthType = ZSTD_llt_matchLength;
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
}
seqStorePtr->sequences[0].mlBase = (U16)mlBase;
}
seqStorePtr->sequences++;
}
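Sequence length fields are 16-bit, so a single oversized length per block escapes through longLengthType/longLengthPos and is re-added downstream (see ZSTD_getSequenceLength). A worked example of that round trip in a hypothetical mini model; a single +0x10000 correction suffices because block sizes cap lengths around 128 KB:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    size_t const litLength = 70000;                 /* > 0xFFFF: needs the escape  */
    uint16_t const stored  = (uint16_t)litLength;   /* truncates to 4464           */
    int const isLong       = litLength > 0xFFFF;    /* longLengthType would be set */
    size_t const rebuilt   = (size_t)stored + (isLong ? 0x10000 : 0);
    assert(stored == 4464);
    assert(rebuilt == litLength);                   /* 4464 + 65536 == 70000       */
    return 0;
}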
/*! ZSTD_storeSeq() :
* Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
* Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t.
* @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
* @matchLength : must be >= MINMATCH
* Allowed to over-read literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr,
ZSTD_storeSeq(SeqStore_t* seqStorePtr,
size_t litLength, const BYTE* literals, const BYTE* litLimit,
U32 offBase,
size_t matchLength)
@ -680,29 +807,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
}
seqStorePtr->lit += litLength;
/* literal Length */
if (litLength>0xFFFF) {
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
seqStorePtr->longLengthType = ZSTD_llt_literalLength;
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
}
seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */
seqStorePtr->sequences[0].offBase = offBase;
/* match Length */
assert(matchLength >= MINMATCH);
{ size_t const mlBase = matchLength - MINMATCH;
if (mlBase>0xFFFF) {
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
seqStorePtr->longLengthType = ZSTD_llt_matchLength;
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
}
seqStorePtr->sequences[0].mlBase = (U16)mlBase;
}
seqStorePtr->sequences++;
ZSTD_storeSeqOnly(seqStorePtr, litLength, offBase, matchLength);
}
/* ZSTD_updateRep() :
@ -731,12 +836,12 @@ ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
typedef struct repcodes_s {
U32 rep[3];
} repcodes_t;
} Repcodes_t;
MEM_STATIC repcodes_t
MEM_STATIC Repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
{
repcodes_t newReps;
Repcodes_t newReps;
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
ZSTD_updateRep(newReps.rep, offBase, ll0);
return newReps;
@ -779,8 +884,8 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
size_t const matchLength = ZSTD_count(ip, match, vEnd);
if (match + matchLength != mEnd) return matchLength;
DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
DEBUGLOG(7, "distance from match beginning to end dictionary = %i", (int)(mEnd - match));
DEBUGLOG(7, "distance from current pos to end buffer = %i", (int)(iEnd - ip));
DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
@ -918,11 +1023,12 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
/*-*************************************
* Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Max @current value allowed:
* In 32-bit mode: we want to avoid crossing the 2 GB limit,
* reducing risks of side effects in case of signed operations on indexes.
* In 64-bit mode: we want to ensure that adding the maximum job size (512 MB)
* doesn't overflow U32 index capacity (4 GB) */
#define ZSTD_CURRENT_MAX (MEM_64bits() ? 3500U MB : 2000U MB)
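A quick check of the headroom arithmetic described in the comment; MB_ is a hypothetical stand-in for the MB macro (1 << 20):

#define MB_ (1ull << 20)
_Static_assert(2000 * MB_ < (1ull << 31),
               "32-bit limit stays below the 2 GB boundary");
_Static_assert(3500 * MB_ + 512 * MB_ <= (1ull << 32),
               "64-bit limit plus a 512 MB job still fits U32 indexes");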
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX \
( ((U32)-1) /* Maximum ending current index */ \
@ -962,7 +1068,7 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
* Inspects the provided matchState and figures out what dictMode should be
* passed to the compressor.
*/
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms)
{
return ZSTD_window_hasExtDict(ms->window) ?
ZSTD_extDict :
@ -1151,7 +1257,7 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
const ZSTD_MatchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
@ -1196,7 +1302,7 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
const ZSTD_MatchState_t** dictMatchStatePtr)
{
assert(loadedDictEndPtr != NULL);
assert(dictMatchStatePtr != NULL);
@ -1246,8 +1352,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_window_update(ZSTD_window_t* window,
void const* src, size_t srcSize,
int forceNonContiguous)
const void* src, size_t srcSize,
int forceNonContiguous)
{
BYTE const* const ip = (BYTE const*)src;
U32 contiguous = 1;
@ -1274,8 +1380,9 @@ U32 ZSTD_window_update(ZSTD_window_t* window,
/* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
if ( (ip+srcSize > window->dictBase + window->lowLimit)
& (ip < window->dictBase + window->dictLimit)) {
ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
size_t const highInputIdx = (size_t)((ip + srcSize) - window->dictBase);
U32 const lowLimitMax = (highInputIdx > (size_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
assert(highInputIdx < UINT_MAX);
window->lowLimit = lowLimitMax;
DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
}
@ -1285,7 +1392,7 @@ U32 ZSTD_window_update(ZSTD_window_t* window,
/**
* Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
*/
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
{
U32 const maxDistance = 1U << windowLog;
U32 const lowestValid = ms->window.lowLimit;
@ -1302,7 +1409,7 @@ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, u
/**
* Returns the lowest allowed match index in the prefix.
*/
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
{
U32 const maxDistance = 1U << windowLog;
U32 const lowestValid = ms->window.dictLimit;
@ -1315,6 +1422,13 @@ MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr,
return matchLowest;
}
/* index_safety_check:
* intentional underflow : ensure repIndex isn't overlapping dict + prefix
* @return 1 if values are not overlapping,
* 0 otherwise */
MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) {
return ((U32)((prefixLowestIndex-1) - repIndex) >= 3);
}
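The single unsigned subtraction above rejects exactly the three indices at which a 4-byte repcode read would straddle the dictionary/prefix boundary; indices strictly inside either segment pass, including the wrap-around cases. A small harness demonstrating that reading, with hypothetical values:

#include <assert.h>
#include <stdint.h>
typedef uint32_t U32;

static int overlap_check(U32 prefixLowestIndex, U32 repIndex)
{   /* same expression as ZSTD_index_overlap_check above */
    return ((U32)((prefixLowestIndex - 1) - repIndex) >= 3);
}

int main(void)
{
    U32 const p = 100;                      /* hypothetical prefixLowestIndex  */
    assert(overlap_check(p, 96));           /* read of 96..99 stays in dict    */
    assert(!overlap_check(p, 97));          /* reads 97..100 cross the seam    */
    assert(!overlap_check(p, 98));
    assert(!overlap_check(p, 99));
    assert(overlap_check(p, 100));          /* underflow wraps: prefix side ok */
    assert(overlap_check(p, 4000000000u));  /* wrapped values stay >= 3        */
    return 0;
}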
/* debug functions */
@ -1385,10 +1499,6 @@ MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
return tag1 == tag2;
}
#if defined (__cplusplus)
}
#endif
/* ===============================================================
* Shared internal declarations
* These prototypes may be called from sources not in lib/compress
@ -1404,6 +1514,25 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
typedef struct {
U32 idx; /* Index in array of ZSTD_Sequence */
U32 posInSequence; /* Position within sequence at idx */
size_t posInSrc; /* Number of bytes given by sequences provided so far */
} ZSTD_SequencePosition;
/* for benchmark */
size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx,
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
int const repcodeResolution);
typedef struct {
size_t nbSequences;
size_t blockSize;
size_t litSize;
} BlockSummary;
BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs);
/* ==============================================================
* Private declarations
* These prototypes shall only be called from within lib/compress
@ -1415,7 +1544,7 @@ void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
* Note: srcSizeHint == 0 means 0!
*/
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode);
/*! ZSTD_initCStream_internal() :
* Private use only. Init streaming operation.
@ -1427,7 +1556,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
const ZSTD_CDict* cdict,
const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
void ZSTD_resetSeqStore(seqStore_t* ssPtr);
void ZSTD_resetSeqStore(SeqStore_t* ssPtr);
/*! ZSTD_getCParamsFromCDict() :
* as the name implies */
@ -1480,33 +1609,6 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
*/
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
* ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
* Note that the block delimiter must include the last literals of the block.
*/
size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
/* Returns the number of bytes to move the current read position back by.
* Only non-zero if we ended up splitting a sequence.
* Otherwise, it may return a ZSTD error if something went wrong.
*
* This function will attempt to scan through blockSize bytes
* represented by the sequences in @inSeqs,
* storing any (partial) sequences.
*
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
* avoid splitting a match, or to avoid splitting a match such that it would produce a match
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
*/
size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
return params->extSeqProdFunc != NULL;

View File

@ -140,7 +140,7 @@ size_t ZSTD_compressLiterals (
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256;
symbolEncodingType_e hType = set_compressed;
SymbolEncodingType_e hType = set_compressed;
size_t cLitSize;
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",

View File

@ -153,13 +153,13 @@ size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
return cost >> 8;
}
symbolEncodingType_e
SymbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_DefaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy)
{
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
@ -241,7 +241,7 @@ typedef struct {
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
@ -293,7 +293,7 @@ ZSTD_encodeSequences_body(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
SeqDef const* sequences, size_t nbSeq, int longOffsets)
{
BIT_CStream_t blockStream;
FSE_CState_t stateMatchLength;
@ -387,7 +387,7 @@ ZSTD_encodeSequences_default(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
SeqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
@ -405,7 +405,7 @@ ZSTD_encodeSequences_bmi2(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
SeqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
@ -421,7 +421,7 @@ size_t ZSTD_encodeSequences(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
{
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
#if DYNAMIC_BMI2

View File

@ -11,26 +11,27 @@
#ifndef ZSTD_COMPRESS_SEQUENCES_H
#define ZSTD_COMPRESS_SEQUENCES_H
#include "zstd_compress_internal.h" /* SeqDef */
#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
#include "../common/zstd_internal.h" /* SymbolEncodingType_e, ZSTD_strategy */
typedef enum {
ZSTD_defaultDisallowed = 0,
ZSTD_defaultAllowed = 1
} ZSTD_defaultPolicy_e;
} ZSTD_DefaultPolicy_e;
symbolEncodingType_e
SymbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_DefaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy);
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
@ -42,7 +43,7 @@ size_t ZSTD_encodeSequences(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
size_t ZSTD_fseBitCost(
FSE_CTable const* ctable,

View File

@ -51,7 +51,7 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart + lhSize;
U32 const singleStream = lhSize == 3;
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
SymbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
size_t cLitSize = 0;
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
@ -126,15 +126,15 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
}
static size_t
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
const seqDef* sequences, size_t nbSeqs,
ZSTD_seqDecompressedSize(SeqStore_t const* seqStore,
const SeqDef* sequences, size_t nbSeqs,
size_t litSize, int lastSubBlock)
{
size_t matchLengthSum = 0;
size_t litLengthSum = 0;
size_t n;
for (n=0; n<nbSeqs; n++) {
const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
const ZSTD_SequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
litLengthSum += seqLen.litLength;
matchLengthSum += seqLen.matchLength;
}
@ -162,7 +162,7 @@ ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
static size_t
ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
const ZSTD_fseCTablesMetadata_t* fseMetadata,
const seqDef* sequences, size_t nbSeq,
const SeqDef* sequences, size_t nbSeq,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
@ -262,7 +262,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
* Or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
const seqDef* sequences, size_t nbSeq,
const SeqDef* sequences, size_t nbSeq,
const BYTE* literals, size_t litSize,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
@ -327,7 +327,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
return 0;
}
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
static size_t ZSTD_estimateSubBlockSize_symbolType(SymbolEncodingType_e type,
const BYTE* codeTable, unsigned maxCode,
size_t nbSeq, const FSE_CTable* fseCTable,
const U8* additionalBits,
@ -426,7 +426,7 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
return 0;
}
static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
static size_t countLiterals(SeqStore_t const* seqStore, const SeqDef* sp, size_t seqCount)
{
size_t n, total = 0;
assert(sp != NULL);
@ -439,7 +439,7 @@ static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t
#define BYTESCALE 256
static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
static size_t sizeBlockSequences(const SeqDef* sp, size_t nbSeqs,
size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
int firstSubBlock)
{
@ -476,7 +476,7 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
* Sub-blocks are all compressed, except the last one when beneficial.
* @return : compressed size of the super block (which features multiple ZSTD blocks)
* or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
static size_t ZSTD_compressSubBlock_multi(const SeqStore_t* seqStorePtr,
const ZSTD_compressedBlockState_t* prevCBlock,
ZSTD_compressedBlockState_t* nextCBlock,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
@ -486,9 +486,9 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const int bmi2, U32 lastBlock,
void* workspace, size_t wkspSize)
{
const seqDef* const sstart = seqStorePtr->sequencesStart;
const seqDef* const send = seqStorePtr->sequences;
const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
const SeqDef* const sstart = seqStorePtr->sequencesStart;
const SeqDef* const send = seqStorePtr->sequences;
const SeqDef* sp = sstart; /* tracks progress within seqStorePtr->sequences */
size_t const nbSeqs = (size_t)(send - sstart);
const BYTE* const lstart = seqStorePtr->litStart;
const BYTE* const lend = seqStorePtr->lit;
@ -647,8 +647,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
op += cSize;
/* We have to regenerate the repcodes because we've skipped some sequences */
if (sp < send) {
const seqDef* seq;
repcodes_t rep;
const SeqDef* seq;
Repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) {
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
@ -674,7 +674,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
&zc->blockState.nextCBlock->entropy,
&zc->appliedParams,
&entropyMetadata,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */), "");
return ZSTD_compressSubBlock_multi(&zc->seqStore,
zc->blockState.prevCBlock,
@ -684,5 +684,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
dst, dstCapacity,
src, srcSize,
zc->bmi2, lastBlock,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */);
}

View File

@ -17,10 +17,7 @@
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_internal.h"
#include "../common/portability_macros.h"
#if defined (__cplusplus)
extern "C" {
#endif
#include "../common/compiler.h" /* ZS2_isPower2 */
/*-*************************************
* Constants
@ -206,9 +203,9 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
/**
* Align must be a power of 2.
*/
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t align) {
size_t const mask = align - 1;
assert((align & mask) == 0);
assert(ZSTD_isPower2(align));
return (size + mask) & ~mask;
}
@ -222,7 +219,7 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
* to figure out how much space you need for the matchState tables. Everything
* else is though.
*
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned64_alloc_size().
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
if (size == 0)
@ -234,12 +231,16 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#endif
}
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size, size_t alignment) {
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, alignment));
}
/**
* Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
* Used to determine the number of bytes required for a given "aligned" allocation.
*/
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
MEM_STATIC size_t ZSTD_cwksp_aligned64_alloc_size(size_t size) {
return ZSTD_cwksp_aligned_alloc_size(size, ZSTD_CWKSP_ALIGNMENT_BYTES);
}
/**
@ -262,7 +263,7 @@ MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert((alignBytes & alignBytesMask) == 0);
assert(ZSTD_isPower2(alignBytes));
assert(bytes < alignBytes);
return bytes;
}
@ -271,8 +272,12 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
* Returns the initial value for allocStart which is used to determine the position from
* which we can allocate from the end of the workspace.
*/
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws)
{
char* endPtr = (char*)ws->workspaceEnd;
assert(ZSTD_isPower2(ZSTD_CWKSP_ALIGNMENT_BYTES));
endPtr = endPtr - ((size_t)endPtr % ZSTD_CWKSP_ALIGNMENT_BYTES);
return (void*)endPtr;
}
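The rewrite above rounds the end pointer down with integer arithmetic on a char* instead of masking a pointer-sized cast directly; for power-of-two alignments both formulations agree, as this small check illustrates:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uintptr_t p;
    for (p = 0; p < 1024; p++)
        assert(p - (p % 64) == (p & ~(uintptr_t)(64 - 1)));
    return 0;
}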
/**
@ -287,7 +292,7 @@ ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes)
{
void* const alloc = (BYTE*)ws->allocStart - bytes;
void* const bottom = ws->tableEnd;
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
DEBUGLOG(5, "cwksp: reserving [0x%p]:%zd bytes; %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(alloc >= bottom);
@ -404,7 +409,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t byt
{
size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
if(ptr && ptr < ws->initOnceStart) {
/* We assume the memory following the current allocation is either:
* 1. Not usable as initOnce memory (end of workspace)
@ -424,11 +429,12 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t byt
/**
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
MEM_STATIC void* ZSTD_cwksp_reserve_aligned64(ZSTD_cwksp* ws, size_t bytes)
{
void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
ZSTD_cwksp_alloc_aligned);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
void* const ptr = ZSTD_cwksp_reserve_internal(ws,
ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
ZSTD_cwksp_alloc_aligned);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
return ptr;
}
@ -474,7 +480,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
#endif
assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
return alloc;
}
@ -520,6 +526,20 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
return alloc;
}
/**
* Same as ZSTD_cwksp_reserve_object(), but with alignment control
* Note : should happen only once, at workspace first initialization
*/
MEM_STATIC void* ZSTD_cwksp_reserve_object_aligned(ZSTD_cwksp* ws, size_t byteSize, size_t alignment)
{
size_t const mask = alignment - 1;
size_t const surplus = (alignment > sizeof(void*)) ? alignment - sizeof(void*) : 0;
void* const start = ZSTD_cwksp_reserve_object(ws, byteSize + surplus);
if (start == NULL) return NULL;
if (surplus == 0) return start;
assert(ZSTD_isPower2(alignment));
return (void*)(((size_t)start + surplus) & ~mask);
}
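Reserving `byteSize + (alignment - sizeof(void*))` appears sufficient because, as the surplus formula implies, the underlying object allocator already hands back pointer-aligned addresses, so rounding up by the surplus never overruns the reservation. A worked check of that invariant over every pointer-aligned start (the alignment value is hypothetical):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    size_t const alignment = 64;
    size_t const surplus = alignment - sizeof(void*);
    size_t const mask = alignment - 1;
    uintptr_t base;
    for (base = 0; base < 4096; base += sizeof(void*)) {
        uintptr_t const aligned = (base + surplus) & ~mask;
        assert(aligned >= base);            /* never moves backward     */
        assert(aligned <= base + surplus);  /* stays inside the surplus */
        assert((aligned & mask) == 0);      /* correctly aligned        */
    }
    return 0;
}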
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
{
@ -577,7 +597,8 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
* Invalidates table allocations.
* All other allocations remain valid.
*/
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws)
{
DEBUGLOG(4, "cwksp: clearing tables!");
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
@ -741,8 +762,4 @@ MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
}
}
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_CWKSP_H */

View File

@ -15,7 +15,7 @@
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
void ZSTD_fillDoubleHashTableForCDict(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -53,7 +53,7 @@ void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -87,7 +87,7 @@ void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
} }
}
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp)
@ -103,7 +103,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls /* template */)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
@ -142,9 +142,14 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
const BYTE* matchl0; /* the long match for ip */
const BYTE* matchs0; /* the short match for ip */
const BYTE* matchl1; /* the long match for ip1 */
const BYTE* matchs0_safe; /* matchs0 or safe address */
const BYTE* ip = istart; /* the current position */
const BYTE* ip1; /* the next position */
/* Array of ~random data with a low probability of matching real data:
* we load from here instead of from the tables when matchl0/matchl1 are
* invalid indices. Used to avoid unpredictable branches. */
const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4};
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
@ -191,24 +196,29 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
if (idxl0 > prefixLowestIndex) {
/* idxl0 > prefixLowestIndex is a (somewhat) unpredictable branch.
* However the expression below compiles into a conditional move. Since
* match is unlikely and we only *branch* on idxl0 > prefixLowestIndex
* if there is a match, all branches become predictable. */
{ const BYTE* const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);
/* check prefix long match */
if (MEM_read64(matchl0) == MEM_read64(ip)) {
if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
offset = (U32)(ip-matchl0);
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
goto _match_found;
}
}
} }
idxl1 = hashLong[hl1];
matchl1 = base + idxl1;
if (idxs0 > prefixLowestIndex) {
/* check prefix short match */
if (MEM_read32(matchs0) == MEM_read32(ip)) {
goto _search_next_long;
}
/* Same optimization as matchl0 above */
matchs0_safe = ZSTD_selectAddr(idxs0, prefixLowestIndex, matchs0, &dummy[0]);
/* check prefix short match */
if(MEM_read32(matchs0_safe) == MEM_read32(ip) && matchs0_safe == matchs0) {
goto _search_next_long;
}
if (ip1 >= nextStep) {
@ -242,21 +252,23 @@ _cleanup:
_search_next_long:
/* check prefix long +1 match */
if (idxl1 > prefixLowestIndex) {
if (MEM_read64(matchl1) == MEM_read64(ip1)) {
/* short match found: let's check for a longer one */
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
offset = (U32)(ip - matchs0);
/* check long match at +1 position */
if ((idxl1 > prefixLowestIndex) && (MEM_read64(matchl1) == MEM_read64(ip1))) {
size_t const l1len = ZSTD_count(ip1+8, matchl1+8, iend) + 8;
if (l1len > mLength) {
/* use the long match instead */
ip = ip1;
mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
mLength = l1len;
offset = (U32)(ip-matchl1);
while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
goto _match_found;
matchs0 = matchl1;
}
}
/* if no long +1 match, explore the short match we found */
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
offset = (U32)(ip - matchs0);
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* complete backward */
/* fall-through */
@ -314,7 +326,7 @@ _match_stored:
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls /* template */)
{
@ -335,7 +347,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_MatchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
const U32* const dictHashLong = dms->hashTable;
const U32* const dictHashSmall = dms->chainTable;
@ -392,7 +404,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
/* check repcode */
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
@ -401,14 +413,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
goto _match_stored;
}
if (matchIndexL > prefixLowestIndex) {
if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
/* check prefix long match */
if (MEM_read64(matchLong) == MEM_read64(ip)) {
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
offset = (U32)(ip-matchLong);
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found;
}
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
offset = (U32)(ip-matchLong);
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found;
} else if (dictTagsMatchL) {
/* check dictMatchState long match */
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
@ -423,7 +433,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
} }
if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */
/* short match candidate */
if (MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
}
@ -453,14 +463,12 @@ _search_next_long:
hashLong[hl3] = curr + 1;
/* check prefix long +1 match */
if (matchIndexL3 > prefixLowestIndex) {
if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
ip++;
offset = (U32)(ip-matchL3);
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found;
}
if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) {
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
ip++;
offset = (U32)(ip-matchL3);
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found;
} else if (dictTagsMatchL3) {
/* check dict long +1 match */
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
@ -513,7 +521,7 @@ _match_stored:
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
dictBase + repIndex2 - dictIndexDelta :
base + repIndex2;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex2))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
@ -540,7 +548,7 @@ _match_stored:
#define ZSTD_GEN_DFAST_FN(dictMode, mls) \
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
@ -558,7 +566,7 @@ ZSTD_GEN_DFAST_FN(dictMatchState, 7)
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
const U32 mls = ms->cParams.minMatch;
@ -578,7 +586,7 @@ size_t ZSTD_compressBlock_doubleFast(
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
const U32 mls = ms->cParams.minMatch;
@ -600,7 +608,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls /* template */)
{
@ -651,7 +659,7 @@ size_t ZSTD_compressBlock_doubleFast_extDict_generic(
size_t mLength;
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
if (((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
& (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
@ -719,7 +727,7 @@ size_t ZSTD_compressBlock_doubleFast_extDict_generic(
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
& (offset_2 <= current2 - dictStartIndex))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
@ -749,7 +757,7 @@ ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)
size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
U32 const mls = ms->cParams.minMatch;

View File

@ -11,27 +11,23 @@
#ifndef ZSTD_DOUBLE_FAST_H
#define ZSTD_DOUBLE_FAST_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
@ -43,8 +39,4 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_DOUBLE_FAST_H */

View File

@ -13,7 +13,7 @@
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
void ZSTD_fillHashTableForCDict(ZSTD_MatchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
@ -45,12 +45,12 @@ void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */
ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
} } } }
} } } }
}
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
void ZSTD_fillHashTableForCCtx(ZSTD_MatchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
@ -84,7 +84,7 @@ void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
} } } }
}
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp)
@ -97,6 +97,50 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
}
typedef int (*ZSTD_match4Found) (const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit);
static int
ZSTD_match4Found_cmov(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
{
/* Array of ~random data, should have low probability of matching data.
* Load from here if the index is invalid.
* Used to avoid unpredictable branches. */
static const BYTE dummy[] = {0x12,0x34,0x56,0x78};
/* matchIdx >= idxLowLimit is a (somewhat) unpredictable branch.
* However expression below compiles into conditional move.
*/
const BYTE* mvalAddr = ZSTD_selectAddr(matchIdx, idxLowLimit, matchAddress, dummy);
/* Note: this used to be written as : return test1 && test2;
* Unfortunately, once inlined, these tests become branches,
* in which case it becomes critical that they are executed in the right order (test1 then test2).
* So we have to write these tests in a specific manner to ensure their ordering.
*/
if (MEM_read32(currentPtr) != MEM_read32(mvalAddr)) return 0;
/* force ordering of these tests, which matters once the function is inlined, as they become branches */
#if defined(__GNUC__)
__asm__("");
#endif
return matchIdx >= idxLowLimit;
}
static int
ZSTD_match4Found_branch(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
{
/* using a branch instead of a cmov,
* because it's faster in scenarios where matchIdx >= idxLowLimit is generally true,
* aka almost all candidates are within range */
U32 mval;
if (matchIdx >= idxLowLimit) {
mval = MEM_read32(matchAddress);
} else {
mval = MEM_read32(currentPtr) ^ 1; /* guaranteed to not match. */
}
return (MEM_read32(currentPtr) == mval);
}
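Both predicates answer the same question: do four bytes at the candidate address match, and is the candidate index valid? The cmov flavor trades a data dependency for branch-freedom; the dispatch in ZSTD_compressBlock_fast below picks it when windowLog < 19, where the validity branch tends to be unpredictable. A self-contained harness (simplified stand-ins with hypothetical names) showing the two strategies agree:

#include <assert.h>
#include <stdint.h>
#include <string.h>

static uint32_t read32(const uint8_t* p) { uint32_t v; memcpy(&v, p, 4); return v; }

static int found_branch(const uint8_t* cur, const uint8_t* cand, uint32_t idx, uint32_t low)
{
    uint32_t const mval = (idx >= low) ? read32(cand) : read32(cur) ^ 1; /* ^1 never matches */
    return read32(cur) == mval;
}

static int found_cmov_style(const uint8_t* cur, const uint8_t* cand, uint32_t idx, uint32_t low)
{
    static const uint8_t dummy[4] = {0x12, 0x34, 0x56, 0x78};
    const uint8_t* const addr = (idx >= low) ? cand : dummy; /* stands in for ZSTD_selectAddr */
    if (read32(cur) != read32(addr)) return 0;
    return idx >= low; /* re-check: the dummy could match by accident */
}

int main(void)
{
    const uint8_t buf[] = "abcdabcdzzzz";
    assert(found_branch(buf, buf + 4, 10, 5) == found_cmov_style(buf, buf + 4, 10, 5)); /* match    */
    assert(found_branch(buf, buf + 4, 3, 5)  == found_cmov_style(buf, buf + 4, 3, 5));  /* bad idx  */
    assert(found_branch(buf, buf + 8, 10, 5) == found_cmov_style(buf, buf + 8, 10, 5)); /* no match */
    return 0;
}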
/**
* If you squint hard enough (and ignore repcodes), the search operation at any
* given position is broken into 4 stages:
@ -146,15 +190,14 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_fast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls, U32 const hasStep)
U32 const mls, int useCmov)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
U32 const hlog = cParams->hashLog;
/* support stepSize of 0 */
size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; /* min 2 */
const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
@ -176,8 +219,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
size_t hash0; /* hash for ip0 */
size_t hash1; /* hash for ip1 */
U32 idx; /* match idx for ip0 */
U32 mval; /* src value at match idx */
U32 matchIdx; /* match idx for ip0 */
U32 offcode;
const BYTE* match0;
@ -190,6 +232,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
size_t step;
const BYTE* nextStep;
const size_t kStepIncr = (1 << (kSearchStrength - 1));
const ZSTD_match4Found matchFound = useCmov ? ZSTD_match4Found_cmov : ZSTD_match4Found_branch;
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
ip0 += (ip0 == prefixStart);
@ -218,7 +261,7 @@ _start: /* Requires: ip0 */
hash0 = ZSTD_hashPtr(ip0, hlog, mls);
hash1 = ZSTD_hashPtr(ip1, hlog, mls);
idx = hashTable[hash0];
matchIdx = hashTable[hash0];
do {
/* load repcode match for ip[2]*/
@ -238,35 +281,25 @@ _start: /* Requires: ip0 */
offcode = REPCODE1_TO_OFFBASE;
mLength += 4;
/* First write next hash table entry; we've already calculated it.
* This write is known to be safe because the ip1 is before the
/* Write next hash table entry: it's already calculated.
* This write is known to be safe because ip1 is before the
* repcode (ip2). */
hashTable[hash1] = (U32)(ip1 - base);
goto _match;
}
/* load match for ip[0] */
if (idx >= prefixStartIndex) {
mval = MEM_read32(base + idx);
} else {
mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
}
/* check match at ip[0] */
if (MEM_read32(ip0) == mval) {
/* found a match! */
/* First write next hash table entry; we've already calculated it.
* This write is known to be safe because the ip1 == ip0 + 1, so
* we know we will resume searching after ip1 */
if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
/* Write next hash table entry (it's already calculated).
* This write is known to be safe because the ip1 == ip0 + 1,
* so searching will resume after ip1 */
hashTable[hash1] = (U32)(ip1 - base);
goto _offset;
}
/* lookup ip[1] */
idx = hashTable[hash1];
matchIdx = hashTable[hash1];
/* hash ip[2] */
hash0 = hash1;
@ -281,36 +314,19 @@ _start: /* Requires: ip0 */
current0 = (U32)(ip0 - base);
hashTable[hash0] = current0;
/* load match for ip[0] */
if (idx >= prefixStartIndex) {
mval = MEM_read32(base + idx);
} else {
mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
}
/* check match at ip[0] */
if (MEM_read32(ip0) == mval) {
/* found a match! */
/* first write next hash table entry; we've already calculated it */
if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
/* Write next hash table entry, since it's already calculated */
if (step <= 4) {
/* We need to avoid writing an index into the hash table >= the
* position at which we will pick up our searching after we've
* taken this match.
*
* The minimum possible match has length 4, so the earliest ip0
* can be after we take this match will be the current ip0 + 4.
* ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
* write this position.
*/
/* Avoid writing an index if it's >= position where search will resume.
* The minimum possible match has length 4, so search can resume at ip0 + 4.
*/
hashTable[hash1] = (U32)(ip1 - base);
}
goto _offset;
}
/* lookup ip[1] */
idx = hashTable[hash1];
matchIdx = hashTable[hash1];
/* hash ip[2] */
hash0 = hash1;
@ -332,7 +348,7 @@ _start: /* Requires: ip0 */
} while (ip3 < ilimit);
_cleanup:
/* Note that there are probably still a couple positions we could search.
/* Note that there are probably still a couple positions one could search.
* However, it seems to be a meaningful performance hit to try to search
* them. So let's not. */
@ -361,7 +377,7 @@ _cleanup:
_offset: /* Requires: ip0, matchIdx */
/* Compute the offset code. */
match0 = base + idx;
match0 = base + matchIdx;
rep_offset2 = rep_offset1;
rep_offset1 = (U32)(ip0-match0);
offcode = OFFSET_TO_OFFBASE(rep_offset1);
@ -406,12 +422,12 @@ _match: /* Requires: ip0, match0, offcode */
goto _start;
}
#define ZSTD_GEN_FAST_FN(dictMode, mls, step) \
static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
#define ZSTD_GEN_FAST_FN(dictMode, mml, cmov) \
static size_t ZSTD_compressBlock_fast_##dictMode##_##mml##_##cmov( \
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mml, cmov); \
}
ZSTD_GEN_FAST_FN(noDict, 4, 1)
@ -425,13 +441,15 @@ ZSTD_GEN_FAST_FN(noDict, 6, 0)
ZSTD_GEN_FAST_FN(noDict, 7, 0)
size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
U32 const mls = ms->cParams.minMatch;
U32 const mml = ms->cParams.minMatch;
/* use cmov when "candidate in range" branch is likely unpredictable */
int const useCmov = ms->cParams.windowLog < 19;
assert(ms->dictMatchState == NULL);
if (ms->cParams.targetLength > 1) {
switch(mls)
if (useCmov) {
switch(mml)
{
default: /* includes case 3 */
case 4 :
@ -444,7 +462,8 @@ size_t ZSTD_compressBlock_fast(
return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
}
} else {
switch(mls)
/* use a branch instead */
switch(mml)
{
default: /* includes case 3 */
case 4 :
@ -456,14 +475,13 @@ size_t ZSTD_compressBlock_fast(
case 7 :
return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
}
}
}
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -482,7 +500,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_MatchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
const U32* const dictHashTable = dms->hashTable;
const U32 dictStartIndex = dms->window.dictLimit;
@ -546,8 +564,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
hashTable[hash0] = curr; /* update hash table */
if (((U32) ((prefixStartIndex - 1) - repIndex) >=
3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
@ -580,8 +597,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
}
}
if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) {
/* found a regular match */
if (ZSTD_match4Found_cmov(ip0, match, matchIndex, prefixStartIndex)) {
/* found a regular match of size >= 4 */
U32 const offset = (U32) (ip0 - match);
mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
while (((ip0 > anchor) & (match > prefixStart))
@ -631,7 +648,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
dictBase - dictIndexDelta + repIndex2 :
base + repIndex2;
if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
if ( (ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
&& (MEM_read32(repMatch2) == MEM_read32(ip0))) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@ -667,7 +684,7 @@ ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
U32 const mls = ms->cParams.minMatch;
@ -690,7 +707,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_fast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -925,7 +942,7 @@ _match: /* Requires: ip0, match0, offcode, matchEnd */
while (ip0 <= ilimit) {
U32 const repIndex2 = (U32)(ip0-base) - offset_2;
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */
if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) & (offset_2 > 0))
&& (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@ -948,7 +965,7 @@ ZSTD_GEN_FAST_FN(extDict, 6, 0)
ZSTD_GEN_FAST_FN(extDict, 7, 0)
size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
U32 const mls = ms->cParams.minMatch;

View File

@ -11,28 +11,20 @@
#ifndef ZSTD_FAST_H
#define ZSTD_FAST_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h"
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_FAST_H */

View File

@ -26,7 +26,7 @@
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
void ZSTD_updateDUBT(ZSTD_MatchState_t* ms,
const BYTE* ip, const BYTE* iend,
U32 mls)
{
@ -71,7 +71,7 @@ void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
* doesn't fail */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
void ZSTD_insertDUBT1(const ZSTD_MatchState_t* ms,
U32 curr, const BYTE* inputEnd,
U32 nbCompares, U32 btLow,
const ZSTD_dictMode_e dictMode)
@ -162,7 +162,7 @@ void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_DUBT_findBetterDictMatch (
const ZSTD_matchState_t* ms,
const ZSTD_MatchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
size_t* offsetPtr,
size_t bestLength,
@ -170,7 +170,7 @@ size_t ZSTD_DUBT_findBetterDictMatch (
U32 const mls,
const ZSTD_dictMode_e dictMode)
{
const ZSTD_matchState_t * const dms = ms->dictMatchState;
const ZSTD_MatchState_t * const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
const U32 * const dictHashTable = dms->hashTable;
U32 const hashLog = dmsCParams->hashLog;
@ -240,7 +240,7 @@ size_t ZSTD_DUBT_findBetterDictMatch (
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
size_t ZSTD_DUBT_findBestMatch(ZSTD_MatchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
size_t* offBasePtr,
U32 const mls,
@ -392,7 +392,7 @@ size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
size_t ZSTD_BtFindBestMatch( ZSTD_MatchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offBasePtr,
const U32 mls /* template */,
@ -408,7 +408,7 @@ size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
* Dedicated dict search
***********************************/
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip)
{
const BYTE* const base = ms->window.base;
U32 const target = (U32)(ip - base);
@ -527,7 +527,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B
*/
FORCE_INLINE_TEMPLATE
size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
const ZSTD_matchState_t* const dms,
const ZSTD_MatchState_t* const dms,
const BYTE* const ip, const BYTE* const iLimit,
const BYTE* const prefixStart, const U32 curr,
const U32 dictLimit, const size_t ddsIdx) {
@ -630,7 +630,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertAndFindFirstIndex_internal(
ZSTD_matchState_t* ms,
ZSTD_MatchState_t* ms,
const ZSTD_compressionParameters* const cParams,
const BYTE* ip, U32 const mls, U32 const lazySkipping)
{
@ -656,7 +656,7 @@ U32 ZSTD_insertAndFindFirstIndex_internal(
return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip) {
const ZSTD_compressionParameters* const cParams = &ms->cParams;
return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
}
@ -665,7 +665,7 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_HcFindBestMatch(
ZSTD_matchState_t* ms,
ZSTD_MatchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 mls, const ZSTD_dictMode_e dictMode)
@ -689,7 +689,7 @@ size_t ZSTD_HcFindBestMatch(
U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_MatchState_t* const dms = ms->dictMatchState;
const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
@ -834,7 +834,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* t
*/
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
void ZSTD_row_fillHashCache(ZSTD_MatchState_t* ms, const BYTE* base,
U32 const rowLog, U32 const mls,
U32 idx, const BYTE* const iLimit)
{
@ -882,7 +882,7 @@ U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
*/
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
void ZSTD_row_update_internalImpl(ZSTD_MatchState_t* ms,
U32 updateStartIdx, U32 const updateEndIdx,
U32 const mls, U32 const rowLog,
U32 const rowMask, U32 const useCache)
@ -913,7 +913,7 @@ void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
*/
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
void ZSTD_row_update_internal(ZSTD_MatchState_t* ms, const BYTE* ip,
U32 const mls, U32 const rowLog,
U32 const rowMask, U32 const useCache)
{
@ -946,7 +946,7 @@ void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
* External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
* processing.
*/
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip) {
const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
const U32 rowMask = (1u << rowLog) - 1;
const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
@ -1123,9 +1123,9 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGr
/* The high-level approach of the SIMD row based match finder is as follows:
* - Figure out where to insert the new entry:
* - Generate a hash for current input posistion and split it into a one byte of tag and `rowHashLog` bits of index.
* - The hash is salted by a value that changes on every contex reset, so when the same table is used
* we will avoid collisions that would otherwise slow us down by intorducing phantom matches.
* - Generate a hash for current input position and split it into a one byte of tag and `rowHashLog` bits of index.
* - The hash is salted by a value that changes on every context reset, so when the same table is used
* we will avoid collisions that would otherwise slow us down by introducing phantom matches.
* - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
* which row to insert into.
* - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
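As a concrete sketch of the split described above, assuming the one-byte tag occupies the low 8 bits of the hash and the row selector the next rowHashLog bits (helper name hypothetical):

    static void ZSTD_row_splitHash_sketch(U32 hash, U32 rowHashLog,
                                          U32* rowIndex, BYTE* tag)
    {
        *tag      = (BYTE)(hash & 0xFF);                    /* one byte of tag */
        *rowIndex = (hash >> 8) & ((1u << rowHashLog) - 1); /* selects the row */
    }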
@ -1139,7 +1139,7 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGr
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_RowFindBestMatch(
ZSTD_matchState_t* ms,
ZSTD_MatchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 mls, const ZSTD_dictMode_e dictMode,
@ -1171,7 +1171,7 @@ size_t ZSTD_RowFindBestMatch(
U32 hash;
/* DMS/DDS variables that may be referenced later */
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_MatchState_t* const dms = ms->dictMatchState;
/* Initialize the following variables to satisfy static analyzer */
size_t ddsIdx = 0;
@ -1340,7 +1340,7 @@ size_t ZSTD_RowFindBestMatch(
* ZSTD_searchMax() dispatches to the correct implementation function.
*
* TODO: The start of the search function involves loading and calculating a
* bunch of constants from the ZSTD_matchState_t. These computations could be
* bunch of constants from the ZSTD_MatchState_t. These computations could be
* done in an initialization function, and saved somewhere in the match state.
* Then we could pass a pointer to the saved state instead of the match state,
* and avoid duplicate computations.
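For orientation, the dispatch the TODO refers to is a three-way selection on searchMethod_e; a simplified skeleton under the signatures visible in this diff (argument lists trimmed, so treat it as illustrative only):

    static size_t ZSTD_searchMax_sketch(ZSTD_MatchState_t* ms,
                                        const BYTE* ip, const BYTE* iLimit,
                                        size_t* offsetPtr, U32 mls, U32 rowLog,
                                        searchMethod_e searchMethod,
                                        ZSTD_dictMode_e dictMode)
    {
        if (searchMethod == search_rowHash)
            return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode, rowLog);
        if (searchMethod == search_binaryTree)
            return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
        return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
    }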
@ -1364,7 +1364,7 @@ size_t ZSTD_RowFindBestMatch(
#define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \
ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \
ZSTD_matchState_t* ms, \
ZSTD_MatchState_t* ms, \
const BYTE* ip, const BYTE* const iLimit, \
size_t* offBasePtr) \
{ \
@ -1374,7 +1374,7 @@ size_t ZSTD_RowFindBestMatch(
#define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \
ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \
ZSTD_matchState_t* ms, \
ZSTD_MatchState_t* ms, \
const BYTE* ip, const BYTE* const iLimit, \
size_t* offsetPtr) \
{ \
@ -1384,7 +1384,7 @@ size_t ZSTD_RowFindBestMatch(
#define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \
ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \
ZSTD_matchState_t* ms, \
ZSTD_MatchState_t* ms, \
const BYTE* ip, const BYTE* const iLimit, \
size_t* offsetPtr) \
{ \
@ -1485,7 +1485,7 @@ typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searc
* If a match is found its offset is stored in @p offsetPtr.
*/
FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
ZSTD_matchState_t* ms,
ZSTD_MatchState_t* ms,
const BYTE* ip,
const BYTE* iend,
size_t* offsetPtr,
@ -1514,7 +1514,7 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
ZSTD_MatchState_t* ms, SeqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
const searchMethod_e searchMethod, const U32 depth,
@ -1537,7 +1537,7 @@ size_t ZSTD_compressBlock_lazy_generic(
const int isDMS = dictMode == ZSTD_dictMatchState;
const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
const int isDxS = isDMS || isDDS;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_MatchState_t* const dms = ms->dictMatchState;
const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
@ -1590,7 +1590,7 @@ size_t ZSTD_compressBlock_lazy_generic(
&& repIndex < prefixLowestIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
@ -1642,7 +1642,7 @@ size_t ZSTD_compressBlock_lazy_generic(
const BYTE* repMatch = repIndex < prefixLowestIndex ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
@ -1678,7 +1678,7 @@ size_t ZSTD_compressBlock_lazy_generic(
const BYTE* repMatch = repIndex < prefixLowestIndex ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
@ -1740,7 +1740,7 @@ _storeSequence:
const BYTE* repMatch = repIndex < prefixLowestIndex ?
dictBase - dictIndexDelta + repIndex :
base + repIndex;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
@ -1782,42 +1782,42 @@ _storeSequence:
#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
}
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
}
size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
}
size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
@ -1826,42 +1826,42 @@ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
}
size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
@ -1870,42 +1870,42 @@ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
}
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
@ -1914,14 +1914,14 @@ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
@ -1935,7 +1935,7 @@ size_t ZSTD_compressBlock_btlazy2_dictMatchState(
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_lazy_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
ZSTD_MatchState_t* ms, SeqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
const searchMethod_e searchMethod, const U32 depth)
@ -1986,7 +1986,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
const U32 repIndex = (U32)(curr+1 - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
& (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
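The window test here is the usual unsigned rearrangement: with repIndex = curr + 1 - offset_1, the condition offset_1 <= curr + 1 - windowLow is equivalent to repIndex >= windowLow, and repIndex < curr + 1 follows from offset_1 > 0, matching the `curr > repIndex >= windowLow` equivalence spelled out on the curr-based variants in the hunks below.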
@ -2027,7 +2027,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
const U32 repIndex = (U32)(curr - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
& (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
@ -2059,7 +2059,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
const U32 repIndex = (U32)(curr - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
& (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
@ -2113,7 +2113,7 @@ _storeSequence:
const U32 repIndex = repCurrent - offset_2;
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
& (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
@ -2139,14 +2139,14 @@ _storeSequence:
#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
}
size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
@ -2155,7 +2155,7 @@ size_t ZSTD_compressBlock_greedy_extDict_row(
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
@ -2163,7 +2163,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
}
size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
@ -2173,7 +2173,7 @@ size_t ZSTD_compressBlock_lazy_extDict_row(
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
@ -2181,7 +2181,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
}
size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
@ -2190,7 +2190,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{

View File

@ -11,10 +11,6 @@
#ifndef ZSTD_LAZY_H
#define ZSTD_LAZY_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_compress_internal.h"
/**
@ -31,38 +27,38 @@ extern "C" {
|| !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip);
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
#endif
#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy
@ -86,28 +82,28 @@ size_t ZSTD_compressBlock_greedy_extDict_row(
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy
@ -131,28 +127,28 @@ size_t ZSTD_compressBlock_lazy_extDict_row(
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2
@ -176,13 +172,13 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2
@ -194,9 +190,4 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL
#endif
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_LAZY_H */

View File

@ -16,7 +16,7 @@
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
#include "zstd_ldm_geartab.h"
#define LDM_BUCKET_SIZE_LOG 3
#define LDM_BUCKET_SIZE_LOG 4
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7
@ -133,21 +133,35 @@ done:
}
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
ZSTD_compressionParameters const* cParams)
const ZSTD_compressionParameters* cParams)
{
params->windowLog = cParams->windowLog;
ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
if (params->hashLog == 0) {
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
}
if (params->hashRateLog == 0) {
params->hashRateLog = params->windowLog < params->hashLog
? 0
: params->windowLog - params->hashLog;
if (params->hashLog > 0) {
/* if params->hashLog is set, derive hashRateLog from it */
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
if (params->windowLog > params->hashLog) {
params->hashRateLog = params->windowLog - params->hashLog;
}
} else {
assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9);
/* mapping from [fast, rate7] to [btultra2, rate4] */
params->hashRateLog = 7 - (cParams->strategy/3);
}
}
if (params->hashLog == 0) {
params->hashLog = BOUNDED(ZSTD_HASHLOG_MIN, params->windowLog - params->hashRateLog, ZSTD_HASHLOG_MAX);
}
if (params->minMatchLength == 0) {
params->minMatchLength = LDM_MIN_MATCH_LENGTH;
if (cParams->strategy >= ZSTD_btultra)
params->minMatchLength /= 2;
}
if (params->bucketSizeLog==0) {
assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9);
params->bucketSizeLog = BOUNDED(LDM_BUCKET_SIZE_LOG, (U32)cParams->strategy, ZSTD_LDM_BUCKETSIZELOG_MAX);
}
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}
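Worked through with the defaults above (all fields unset by the caller): ZSTD_fast (strategy 1) yields hashRateLog = 7 - 1/3 = 7 and minMatchLength = 64, while ZSTD_btultra2 (strategy 9) yields hashRateLog = 7 - 9/3 = 4 and minMatchLength = 64/2 = 32 (integer division throughout); hashLog then defaults to windowLog - hashRateLog clamped to [ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX], and bucketSizeLog to BOUNDED(4, strategy, ZSTD_LDM_BUCKETSIZELOG_MAX).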
@ -170,22 +184,22 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
/** ZSTD_ldm_getBucket() :
* Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
const ldmState_t* ldmState, size_t hash, U32 const bucketSizeLog)
{
return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
return ldmState->hashTable + (hash << bucketSizeLog);
}
/** ZSTD_ldm_insertEntry() :
* Insert the entry with corresponding hash into the hash table */
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
size_t const hash, const ldmEntry_t entry,
ldmParams_t const ldmParams)
U32 const bucketSizeLog)
{
BYTE* const pOffset = ldmState->bucketOffsets + hash;
unsigned const offset = *pOffset;
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
*pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
*(ZSTD_ldm_getBucket(ldmState, hash, bucketSizeLog) + offset) = entry;
*pOffset = (BYTE)((offset + 1) & ((1u << bucketSizeLog) - 1));
}
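Each bucket is thus a small ring buffer: bucketOffsets[hash] holds the next write slot and the (1u << bucketSizeLog) - 1 mask wraps it, so with bucketSizeLog = 2, for example, the offset cycles 0, 1, 2, 3, 0, ... and the oldest of the four entries is overwritten first.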
@ -234,7 +248,7 @@ static size_t ZSTD_ldm_countBackwardsMatch_2segments(
*
* The tables for the other strategies are filled within their
* block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
static size_t ZSTD_ldm_fillFastTables(ZSTD_MatchState_t* ms,
void const* end)
{
const BYTE* const iend = (const BYTE*)end;
@ -273,7 +287,8 @@ void ZSTD_ldm_fillHashTable(
const BYTE* iend, ldmParams_t const* params)
{
U32 const minMatchLength = params->minMatchLength;
U32 const hBits = params->hashLog - params->bucketSizeLog;
U32 const bucketSizeLog = params->bucketSizeLog;
U32 const hBits = params->hashLog - bucketSizeLog;
BYTE const* const base = ldmState->window.base;
BYTE const* const istart = ip;
ldmRollingHashState_t hashState;
@ -288,7 +303,7 @@ void ZSTD_ldm_fillHashTable(
unsigned n;
numSplits = 0;
hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
hashed = ZSTD_ldm_gear_feed(&hashState, ip, (size_t)(iend - ip), splits, &numSplits);
for (n = 0; n < numSplits; n++) {
if (ip + splits[n] >= istart + minMatchLength) {
@ -299,7 +314,7 @@ void ZSTD_ldm_fillHashTable(
entry.offset = (U32)(split - base);
entry.checksum = (U32)(xxhash >> 32);
ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
ZSTD_ldm_insertEntry(ldmState, hash, entry, params->bucketSizeLog);
}
}
@ -313,7 +328,7 @@ void ZSTD_ldm_fillHashTable(
* Sets cctx->nextToUpdate to a position corresponding closer to anchor
* if it is far away
* (after a long match, only update tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
static void ZSTD_ldm_limitTableUpdate(ZSTD_MatchState_t* ms, const BYTE* anchor)
{
U32 const curr = (U32)(anchor - ms->window.base);
if (curr > ms->nextToUpdate + 1024) {
@ -325,7 +340,7 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_ldm_generateSequences_internal(
ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
ldmState_t* ldmState, RawSeqStore_t* rawSeqStore,
ldmParams_t const* params, void const* src, size_t srcSize)
{
/* LDM parameters */
@ -379,7 +394,7 @@ size_t ZSTD_ldm_generateSequences_internal(
candidates[n].split = split;
candidates[n].hash = hash;
candidates[n].checksum = (U32)(xxhash >> 32);
candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, params->bucketSizeLog);
PREFETCH_L1(candidates[n].bucket);
}
@ -402,7 +417,7 @@ size_t ZSTD_ldm_generateSequences_internal(
* the previous one, we merely register it in the hash table and
* move on */
if (split < anchor) {
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
continue;
}
@ -449,7 +464,7 @@ size_t ZSTD_ldm_generateSequences_internal(
/* No match found -- insert an entry into the hash table
* and process the next candidate match */
if (bestEntry == NULL) {
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
continue;
}
@ -470,7 +485,7 @@ size_t ZSTD_ldm_generateSequences_internal(
/* Insert the current entry into the hash table --- it must be
* done after the previous block to avoid clobbering bestEntry */
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
anchor = split + forwardMatchLength;
@ -509,7 +524,7 @@ static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
}
size_t ZSTD_ldm_generateSequences(
ldmState_t* ldmState, rawSeqStore_t* sequences,
ldmState_t* ldmState, RawSeqStore_t* sequences,
ldmParams_t const* params, void const* src, size_t srcSize)
{
U32 const maxDist = 1U << params->windowLog;
@ -586,7 +601,7 @@ size_t ZSTD_ldm_generateSequences(
}
void
ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
{
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
@ -622,7 +637,7 @@ ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const min
* Returns the current sequence to handle, or if the rest of the block should
* be literals, it returns a sequence with offset == 0.
*/
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
static rawSeq maybeSplitSequence(RawSeqStore_t* rawSeqStore,
U32 const remaining, U32 const minMatch)
{
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
@ -646,7 +661,7 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
return sequence;
}
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes) {
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
@ -663,14 +678,14 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
}
}
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_paramSwitch_e useRowMatchFinder,
size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore,
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_ParamSwitch_e useRowMatchFinder,
void const* src, size_t srcSize)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
unsigned const minMatch = cParams->minMatch;
ZSTD_blockCompressor const blockCompressor =
ZSTD_BlockCompressor_f const blockCompressor =
ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
/* Input bounds */
BYTE const* const istart = (BYTE const*)src;

View File

@ -11,10 +11,6 @@
#ifndef ZSTD_LDM_H
#define ZSTD_LDM_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_compress_internal.h" /* ldmParams_t, U32 */
#include "../zstd.h" /* ZSTD_CCtx, size_t */
@ -43,7 +39,7 @@ void ZSTD_ldm_fillHashTable(
* sequences.
*/
size_t ZSTD_ldm_generateSequences(
ldmState_t* ldms, rawSeqStore_t* sequences,
ldmState_t* ldms, RawSeqStore_t* sequences,
ldmParams_t const* params, void const* src, size_t srcSize);
/**
@ -64,9 +60,9 @@ size_t ZSTD_ldm_generateSequences(
* two. We handle that case correctly, and update `rawSeqStore` appropriately.
* NOTE: This function does not return any errors.
*/
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_paramSwitch_e useRowMatchFinder,
size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore,
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_ParamSwitch_e useRowMatchFinder,
void const* src, size_t srcSize);
/**
@ -76,7 +72,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
* Avoids emitting matches less than `minMatch` bytes.
* Must be called for data that is not passed to ZSTD_ldm_blockCompress().
*/
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
void ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize,
U32 const minMatch);
/* ZSTD_ldm_skipRawSeqStoreBytes():
@ -84,7 +80,7 @@ void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
* Not to be used in conjunction with ZSTD_ldm_skipSequences().
* Must be called for data which is not passed to ZSTD_ldm_blockCompress().
*/
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes);
/** ZSTD_ldm_getTableSize() :
* Estimate the space needed for long distance matching tables or 0 if LDM is
@ -110,8 +106,4 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
ZSTD_compressionParameters const* cParams);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_LDM_H */

View File

@ -408,7 +408,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
Assumption : always within prefix (i.e. not within extDict) */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip)
{
@ -440,7 +440,7 @@ U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertBt1(
const ZSTD_matchState_t* ms,
const ZSTD_MatchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
U32 const target,
U32 const mls, const int extDict)
@ -560,7 +560,7 @@ U32 ZSTD_insertBt1(
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_updateTree_internal(
ZSTD_matchState_t* ms,
ZSTD_MatchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
const U32 mls, const ZSTD_dictMode_e dictMode)
{
@ -580,7 +580,7 @@ void ZSTD_updateTree_internal(
ms->nextToUpdate = target;
}
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) {
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}
@ -589,7 +589,7 @@ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32
ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms,
ZSTD_MatchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip, const BYTE* const iLimit,
const ZSTD_dictMode_e dictMode,
@ -625,7 +625,7 @@ ZSTD_insertBtAndGetAllMatches (
U32 mnum = 0;
U32 nbCompares = 1U << cParams->searchLog;
const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
const ZSTD_MatchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
const ZSTD_compressionParameters* const dmsCParams =
dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
@ -664,13 +664,13 @@ ZSTD_insertBtAndGetAllMatches (
assert(curr >= windowLow);
if ( dictMode == ZSTD_extDict
&& ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
& (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
& (ZSTD_index_overlap_check(dictLimit, repIndex)) )
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
}
if (dictMode == ZSTD_dictMatchState
&& ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
& ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
& (ZSTD_index_overlap_check(dictLimit, repIndex)) )
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
} }
@ -819,7 +819,7 @@ ZSTD_insertBtAndGetAllMatches (
typedef U32 (*ZSTD_getAllMatchesFn)(
ZSTD_match_t*,
ZSTD_matchState_t*,
ZSTD_MatchState_t*,
U32*,
const BYTE*,
const BYTE*,
@ -831,7 +831,7 @@ FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_btGetAllMatches_internal(
ZSTD_match_t* matches,
ZSTD_matchState_t* ms,
ZSTD_MatchState_t* ms,
U32* nextToUpdate3,
const BYTE* ip,
const BYTE* const iHighLimit,
@ -854,7 +854,7 @@ U32 ZSTD_btGetAllMatches_internal(
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
ZSTD_match_t* matches, \
ZSTD_matchState_t* ms, \
ZSTD_MatchState_t* ms, \
U32* nextToUpdate3, \
const BYTE* ip, \
const BYTE* const iHighLimit, \
@ -886,7 +886,7 @@ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
}
static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
@ -905,7 +905,7 @@ ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const di
/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
rawSeqStore_t seqStore; /* External match candidates store for this block */
RawSeqStore_t seqStore; /* External match candidates store for this block */
U32 startPosInBlock; /* Start position of the current match candidate */
U32 endPosInBlock; /* End position of the current match candidate */
U32 offset; /* Offset of the match candidate */
@ -915,7 +915,7 @@ typedef struct {
* Moves forward in @rawSeqStore by @nbBytes,
* which will update the fields 'pos' and 'posInSequence'.
*/
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes)
{
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
@ -972,7 +972,7 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock
return;
}
/* Matches may be < MINMATCH by this process. In that case, we will reject them
/* Matches may be < minMatch by this process. In that case, we will reject them
when we are deciding whether or not to add the ldm */
optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
@ -994,7 +994,8 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock
* into 'matches'. Maintains the correct ordering of 'matches'.
*/
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
U32 minMatch)
{
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
/* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
@ -1003,7 +1004,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
/* Ensure that current block position is not outside of the match */
if (currPosInBlock < optLdm->startPosInBlock
|| currPosInBlock >= optLdm->endPosInBlock
|| candidateMatchLength < MINMATCH) {
|| candidateMatchLength < minMatch) {
return;
}
@ -1023,7 +1024,8 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
ZSTD_match_t* matches, U32* nbMatches,
U32 currPosInBlock, U32 remainingBytes)
U32 currPosInBlock, U32 remainingBytes,
U32 minMatch)
{
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
return;
@ -1040,7 +1042,7 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
}
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
}
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
}
@ -1072,8 +1074,8 @@ listStats(const U32* table, int lastEltID)
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
seqStore_t* seqStore,
ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
SeqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
const int optLevel,
@ -1122,7 +1124,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const ll0 = !litlen;
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
(U32)(ip-istart), (U32)(iend-ip));
(U32)(ip-istart), (U32)(iend-ip),
minMatch);
if (!nbMatches) {
DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
ip++;
@ -1197,7 +1200,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
for (cur = 1; cur <= last_pos; cur++) {
const BYTE* const inr = ip + cur;
assert(cur <= ZSTD_OPT_NUM);
DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur);
/* Fix current position with one literal if cheaper */
{ U32 const litlen = opt[cur-1].litlen + 1;
@ -1207,8 +1210,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
assert(price < 1000000000); /* overflow check */
if (price <= opt[cur].price) {
ZSTD_optimal_t const prevMatch = opt[cur];
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
(int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
opt[cur] = opt[cur-1];
opt[cur].litlen = litlen;
@ -1227,34 +1230,34 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
&& (with1literal < opt[cur+1].price) ) {
/* update offset history - before it disappears */
U32 const prev = cur - prevMatch.mlen;
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
assert(cur >= prevMatch.mlen);
DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
newReps.rep[0], newReps.rep[1], newReps.rep[2] );
opt[cur+1] = prevMatch; /* mlen & offbase */
ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t));
opt[cur+1].litlen = 1;
opt[cur+1].price = with1literal;
if (last_pos < cur+1) last_pos = cur+1;
}
}
} else {
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)",
(int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
}
}
/* Offset history is not updated during match comparison.
* Do it here, now that the match is selected and confirmed.
*/
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t));
assert(cur >= opt[cur].mlen);
if (opt[cur].litlen == 0) {
/* just finished a match => alter offset history */
U32 const prev = cur - opt[cur].mlen;
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t));
}
/* last match must start at a minimum distance of 8 from oend */
@ -1276,7 +1279,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 matchNb;
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
(U32)(inr-istart), (U32)(iend-inr));
(U32)(inr-istart), (U32)(iend-inr),
minMatch);
if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur);
@ -1284,8 +1288,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
}
{ U32 const longestML = matches[nbMatches-1].len;
DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
inr-istart, cur, nbMatches, longestML);
DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u",
(int)(inr-istart), cur, nbMatches, longestML);
if ( (longestML > sufficient_len)
|| (cur + longestML >= ZSTD_OPT_NUM)
@ -1353,10 +1357,10 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
/* Update offset history */
if (lastStretch.litlen == 0) {
/* finishing on a match : update offset history */
repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t));
} else {
ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t));
assert(cur >= lastStretch.litlen);
cur -= lastStretch.litlen;
}
@ -1411,8 +1415,8 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
U32 const mlen = opt[storePos].mlen;
U32 const offBase = opt[storePos].off;
U32 const advance = llen + mlen;
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
anchor - istart, (unsigned)llen, (unsigned)mlen);
DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u",
(int)(anchor - istart), (unsigned)llen, (unsigned)mlen);
if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
assert(storePos == storeEnd); /* must be last sequence */
@ -1440,7 +1444,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt0(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
@ -1449,7 +1453,7 @@ static size_t ZSTD_compressBlock_opt0(
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
@ -1458,7 +1462,7 @@ static size_t ZSTD_compressBlock_opt2(
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
@ -1477,8 +1481,8 @@ size_t ZSTD_compressBlock_btopt(
*/
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
seqStore_t* seqStore,
void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms,
SeqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
@ -1503,7 +1507,7 @@ void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
}
size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
@ -1511,7 +1515,7 @@ size_t ZSTD_compressBlock_btultra(
}
size_t ZSTD_compressBlock_btultra2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
@ -1541,14 +1545,14 @@ size_t ZSTD_compressBlock_btultra2(
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
@ -1557,14 +1561,14 @@ size_t ZSTD_compressBlock_btopt_extDict(
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_btultra_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);

thirdparty/zstd/compress/zstd_opt.h (vendored)

@ -11,28 +11,24 @@
#ifndef ZSTD_OPT_H
#define ZSTD_OPT_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_compress_internal.h"
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
/* used in ZSTD_loadDictionaryContent() */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend);
#endif
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt
@ -46,20 +42,20 @@ size_t ZSTD_compressBlock_btopt_extDict(
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
/* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries
* and is only specific for the first block (no prefix) */
size_t ZSTD_compressBlock_btultra2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra
@ -73,8 +69,4 @@ size_t ZSTD_compressBlock_btultra2(
#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL
#endif
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_OPT_H */

thirdparty/zstd/compress/zstd_preSplit.c (vendored, new file, 238 lines)

@ -0,0 +1,238 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "../common/compiler.h" /* ZSTD_ALIGNOF */
#include "../common/mem.h" /* S64 */
#include "../common/zstd_deps.h" /* ZSTD_memset */
#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */
#include "hist.h" /* HIST_add */
#include "zstd_preSplit.h"
#define BLOCKSIZE_MIN 3500
#define THRESHOLD_PENALTY_RATE 16
#define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2)
#define THRESHOLD_PENALTY 3
#define HASHLENGTH 2
#define HASHLOG_MAX 10
#define HASHTABLESIZE (1 << HASHLOG_MAX)
#define HASHMASK (HASHTABLESIZE - 1)
#define KNUTH 0x9e3779b9
/* for hashLog > 8, hash 2 bytes.
* for hashLog == 8, just take the byte, no hashing.
* The speed of this method relies on compile-time constant propagation */
FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)
{
assert(hashLog >= 8);
if (hashLog == 8) return (U32)((const BYTE*)p)[0];
assert(hashLog <= HASHLOG_MAX);
return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);
}
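A brief aside on the constant used here (standard Fibonacci-hashing lore, not something this patch introduces): KNUTH is 2^32 scaled down by the golden ratio, so the multiply spreads nearby 16-bit values across the table and the top hashLog bits select the bucket:

\[
\texttt{KNUTH} \;=\; \bigl\lfloor 2^{32} / \varphi \bigr\rfloor \;=\; \texttt{0x9e3779b9}, \qquad
h(p) \;=\; \bigl(\mathrm{MEM\_read16}(p) \cdot \texttt{KNUTH}\bigr) \gg (32 - \mathit{hashLog})
\]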
typedef struct {
unsigned events[HASHTABLESIZE];
size_t nbEvents;
} Fingerprint;
typedef struct {
Fingerprint pastEvents;
Fingerprint newEvents;
} FPStats;
static void initStats(FPStats* fpstats)
{
ZSTD_memset(fpstats, 0, sizeof(FPStats));
}
FORCE_INLINE_TEMPLATE void
addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
{
const char* p = (const char*)src;
size_t limit = srcSize - HASHLENGTH + 1;
size_t n;
assert(srcSize >= HASHLENGTH);
for (n = 0; n < limit; n+=samplingRate) {
fp->events[hash2(p+n, hashLog)]++;
}
fp->nbEvents += limit/samplingRate;
}
FORCE_INLINE_TEMPLATE void
recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
{
ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog));
fp->nbEvents = 0;
addEvents_generic(fp, src, srcSize, samplingRate, hashLog);
}
typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);
#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate
#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \
static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
{ \
recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \
}
ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
ZSTD_GEN_RECORD_FINGERPRINT(43, 8)
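For readability, here is what one of those generator lines expands to; pinning the sampling rate and hash log as literals is exactly the compile-time constant propagation the hash2 comment relies on:

/* expansion of ZSTD_GEN_RECORD_FINGERPRINT(5, 10) */
static void ZSTD_recordFingerprint_5(Fingerprint* fp, const void* src, size_t srcSize)
{
    recordFingerprint_generic(fp, src, srcSize, 5, 10);
}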
static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }
static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog)
{
U64 distance = 0;
size_t n;
assert(hashLog <= HASHLOG_MAX);
for (n = 0; n < ((size_t)1 << hashLog); n++) {
distance +=
abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents);
}
return distance;
}
/* Compare newEvents with pastEvents
* return 1 when considered "too different"
*/
static int compareFingerprints(const Fingerprint* ref,
const Fingerprint* newfp,
int penalty,
unsigned hashLog)
{
assert(ref->nbEvents > 0);
assert(newfp->nbEvents > 0);
{ U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents;
U64 deviation = fpDistance(ref, newfp, hashLog);
U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE;
return deviation >= threshold;
}
}
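Spelling out the arithmetic of the two functions above: fpDistance cross-multiplies each bucket by the other fingerprint's total, which compares the histograms as distributions without a single division, and compareFingerprints then tests that scaled L1 distance against a fraction of p50 = N1*N2:

\[
\sum_n \bigl| e_1[n]\,N_2 - e_2[n]\,N_1 \bigr|
\;=\; N_1 N_2 \sum_n \Bigl| \tfrac{e_1[n]}{N_1} - \tfrac{e_2[n]}{N_2} \Bigr|
\;\overset{?}{\ge}\; N_1 N_2 \cdot \tfrac{\texttt{THRESHOLD\_BASE} + \mathit{penalty}}{\texttt{THRESHOLD\_PENALTY\_RATE}}
\]

With THRESHOLD_BASE = 14 and a rate of 16, the normalized cutoff sits at (14+3)/16 ≈ 1.06 right after a split (penalty starts at THRESHOLD_PENALTY) and relaxes to 14/16 ≈ 0.88 as consecutive chunks keep matching; for reference, the maximum possible value of that normalized distance is 2.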
static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp)
{
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
acc->events[n] += newfp->events[n];
}
acc->nbEvents += newfp->nbEvents;
}
static void flushEvents(FPStats* fpstats)
{
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
fpstats->pastEvents.events[n] = fpstats->newEvents.events[n];
}
fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents;
ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents));
}
static void removeEvents(Fingerprint* acc, const Fingerprint* slice)
{
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
assert(acc->events[n] >= slice->events[n]);
acc->events[n] -= slice->events[n];
}
acc->nbEvents -= slice->nbEvents;
}
#define CHUNKSIZE (8 << 10)
static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
static const RecordEvents_f records_fs[] = {
FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
};
static const unsigned hashParams[] = { 8, 9, 10, 10 };
const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
FPStats* const fpstats = (FPStats*)workspace;
const char* p = (const char*)blockStart;
int penalty = THRESHOLD_PENALTY;
size_t pos = 0;
assert(blockSize == (128 << 10));
assert(workspace != NULL);
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
initStats(fpstats);
record_f(&fpstats->pastEvents, p, CHUNKSIZE);
for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
return pos;
} else {
mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
if (penalty > 0) penalty--;
}
}
assert(pos == blockSize);
return blockSize;
(void)flushEvents; (void)removeEvents;
}
/* ZSTD_splitBlock_fromBorders(): very fast strategy :
* compare fingerprint from beginning and end of the block,
* derive from their difference if it's preferable to split in the middle,
* repeat the process a second time, for a finer-grained decision.
* A third pass did not bring improvements, so I stopped at 2.
* Benefits are good enough for a cheap heuristic.
* More accurate splitting saves more, but speed impact is also more perceptible.
* For better accuracy, use more elaborate variant *_byChunks.
*/
static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
void* workspace, size_t wkspSize)
{
#define SEGMENT_SIZE 512
FPStats* const fpstats = (FPStats*)workspace;
Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
assert(blockSize == (128 << 10));
assert(workspace != NULL);
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
initStats(fpstats);
HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
return blockSize;
HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
middleEvents->nbEvents = SEGMENT_SIZE;
{ U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
return 64 KB;
return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
}
}
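A worked note on the thresholds just above: both border fingerprints hold exactly SEGMENT_SIZE = 512 events, so every fpDistance value is on a 512*512 scale, and

\[
\mathit{minDistance} = \tfrac{512^2}{3} \iff \bigl| d_{\mathrm{begin}} - d_{\mathrm{end}} \bigr| \ge \tfrac{1}{3} \ \text{(normalized)},
\]

i.e. the middle sample must resemble one end at least a third more than the other before the function commits to an off-center cut: 32 KB when the middle already looks unlike the beginning, 96 KB when it still resembles it, and the midpoint 64 KB otherwise.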
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level);
assert(0<=level && level<=4);
if (level == 0)
return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
/* level >= 1*/
return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);
}

thirdparty/zstd/compress/zstd_preSplit.h (vendored, new file)

@ -0,0 +1,33 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_PRESPLIT_H
#define ZSTD_PRESPLIT_H
#include <stddef.h> /* size_t */
#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208
/* ZSTD_splitBlock():
* @level must be a value between 0 and 4.
* higher levels spend more energy to detect block boundaries.
* @workspace must be aligned for size_t.
* @wkspSize must be at least ZSTD_SLIPBLOCK_WORKSPACESIZE
* note:
* For the time being, this function only accepts full 128 KB blocks.
* Therefore, @blockSize must be == 128 KB.
* While this could be extended to smaller sizes in the future,
* it is not yet clear if this would be useful. TBD.
*/
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize);
#endif /* ZSTD_PRESPLIT_H */
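A minimal caller sketch for this API, under the constraints stated in the comment above; the function name, buffer, and level choice are illustrative, not part of the library:

#include <stdlib.h>
#include "zstd_preSplit.h"

/* Ask the heuristic where to cut one full 128 KB block ("block" is a
 * hypothetical caller-owned buffer; level 2 is an arbitrary pick from
 * the documented 0..4 range). */
static size_t firstBoundary(const void* block)
{
    size_t split = 0;
    /* malloc's fundamental alignment satisfies the size_t requirement */
    void* const wksp = malloc(ZSTD_SLIPBLOCK_WORKSPACESIZE);
    if (wksp == NULL) return 0;
    split = ZSTD_splitBlock(block, 128 * 1024, 2, wksp, ZSTD_SLIPBLOCK_WORKSPACESIZE);
    free(wksp);
    return split;   /* == 128*1024 means: no boundary found, keep the block whole */
}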

thirdparty/zstd/compress/zstdmt_compress.c (vendored)

@ -90,9 +90,9 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
typedef struct buffer_s {
void* start;
size_t capacity;
} buffer_t;
} Buffer;
static const buffer_t g_nullBuffer = { NULL, 0 };
static const Buffer g_nullBuffer = { NULL, 0 };
typedef struct ZSTDMT_bufferPool_s {
ZSTD_pthread_mutex_t poolMutex;
@ -100,7 +100,7 @@ typedef struct ZSTDMT_bufferPool_s {
unsigned totalBuffers;
unsigned nbBuffers;
ZSTD_customMem cMem;
buffer_t* buffers;
Buffer* buffers;
} ZSTDMT_bufferPool;
static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
@ -128,7 +128,7 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_cu
ZSTD_customFree(bufPool, cMem);
return NULL;
}
bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
bufPool->buffers = (Buffer*)ZSTD_customCalloc(maxNbBuffers * sizeof(Buffer), cMem);
if (bufPool->buffers==NULL) {
ZSTDMT_freeBufferPool(bufPool);
return NULL;
@ -144,7 +144,7 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_cu
static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
{
size_t const poolSize = sizeof(*bufPool);
size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
size_t const arraySize = bufPool->totalBuffers * sizeof(Buffer);
unsigned u;
size_t totalBufferSize = 0;
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
@ -189,13 +189,13 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
* assumption : bufPool must be valid
* @return : a buffer, with start pointer and size
* note: allocation may fail, in this case, start==NULL and size==0 */
static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
static Buffer ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
{
size_t const bSize = bufPool->bufferSize;
DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers) { /* try to use an existing buffer */
buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
Buffer const buf = bufPool->buffers[--(bufPool->nbBuffers)];
size_t const availBufferSize = buf.capacity;
bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
@ -212,7 +212,7 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
/* create new buffer */
DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer");
{ buffer_t buffer;
{ Buffer buffer;
void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
buffer.start = start; /* note : start can be NULL if malloc fails ! */
buffer.capacity = (start==NULL) ? 0 : bSize;
@ -231,12 +231,12 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
* @return : a buffer that is at least the buffer pool buffer size.
* If a reallocation happens, the data in the input buffer is copied.
*/
static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
static Buffer ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, Buffer buffer)
{
size_t const bSize = bufPool->bufferSize;
if (buffer.capacity < bSize) {
void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
buffer_t newBuffer;
Buffer newBuffer;
newBuffer.start = start;
newBuffer.capacity = start == NULL ? 0 : bSize;
if (start != NULL) {
@ -252,7 +252,7 @@ static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
#endif
/* store buffer for later re-use, up to pool capacity */
static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, Buffer buf)
{
DEBUGLOG(5, "ZSTDMT_releaseBuffer");
if (buf.start == NULL) return; /* compatible with release on NULL */
@ -290,23 +290,23 @@ static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
return ZSTDMT_sizeof_bufferPool(seqPool);
}
static rawSeqStore_t bufferToSeq(buffer_t buffer)
static RawSeqStore_t bufferToSeq(Buffer buffer)
{
rawSeqStore_t seq = kNullRawSeqStore;
RawSeqStore_t seq = kNullRawSeqStore;
seq.seq = (rawSeq*)buffer.start;
seq.capacity = buffer.capacity / sizeof(rawSeq);
return seq;
}
static buffer_t seqToBuffer(rawSeqStore_t seq)
static Buffer seqToBuffer(RawSeqStore_t seq)
{
buffer_t buffer;
Buffer buffer;
buffer.start = seq.seq;
buffer.capacity = seq.capacity * sizeof(rawSeq);
return buffer;
}
static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
static RawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
{
if (seqPool->bufferSize == 0) {
return kNullRawSeqStore;
@ -315,13 +315,13 @@ static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
}
#if ZSTD_RESIZE_SEQPOOL
static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
static RawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, RawSeqStore_t seq)
{
return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq)));
}
#endif
static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, RawSeqStore_t seq)
{
ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
}
@ -466,7 +466,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
typedef struct {
void const* start;
size_t size;
} range_t;
} Range;
typedef struct {
/* All variables in the struct are protected by mutex. */
@ -482,10 +482,10 @@ typedef struct {
ZSTD_pthread_mutex_t ldmWindowMutex;
ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */
ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
} serialState_t;
} SerialState;
static int
ZSTDMT_serialState_reset(serialState_t* serialState,
ZSTDMT_serialState_reset(SerialState* serialState,
ZSTDMT_seqPool* seqPool,
ZSTD_CCtx_params params,
size_t jobSize,
@ -555,7 +555,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
return 0;
}
static int ZSTDMT_serialState_init(serialState_t* serialState)
static int ZSTDMT_serialState_init(SerialState* serialState)
{
int initError = 0;
ZSTD_memset(serialState, 0, sizeof(*serialState));
@ -566,7 +566,7 @@ static int ZSTDMT_serialState_init(serialState_t* serialState)
return initError;
}
static void ZSTDMT_serialState_free(serialState_t* serialState)
static void ZSTDMT_serialState_free(SerialState* serialState)
{
ZSTD_customMem cMem = serialState->params.customMem;
ZSTD_pthread_mutex_destroy(&serialState->mutex);
@ -577,9 +577,10 @@ static void ZSTDMT_serialState_free(serialState_t* serialState)
ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
}
static void ZSTDMT_serialState_update(serialState_t* serialState,
ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore,
range_t src, unsigned jobID)
static void
ZSTDMT_serialState_genSequences(SerialState* serialState,
RawSeqStore_t* seqStore,
Range src, unsigned jobID)
{
/* Wait for our turn */
ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
@ -592,12 +593,13 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
/* It is now our turn, do any processing necessary */
if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
size_t error;
assert(seqStore.seq != NULL && seqStore.pos == 0 &&
seqStore.size == 0 && seqStore.capacity > 0);
DEBUGLOG(6, "ZSTDMT_serialState_genSequences: LDM update");
assert(seqStore->seq != NULL && seqStore->pos == 0 &&
seqStore->size == 0 && seqStore->capacity > 0);
assert(src.size <= serialState->params.jobSize);
ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
error = ZSTD_ldm_generateSequences(
&serialState->ldmState, &seqStore,
&serialState->ldmState, seqStore,
&serialState->params.ldmParams, src.start, src.size);
/* We provide a large enough buffer to never fail. */
assert(!ZSTD_isError(error)); (void)error;
@ -616,14 +618,22 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
serialState->nextJobID++;
ZSTD_pthread_cond_broadcast(&serialState->cond);
ZSTD_pthread_mutex_unlock(&serialState->mutex);
}
if (seqStore.size > 0) {
ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size);
assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
static void
ZSTDMT_serialState_applySequences(const SerialState* serialState, /* just for an assert() check */
ZSTD_CCtx* jobCCtx,
const RawSeqStore_t* seqStore)
{
if (seqStore->size > 0) {
DEBUGLOG(5, "ZSTDMT_serialState_applySequences: uploading %u external sequences", (unsigned)seqStore->size);
assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable); (void)serialState;
assert(jobCCtx);
ZSTD_referenceExternalSequences(jobCCtx, seqStore->seq, seqStore->size);
}
}
static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState,
static void ZSTDMT_serialState_ensureFinished(SerialState* serialState,
unsigned jobID, size_t cSize)
{
ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
@ -647,28 +657,28 @@ static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState,
/* ===== Worker thread ===== */
/* ------------------------------------------ */
static const range_t kNullRange = { NULL, 0 };
static const Range kNullRange = { NULL, 0 };
typedef struct {
size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */
ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */
ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */
ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */
ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */
ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */
serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */
buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */
range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */
range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */
unsigned jobID; /* set by mtctx, then read by worker => no barrier */
unsigned firstJob; /* set by mtctx, then read by worker => no barrier */
unsigned lastJob; /* set by mtctx, then read by worker => no barrier */
ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */
const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */
unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */
size_t dstFlushed; /* used only by mtctx */
unsigned frameChecksumNeeded; /* used only by mtctx */
size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */
ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */
ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */
ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */
ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */
ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */
SerialState* serial; /* Thread-safe - used by mtctx and (all) workers */
Buffer dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */
Range prefix; /* set by mtctx, then read by worker & mtctx => no barrier */
Range src; /* set by mtctx, then read by worker & mtctx => no barrier */
unsigned jobID; /* set by mtctx, then read by worker => no barrier */
unsigned firstJob; /* set by mtctx, then read by worker => no barrier */
unsigned lastJob; /* set by mtctx, then read by worker => no barrier */
ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */
const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */
unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */
size_t dstFlushed; /* used only by mtctx */
unsigned frameChecksumNeeded; /* used only by mtctx */
} ZSTDMT_jobDescription;
#define JOB_ERROR(e) \
@ -685,10 +695,11 @@ static void ZSTDMT_compressionJob(void* jobDescription)
ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */
ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
buffer_t dstBuff = job->dstBuff;
RawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
Buffer dstBuff = job->dstBuff;
size_t lastCBlockSize = 0;
DEBUGLOG(5, "ZSTDMT_compressionJob: job %u", job->jobID);
/* resources */
if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation));
if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */
@ -710,11 +721,15 @@ static void ZSTDMT_compressionJob(void* jobDescription)
/* init */
/* Perform serial step as early as possible */
ZSTDMT_serialState_genSequences(job->serial, &rawSeqStore, job->src, job->jobID);
if (job->cdict) {
size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
assert(job->firstJob); /* only allowed for first job */
if (ZSTD_isError(initError)) JOB_ERROR(initError);
} else { /* srcStart points at reloaded section */
} else {
U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
{ size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
@ -723,16 +738,17 @@ static void ZSTDMT_compressionJob(void* jobDescription)
size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
if (ZSTD_isError(err)) JOB_ERROR(err);
}
DEBUGLOG(6, "ZSTDMT_compressionJob: job %u: loading prefix of size %zu", job->jobID, job->prefix.size);
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent,
ZSTD_dtlm_fast,
NULL, /*cdict*/
&jobParams, pledgedSrcSize);
if (ZSTD_isError(initError)) JOB_ERROR(initError);
} }
/* Perform serial step as early as possible, but after CCtx initialization */
ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
/* External Sequences can only be applied after CCtx initialization */
ZSTDMT_serialState_applySequences(job->serial, cctx, &rawSeqStore);
if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
@ -741,7 +757,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
ZSTD_invalidateRepCodes(cctx);
}
/* compress */
/* compress the entire job by smaller chunks, for better granularity */
{ size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX;
int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize);
const BYTE* ip = (const BYTE*) job->src.start;
@ -809,10 +825,10 @@ _endJob:
/* ------------------------------------------ */
typedef struct {
range_t prefix; /* read-only non-owned prefix buffer */
buffer_t buffer;
Range prefix; /* read-only non-owned prefix buffer */
Buffer buffer;
size_t filled;
} inBuff_t;
} InBuff_t;
typedef struct {
BYTE* buffer; /* The round input buffer. All jobs get references
@ -826,9 +842,9 @@ typedef struct {
* the inBuff is sent to the worker thread.
* pos <= capacity.
*/
} roundBuff_t;
} RoundBuff_t;
static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
static const RoundBuff_t kNullRoundBuff = {NULL, 0, 0};
#define RSYNC_LENGTH 32
/* Don't create chunks smaller than the zstd block size.
@ -845,7 +861,7 @@ typedef struct {
U64 hash;
U64 hitMask;
U64 primePower;
} rsyncState_t;
} RSyncState_t;
struct ZSTDMT_CCtx_s {
POOL_ctx* factory;
@ -857,10 +873,10 @@ struct ZSTDMT_CCtx_s {
size_t targetSectionSize;
size_t targetPrefixSize;
int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
inBuff_t inBuff;
roundBuff_t roundBuff;
serialState_t serial;
rsyncState_t rsync;
InBuff_t inBuff;
RoundBuff_t roundBuff;
SerialState serial;
RSyncState_t rsync;
unsigned jobIDMask;
unsigned doneJobID;
unsigned nextJobID;
@ -1245,13 +1261,11 @@ size_t ZSTDMT_initCStream_internal(
/* init */
if (params.nbWorkers != mtctx->params.nbWorkers)
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, (unsigned)params.nbWorkers) , "");
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */
ZSTDMT_waitForAllJobsCompleted(mtctx);
ZSTDMT_releaseAllJobResources(mtctx);
@ -1260,15 +1274,14 @@ size_t ZSTDMT_initCStream_internal(
mtctx->params = params;
mtctx->frameContentSize = pledgedSrcSize;
ZSTD_freeCDict(mtctx->cdictLocal);
if (dict) {
ZSTD_freeCDict(mtctx->cdictLocal);
mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */
params.cParams, mtctx->cMem);
mtctx->cdict = mtctx->cdictLocal;
if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
} else {
ZSTD_freeCDict(mtctx->cdictLocal);
mtctx->cdictLocal = NULL;
mtctx->cdict = cdict;
}
@ -1334,9 +1347,32 @@ size_t ZSTDMT_initCStream_internal(
mtctx->allJobsCompleted = 0;
mtctx->consumed = 0;
mtctx->produced = 0;
/* update dictionary */
ZSTD_freeCDict(mtctx->cdictLocal);
mtctx->cdictLocal = NULL;
mtctx->cdict = NULL;
if (dict) {
if (dictContentType == ZSTD_dct_rawContent) {
mtctx->inBuff.prefix.start = (const BYTE*)dict;
mtctx->inBuff.prefix.size = dictSize;
} else {
/* note : a loadPrefix becomes an internal CDict */
mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
ZSTD_dlm_byRef, dictContentType,
params.cParams, mtctx->cMem);
mtctx->cdict = mtctx->cdictLocal;
if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
}
} else {
mtctx->cdict = cdict;
}
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize,
dict, dictSize, dictContentType))
return ERROR(memory_allocation);
return 0;
}
@ -1403,7 +1439,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
mtctx->roundBuff.pos += srcSize;
mtctx->inBuff.buffer = g_nullBuffer;
mtctx->inBuff.filled = 0;
/* Set the prefix */
/* Set the prefix for next job */
if (!endFrame) {
size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize);
mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize;
@ -1540,12 +1576,17 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
* If the data of the first job is broken up into two segments, we cover both
* sections.
*/
static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
static Range ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
{
unsigned const firstJobID = mtctx->doneJobID;
unsigned const lastJobID = mtctx->nextJobID;
unsigned jobID;
/* no need to check during first round */
size_t roundBuffCapacity = mtctx->roundBuff.capacity;
size_t nbJobs1stRoundMin = roundBuffCapacity / mtctx->targetSectionSize;
if (lastJobID < nbJobs1stRoundMin) return kNullRange;
for (jobID = firstJobID; jobID < lastJobID; ++jobID) {
unsigned const wJobID = jobID & mtctx->jobIDMask;
size_t consumed;
@ -1555,7 +1596,7 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
if (consumed < mtctx->jobs[wJobID].src.size) {
range_t range = mtctx->jobs[wJobID].prefix;
Range range = mtctx->jobs[wJobID].prefix;
if (range.size == 0) {
/* Empty prefix */
range = mtctx->jobs[wJobID].src;
@ -1571,7 +1612,7 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
/**
* Returns non-zero iff buffer and range overlap.
*/
static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
static int ZSTDMT_isOverlapped(Buffer buffer, Range range)
{
BYTE const* const bufferStart = (BYTE const*)buffer.start;
BYTE const* const rangeStart = (BYTE const*)range.start;
@ -1591,10 +1632,10 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
}
}
static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
static int ZSTDMT_doesOverlapWindow(Buffer buffer, ZSTD_window_t window)
{
range_t extDict;
range_t prefix;
Range extDict;
Range prefix;
DEBUGLOG(5, "ZSTDMT_doesOverlapWindow");
extDict.start = window.dictBase + window.lowLimit;
@ -1613,7 +1654,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
|| ZSTDMT_isOverlapped(buffer, prefix);
}
static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, Buffer buffer)
{
if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
@ -1638,16 +1679,16 @@ static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
*/
static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
{
range_t const inUse = ZSTDMT_getInputDataInUse(mtctx);
Range const inUse = ZSTDMT_getInputDataInUse(mtctx);
size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos;
size_t const target = mtctx->targetSectionSize;
buffer_t buffer;
size_t const spaceNeeded = mtctx->targetSectionSize;
Buffer buffer;
DEBUGLOG(5, "ZSTDMT_tryGetInputRange");
assert(mtctx->inBuff.buffer.start == NULL);
assert(mtctx->roundBuff.capacity >= target);
assert(mtctx->roundBuff.capacity >= spaceNeeded);
if (spaceLeft < target) {
if (spaceLeft < spaceNeeded) {
/* ZSTD_invalidateRepCodes() doesn't work for extDict variants.
* Simply copy the prefix to the beginning in that case.
*/
@ -1666,7 +1707,7 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
mtctx->roundBuff.pos = prefixSize;
}
buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos;
buffer.capacity = target;
buffer.capacity = spaceNeeded;
if (ZSTDMT_isOverlapped(buffer, inUse)) {
DEBUGLOG(5, "Waiting for buffer...");
@ -1693,7 +1734,7 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
typedef struct {
size_t toLoad; /* The number of bytes to load from the input. */
int flush; /* Boolean declaring if we must flush because we found a synchronization point. */
} syncPoint_t;
} SyncPoint;
/**
* Searches through the input for a synchronization point. If one is found, we
@ -1701,14 +1742,14 @@ typedef struct {
* Otherwise, we will load as many bytes as possible and instruct the caller
* to continue as normal.
*/
static syncPoint_t
static SyncPoint
findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
{
BYTE const* const istart = (BYTE const*)input.src + input.pos;
U64 const primePower = mtctx->rsync.primePower;
U64 const hitMask = mtctx->rsync.hitMask;
syncPoint_t syncPoint;
SyncPoint syncPoint;
U64 hash;
BYTE const* prev;
size_t pos;
@ -1840,7 +1881,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
}
if (mtctx->inBuff.buffer.start != NULL) {
syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input);
SyncPoint const syncPoint = findSynchronizationPoint(mtctx, *input);
if (syncPoint.flush && endOp == ZSTD_e_continue) {
endOp = ZSTD_e_flush;
}

thirdparty/zstd/compress/zstdmt_compress.h (vendored)

@ -11,10 +11,10 @@
#ifndef ZSTDMT_COMPRESS_H
#define ZSTDMT_COMPRESS_H
#if defined (__cplusplus)
extern "C" {
#endif
/* === Dependencies === */
#include "../common/zstd_deps.h" /* size_t */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
#include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
/* Note : This is an internal API.
* These APIs used to be exposed with ZSTDLIB_API,
@ -25,12 +25,6 @@
* otherwise ZSTDMT_createCCtx*() will fail.
*/
/* === Dependencies === */
#include "../common/zstd_deps.h" /* size_t */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
#include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
/* === Constants === */
#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
# define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
@ -105,9 +99,4 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
*/
ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTDMT_COMPRESS_H */

thirdparty/zstd/decompress/huf_decompress_amd64.S (vendored)

@ -42,13 +42,11 @@
/* Calling convention:
*
* %rdi contains the first argument: HUF_DecompressAsmArgs*.
* %rdi (or %rcx on Windows) contains the first argument: HUF_DecompressAsmArgs*.
* %rbp isn't maintained (no frame pointer).
* %rsp contains the stack pointer that grows down.
* No red-zone is assumed, only addresses >= %rsp are used.
* All register contents are preserved.
*
* TODO: Support Windows calling convention.
*/
ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
@ -137,7 +135,11 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
push %r15
/* Read HUF_DecompressAsmArgs* args from %rax */
#if defined(_WIN32)
movq %rcx, %rax
#else
movq %rdi, %rax
#endif
movq 0(%rax), %ip0
movq 8(%rax), %ip1
movq 16(%rax), %ip2
@ -391,7 +393,12 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
push %r14
push %r15
/* Read HUF_DecompressAsmArgs* args from %rax */
#if defined(_WIN32)
movq %rcx, %rax
#else
movq %rdi, %rax
#endif
movq 0(%rax), %ip0
movq 8(%rax), %ip1
movq 16(%rax), %ip2
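Background on the two guards above (standard x86-64 ABI material, not part of the patch):

/* The assembly loop implements a C prototype taking one pointer:
 *     void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressAsmArgs* args);
 * System V AMD64 (Linux/macOS/BSD) passes that pointer in %rdi;
 * Microsoft x64 (Windows, including MinGW) passes it in %rcx and has the
 * caller reserve 32 bytes of shadow space. Normalizing the live register
 * into %rax once keeps the remainder of both routines ABI-agnostic. */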

thirdparty/zstd/decompress/zstd_decompress.c (vendored)

@ -444,7 +444,7 @@ size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
** or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
{
const BYTE* ip = (const BYTE*)src;
size_t const minInputSize = ZSTD_startingInputLength(format);
@ -484,8 +484,10 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
zfhPtr->frameType = ZSTD_skippableFrame;
zfhPtr->dictID = MEM_readLE32(src) - ZSTD_MAGIC_SKIPPABLE_START;
zfhPtr->headerSize = ZSTD_SKIPPABLEHEADERSIZE;
zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
return 0;
}
RETURN_ERROR(prefix_unknown, "");
@ -554,7 +556,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
* or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize)
{
return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
}
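A short sketch of what the reordered skippable-frame branch above now reports; the function name is illustrative, and ZSTD_getFrameHeader() requires the static-linking-only API:

#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_getFrameHeader, ZSTD_FrameHeader */
#include <stdio.h>
#include "zstd.h"

static void printSkippableInfo(const void* src, size_t srcSize)
{
    ZSTD_FrameHeader zfh;
    if (ZSTD_getFrameHeader(&zfh, src, srcSize) == 0
        && zfh.frameType == ZSTD_skippableFrame) {
        /* dictID now carries the magic-number variant (0..15), and
         * headerSize is ZSTD_SKIPPABLEHEADERSIZE for such frames */
        printf("skippable: variant %u, content %llu bytes, header %u bytes\n",
               zfh.dictID, zfh.frameContentSize, zfh.headerSize);
    }
}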
@ -572,7 +574,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
}
#endif
{ ZSTD_frameHeader zfh;
{ ZSTD_FrameHeader zfh;
if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
return ZSTD_CONTENTSIZE_ERROR;
if (zfh.frameType == ZSTD_skippableFrame) {
@ -750,7 +752,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
const BYTE* const ipstart = ip;
size_t remainingSize = srcSize;
size_t nbBlocks = 0;
ZSTD_frameHeader zfh;
ZSTD_FrameHeader zfh;
/* Extract Frame Header */
{ size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format);
@ -811,7 +813,7 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
/** ZSTD_decompressBound() :
* compatible with legacy mode
* `src` must point to the start of a ZSTD frame or a skippeable frame
* `src` must point to the start of a ZSTD frame or a skippable frame
* `srcSize` must be at least as large as the frame contained
* @return : the maximum decompressed size of the compressed source
*/
@ -843,7 +845,7 @@ size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
size_t const compressedSize = frameSizeInfo.compressedSize;
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
ZSTD_frameHeader zfh;
ZSTD_FrameHeader zfh;
FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), "");
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
@ -917,7 +919,7 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
return regenSize;
}
static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, int streaming)
{
#if ZSTD_TRACE
if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) {
@ -1057,7 +1059,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
}
ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
/* Allow caller to get size read */
DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr);
DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %i, consuming %i bytes of input", (int)(op-ostart), (int)(ip - (const BYTE*)*srcPtr));
*srcPtr = ip;
*srcSizePtr = remainingSrcSize;
return (size_t)(op-ostart);
@ -1641,7 +1643,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
* ZSTD_getFrameHeader(), which will provide a more precise error code. */
unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
{
ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
ZSTD_FrameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
if (ZSTD_isError(hError)) return 0;
return zfp.dictID;
@ -1999,7 +2001,7 @@ size_t ZSTD_estimateDStreamSize(size_t windowSize)
size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
{
U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
ZSTD_frameHeader zfh;
ZSTD_FrameHeader zfh;
size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
if (ZSTD_isError(err)) return err;
RETURN_ERROR_IF(err>0, srcSize_wrong, "");
@ -2094,6 +2096,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
U32 someMoreWork = 1;
DEBUGLOG(5, "ZSTD_decompressStream");
assert(zds != NULL);
RETURN_ERROR_IF(
input->pos > input->size,
srcSize_wrong,

thirdparty/zstd/decompress/zstd_decompress_block.c (vendored)

@ -139,7 +139,7 @@ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
{ const BYTE* const istart = (const BYTE*) src;
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
SymbolEncodingType_e const litEncType = (SymbolEncodingType_e)(istart[0] & 3);
size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
switch(litEncType)
@ -358,7 +358,7 @@ size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
* - start from default distributions, present in /lib/common/zstd_internal.h
* - generate tables normally, using ZSTD_buildFSETable()
* - printout the content of tables
* - pretify output, report below, test with fuzzer to ensure it's correct */
* - prettify output, report below, test with fuzzer to ensure it's correct */
/* Default FSE distribution table for Literal Lengths */
static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
@ -645,7 +645,7 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
* @return : nb bytes read from src,
* or an error code if it fails */
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
symbolEncodingType_e type, unsigned max, U32 maxLog,
SymbolEncodingType_e type, unsigned max, U32 maxLog,
const void* src, size_t srcSize,
const U32* baseValue, const U8* nbAdditionalBits,
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
@ -728,9 +728,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
/* FSE table descriptors */
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
{ SymbolEncodingType_e const LLtype = (SymbolEncodingType_e)(*ip >> 6);
SymbolEncodingType_e const OFtype = (SymbolEncodingType_e)((*ip >> 4) & 3);
SymbolEncodingType_e const MLtype = (SymbolEncodingType_e)((*ip >> 2) & 3);
ip++;
/* Build DTables */
@ -1935,12 +1935,6 @@ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
#endif /* DYNAMIC_BMI2 */
typedef size_t (*ZSTD_decompressSequences_t)(
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset);
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static size_t
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,

thirdparty/zstd/decompress/zstd_decompress_internal.h (vendored)

@ -136,7 +136,7 @@ struct ZSTD_DCtx_s
const void* virtualStart; /* virtual start of previous segment if it was just before current one */
const void* dictEnd; /* end of previous segment */
size_t expected;
ZSTD_frameHeader fParams;
ZSTD_FrameHeader fParams;
U64 processedCSize;
U64 decodedSize;
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
@ -154,7 +154,7 @@ struct ZSTD_DCtx_s
size_t rleSize;
size_t staticSize;
int isFrameDecompression;
#if DYNAMIC_BMI2 != 0
#if DYNAMIC_BMI2
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
#endif
@ -211,11 +211,11 @@ struct ZSTD_DCtx_s
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
#if DYNAMIC_BMI2 != 0
return dctx->bmi2;
#if DYNAMIC_BMI2
return dctx->bmi2;
#else
(void)dctx;
return 0;
return 0;
#endif
}

thirdparty/zstd/zstd.h (vendored, 373 lines changed)

@ -7,17 +7,22 @@
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef ZSTD_H_235446
#define ZSTD_H_235446
/* ====== Dependencies ======*/
#include <limits.h> /* INT_MAX */
#include <stddef.h> /* size_t */
#include "zstd_errors.h" /* list of errors */
#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
#include <limits.h> /* INT_MAX */
#endif /* ZSTD_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
extern "C" {
#endif
/* ===== ZSTDLIB_API : control library symbols visibility ===== */
#ifndef ZSTDLIB_VISIBLE
@ -57,7 +62,7 @@ extern "C" {
#else
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define ZSTD_DEPRECATED(message) [[deprecated(message)]]
# elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
# elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__) || defined(__IAR_SYSTEMS_ICC__)
# define ZSTD_DEPRECATED(message) __attribute__((deprecated(message)))
# elif defined(__GNUC__) && (__GNUC__ >= 3)
# define ZSTD_DEPRECATED(message) __attribute__((deprecated))
@ -106,7 +111,7 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 5
#define ZSTD_VERSION_RELEASE 6
#define ZSTD_VERSION_RELEASE 7
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
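Worked out from the formula above, this release encodes as 1*100*100 + 5*100 + 7 = 10507, which downstream code can gate on; a minimal sketch:

#include "zstd.h"
#if ZSTD_VERSION_NUMBER >= 10507
/* safe to rely on 1.5.7 behavior, e.g. the skippable-frame header
 * fields populated by ZSTD_getFrameHeader() elsewhere in this diff */
#endif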
/*! ZSTD_versionNumber() :
@ -144,7 +149,7 @@ ZSTDLIB_API const char* ZSTD_versionString(void);
/***************************************
* Simple API
* Simple Core API
***************************************/
/*! ZSTD_compress() :
* Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
@ -157,68 +162,80 @@ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
int compressionLevel);
/*! ZSTD_decompress() :
* `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
* `dstCapacity` is an upper bound of originalSize to regenerate.
* If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
* @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
* or an errorCode if it fails (which can be tested using ZSTD_isError()). */
* `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
* Multiple compressed frames can be decompressed at once with this method.
* The result will be the concatenation of all decompressed frames, back to back.
* `dstCapacity` is an upper bound of originalSize to regenerate.
* First frame's decompressed size can be extracted using ZSTD_getFrameContentSize().
* If maximum upper bound isn't known, prefer using streaming mode to decompress data.
* @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
* or an errorCode if it fails (which can be tested using ZSTD_isError()). */
ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
/*====== Decompression helper functions ======*/
/*! ZSTD_getFrameContentSize() : requires v1.3.0+
* `src` should point to the start of a ZSTD encoded frame.
* `srcSize` must be at least as large as the frame header.
* hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
* @return : - decompressed size of `src` frame content, if known
* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
* note 1 : a 0 return value means the frame is valid but "empty".
* note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
* When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
* In which case, it's necessary to use streaming mode to decompress data.
* Optionally, application can rely on some implicit limit,
* as ZSTD_decompress() only needs an upper bound of decompressed size.
* (For example, data could be necessarily cut into blocks <= 16 KB).
* note 3 : decompressed size is always present when compression is completed using single-pass functions,
* such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
* note 4 : decompressed size can be very large (64-bits value),
* potentially larger than what local system can handle as a single memory segment.
* In which case, it's necessary to use streaming mode to decompress data.
* note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
* Always ensure return value fits within application's authorized limits.
* Each application can set its own limits.
* note 6 : This function replaces ZSTD_getDecompressedSize() */
* `src` should point to the start of a ZSTD encoded frame.
* `srcSize` must be at least as large as the frame header.
* hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
* @return : - decompressed size of `src` frame content, if known
* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
* note 1 : a 0 return value means the frame is valid but "empty".
* When invoking this method on a skippable frame, it will return 0.
* note 2 : decompressed size is an optional field, it may not be present (typically in streaming mode).
* When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
* In which case, it's necessary to use streaming mode to decompress data.
* Optionally, application can rely on some implicit limit,
* as ZSTD_decompress() only needs an upper bound of decompressed size.
* (For example, data could be necessarily cut into blocks <= 16 KB).
* note 3 : decompressed size is always present when compression is completed using single-pass functions,
* such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
* note 4 : decompressed size can be very large (64-bits value),
* potentially larger than what local system can handle as a single memory segment.
* In which case, it's necessary to use streaming mode to decompress data.
* note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
* Always ensure return value fits within application's authorized limits.
* Each application can set its own limits.
* note 6 : This function replaces ZSTD_getDecompressedSize() */
#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
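For reference, the declarations above combine into the usual one-shot pattern: query the frame's declared content size, allocate, then decompress. A minimal editor's sketch (not part of the upstream header), assuming a single well-formed frame and treating the declared size as untrusted input per note 5:

#include <stdlib.h>
#include <zstd.h>

/* Illustrative only: decompress one frame whose content size is declared
 * in its header. Returns NULL on error or when streaming mode is required. */
static void* decompress_frame(const void* src, size_t srcSize, size_t* outSize)
{
    unsigned long long const contentSize = ZSTD_getFrameContentSize(src, srcSize);
    if (contentSize == ZSTD_CONTENTSIZE_ERROR) return NULL;   /* not a valid frame */
    if (contentSize == ZSTD_CONTENTSIZE_UNKNOWN) return NULL; /* use the streaming API */
    if (contentSize > (1ULL << 30)) return NULL;              /* application-defined limit, per note 5 */
    void* const dst = malloc(contentSize ? (size_t)contentSize : 1);
    if (dst == NULL) return NULL;
    size_t const dSize = ZSTD_decompress(dst, (size_t)contentSize, src, srcSize);
    if (ZSTD_isError(dSize)) { free(dst); return NULL; }
    *outSize = dSize;
    return dst;
}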
/*! ZSTD_getDecompressedSize() (obsolete):
* This function is now obsolete, in favor of ZSTD_getFrameContentSize().
* Both functions work the same way, but ZSTD_getDecompressedSize() blends
* "empty", "unknown" and "error" results to the same return value (0),
* while ZSTD_getFrameContentSize() gives them separate return values.
* @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize")
ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+
* `src` should point to the start of a ZSTD frame or skippable frame.
* `srcSize` must be >= first frame size
* @return : the compressed size of the first frame starting at `src`,
* suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
* or an error code if input is invalid
* Note 1: this method is called _find*() because it's not enough to read the header,
* it may have to scan through the frame's content, to reach its end.
* Note 2: this method also works with Skippable Frames. In which case,
* it returns the size of the complete skippable frame,
* which is always equal to its content size + 8 bytes for headers. */
ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
/*====== Compression helper functions ======*/
/*! ZSTD_compressBound() :
* maximum compressed size in worst case single-pass scenario.
* When invoking `ZSTD_compress()`, or any other one-pass compression function,
* it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize)
* as it eliminates one potential failure scenario,
* aka not enough room in dst buffer to write the compressed frame.
* Note : ZSTD_compressBound() itself can fail, if @srcSize >= ZSTD_MAX_INPUT_SIZE .
* In which case, ZSTD_compressBound() will return an error code
* which can be tested using ZSTD_isError().
*
@@ -226,21 +243,25 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
* same as ZSTD_compressBound(), but as a macro.
* It can be used to produce constants, which can be useful for static allocation,
* for example to size a static array on stack.
* Will produce constant value 0 if srcSize is too large.
*/
#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U)
#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
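As a usage sketch (editor's illustration, not upstream code), the macro form can size a static buffer at compile time while the function form covers run-time sizes; `MSG_MAX` is a hypothetical application limit:

#include <zstd.h>

#define MSG_MAX 512   /* hypothetical application message size */
static char compressedBuf[ZSTD_COMPRESSBOUND(MSG_MAX)];  /* compile-time constant */

/* Compress a message of at most MSG_MAX bytes; cannot fail for lack of room. */
static size_t compress_msg(const void* msg, size_t msgSize)
{
    return ZSTD_compress(compressedBuf, sizeof compressedBuf,
                         msg, msgSize, ZSTD_CLEVEL_DEFAULT);
}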
/*====== Error helper functions ======*/
/* ZSTD_isError() :
* Most ZSTD_* functions returning a size_t value can be tested for error,
* using ZSTD_isError().
* @return 1 if error, 0 otherwise
*/
ZSTDLIB_API unsigned ZSTD_isError(size_t result); /*!< tells if a `size_t` function result is an error code */
ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); /* convert a result into an error code, which can be compared to error enum list */
ZSTDLIB_API const char* ZSTD_getErrorName(size_t result); /*!< provides readable string from a function result */
ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */
ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */
ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */
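The error helpers above imply one checking idiom that applies to nearly every size_t-returning function in this header; a small sketch (illustrative, names are the editor's own):

#include <stdio.h>
#include <zstd.h>

/* Screen any zstd size_t result before using it as a size. */
static int check_zstd(size_t result, const char* what)
{
    if (ZSTD_isError(result)) {
        fprintf(stderr, "%s: %s\n", what, ZSTD_getErrorName(result));
        return -1;
    }
    return 0;
}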
/***************************************
@@ -248,17 +269,17 @@ ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compres
***************************************/
/*= Compression context
* When compressing many times,
* it is recommended to allocate a compression context just once,
* and reuse it for each successive compression operation.
* This will make the workload easier for system's memory.
* Note : re-using context is just a speed / resource optimization.
* It doesn't change the compression ratio, which remains identical.
* Note 2: For parallel execution in multi-threaded environments,
* use one different context per thread.
*/
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* compatible with NULL pointer */
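To illustrate the reuse advice above, an editor's sketch using ZSTD_compressCCtx() (declared just below): one allocation is amortized across all chunks, with no effect on the compression ratio. The chunk arrays are assumed to be provided by the caller:

#include <zstd.h>

/* Compress nbChunks independent buffers with a single context. */
static int compress_chunks(void* const* dsts, const size_t* dstCaps,
                           const void* const* srcs, const size_t* srcSizes,
                           size_t nbChunks, int level)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    if (cctx == NULL) return -1;
    for (size_t i = 0; i < nbChunks; i++) {
        size_t const r = ZSTD_compressCCtx(cctx, dsts[i], dstCaps[i],
                                           srcs[i], srcSizes[i], level);
        if (ZSTD_isError(r)) { ZSTD_freeCCtx(cctx); return -1; }
    }
    ZSTD_freeCCtx(cctx);  /* NULL would also be accepted */
    return 0;
}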
/*! ZSTD_compressCCtx() :
* Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
@@ -266,7 +287,7 @@ ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer *
* this function compresses at the requested compression level,
* __ignoring any other advanced parameter__ .
* If any advanced parameter was set using the advanced API,
* they will all be reset. Only @compressionLevel remains.
*/
ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
@@ -392,7 +413,7 @@ typedef enum {
* Special: value 0 means "use default strategy". */
ZSTD_c_targetCBlockSize=130, /* v1.5.6+
* Attempts to fit compressed block size into approximately targetCBlockSize.
* Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
* Note that it's not a guarantee, just a convergence target (default:0).
* No target when targetCBlockSize == 0.
@@ -488,7 +509,8 @@ typedef enum {
* ZSTD_c_stableOutBuffer
* ZSTD_c_blockDelimiters
* ZSTD_c_validateSequences
* ZSTD_c_blockSplitterLevel
* ZSTD_c_splitAfterSequences
* ZSTD_c_useRowMatchFinder
* ZSTD_c_prefetchCDictTables
* ZSTD_c_enableSeqProducerFallback
@@ -515,7 +537,8 @@ typedef enum {
ZSTD_c_experimentalParam16=1013,
ZSTD_c_experimentalParam17=1014,
ZSTD_c_experimentalParam18=1015,
ZSTD_c_experimentalParam19=1016,
ZSTD_c_experimentalParam20=1017
} ZSTD_cParameter;
typedef struct {
@@ -855,7 +878,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
*
* A ZSTD_DStream object is required to track streaming operations.
* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
* ZSTD_DStream objects can be re-employed multiple times.
*
* Use ZSTD_initDStream() to start a new decompression operation.
* @return : recommended first input size
@@ -865,16 +888,21 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
* The function will update both `pos` fields.
* If `input.pos < input.size`, some input has not been consumed.
* It's up to the caller to present again remaining data.
*
* The function tries to flush all data decoded immediately, respecting output buffer size.
* If `output.pos < output.size`, decoder has flushed everything it could.
*
* However, when `output.pos == output.size`, it's more difficult to know.
* If @return > 0, the frame is not complete, meaning
* either there is still some data left to flush within internal buffers,
* or there is more input to read to complete the frame (or both).
* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
* @return : 0 when a frame is completely decoded and fully flushed,
* or an error code, which can be tested using ZSTD_isError(),
* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
* the return value is a suggested next input size (just a hint for better latency)
* that will never request more than the remaining content of the compressed frame.
* *******************************************************************************/
typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
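The loop this contract describes can be sketched as follows (editor's illustration only; it assumes the whole frame fits in one output buffer, which a real consumer would instead drain incrementally):

#include <zstd.h>

static int decompress_all(const void* src, size_t srcSize,
                          void* dst, size_t dstCapacity)
{
    ZSTD_DStream* const zds = ZSTD_createDStream();
    if (zds == NULL) return -1;
    ZSTD_initDStream(zds);

    ZSTD_inBuffer  in  = { src, srcSize, 0 };
    ZSTD_outBuffer out = { dst, dstCapacity, 0 };
    size_t hint = 1;                      /* any nonzero value: frame not finished */
    while (hint != 0) {
        hint = ZSTD_decompressStream(zds, &out, &in);
        if (ZSTD_isError(hint)) break;                          /* decoding error */
        if (hint != 0 && in.pos == in.size && out.pos < out.size)
            break;                                              /* truncated input */
        if (hint != 0 && out.pos == out.size)
            break;                                              /* dst too small for this sketch */
    }
    ZSTD_freeDStream(zds);
    return (hint == 0) ? 0 : -1;
}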
@@ -901,9 +929,10 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
* Function will update both input and output `pos` fields exposing current state via these fields:
* - `input.pos < input.size`, some input remaining and caller should provide remaining input
* on the next call.
* - `output.pos < output.size`, decoder flushed internal output buffer.
* - `output.pos == output.size`, unflushed data potentially present in the internal buffers,
* check ZSTD_decompressStream() @return value,
* if > 0, invoke it again to flush remaining data to output.
* Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX.
*
* @return : 0 when a frame is completely decoded and fully flushed,
@@ -1181,6 +1210,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_H_235446 */
@@ -1196,6 +1229,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
#if defined (__cplusplus)
extern "C" {
#endif
/* This can be overridden externally to hide static symbols. */
#ifndef ZSTDLIB_STATIC_API
# if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
@@ -1307,7 +1344,7 @@ typedef struct {
*
* Note: This field is optional. ZSTD_generateSequences() will calculate the value of
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
* sequence provider perspective. For example, ZSTD_compressSequences() does not
* use this 'rep' field at all (as of now).
*/
} ZSTD_Sequence;
@@ -1412,14 +1449,15 @@ typedef enum {
} ZSTD_literalCompressionMode_e;
typedef enum {
/* Note: This enum controls features which are conditionally beneficial.
* Zstd can take a decision on whether or not to enable the feature (ZSTD_ps_auto),
* but setting the switch to ZSTD_ps_enable or ZSTD_ps_disable force enable/disable the feature.
*/
ZSTD_ps_auto = 0, /* Let the library automatically determine whether the feature shall be enabled */
ZSTD_ps_enable = 1, /* Force-enable the feature */
ZSTD_ps_disable = 2 /* Do not use the feature */
} ZSTD_ParamSwitch_e;
#define ZSTD_paramSwitch_e ZSTD_ParamSwitch_e /* old name */
/***************************************
* Frame header and size functions
@@ -1464,34 +1502,36 @@ ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src,
ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
/*! ZSTD_frameHeaderSize() :
* srcSize must be large enough, aka >= ZSTD_FRAMEHEADERSIZE_PREFIX.
* @return : size of the Frame Header,
* or an error code (if srcSize is too small) */
ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_FrameType_e;
#define ZSTD_frameType_e ZSTD_FrameType_e /* old name */
typedef struct {
unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
unsigned long long windowSize; /* can be very large, up to <= frameContentSize */
unsigned blockSizeMax;
ZSTD_FrameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
unsigned headerSize;
unsigned dictID; /* for ZSTD_skippableFrame, contains the skippable magic variant [0-15] */
unsigned checksumFlag;
unsigned _reserved1;
unsigned _reserved2;
} ZSTD_FrameHeader;
#define ZSTD_frameHeader ZSTD_FrameHeader /* old name */
/*! ZSTD_getFrameHeader() :
* decode Frame Header into `zfhPtr`, or requires larger `srcSize`.
* @return : 0 => header is complete, `zfhPtr` is correctly filled,
* >0 => `srcSize` is too small, @return value is the wanted `srcSize` amount, `zfhPtr` is not filled,
* or an error code, which can be tested using ZSTD_isError() */
ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize);
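A short sketch of the calling convention (editor's illustration; ZSTD_getFrameHeader() lives in the static-linking-only section, so ZSTD_STATIC_LINKING_ONLY must be defined before including the header):

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

/* Inspect a frame header without consuming input. */
static int inspect_frame(const void* src, size_t srcSize)
{
    ZSTD_FrameHeader zfh;
    size_t const r = ZSTD_getFrameHeader(&zfh, src, srcSize);
    if (ZSTD_isError(r)) return -1;      /* invalid or corrupted header */
    if (r > 0) return 1;                 /* need at least r bytes; retry with more input */
    /* zfh.frameType, zfh.frameContentSize, zfh.windowSize are now usable;
     * treat them as untrusted if the source is untrusted. */
    return 0;
}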
/*! ZSTD_getFrameHeader_advanced() :
* same as ZSTD_getFrameHeader(),
* with added capability to select a format (like ZSTD_f_zstd1_magicless) */
ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
/*! ZSTD_decompressionMargin() :
* Zstd supports in-place decompression, where the input and output buffers overlap.
@@ -1539,9 +1579,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSi
))
typedef enum {
ZSTD_sf_noBlockDelimiters = 0, /* ZSTD_Sequence[] has no block delimiters, just sequences */
ZSTD_sf_explicitBlockDelimiters = 1 /* ZSTD_Sequence[] contains explicit block delimiters */
} ZSTD_SequenceFormat_e;
#define ZSTD_sequenceFormat_e ZSTD_SequenceFormat_e /* old name */
/*! ZSTD_sequenceBound() :
* `srcSize` : size of the input buffer
@@ -1565,7 +1606,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
* @param zc The compression context to be used for ZSTD_compress2(). Set any
* compression parameters you need on this context.
* @param outSeqs The output sequences buffer of size @p outSeqsSize
* @param outSeqsCapacity The size of the output sequences buffer.
* ZSTD_sequenceBound(srcSize) is an upper bound on the number
* of sequences that can be generated.
* @param src The source buffer to generate sequences from of size @p srcSize.
@@ -1583,7 +1624,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
ZSTDLIB_STATIC_API size_t
ZSTD_generateSequences(ZSTD_CCtx* zc,
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
const void* src, size_t srcSize);
/*! ZSTD_mergeBlockDelimiters() :
@@ -1603,7 +1644,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si
* Compress an array of ZSTD_Sequence, associated with @src buffer, into dst.
* @src contains the entire input (not just the literals).
* If @srcSize > sum(sequence.length), the remaining bytes are considered all literals
* If a dictionary is included, then the cctx should reference the dict (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.).
* The entire source is compressed into a single frame.
*
* The compression behavior changes based on cctx params. In particular:
@@ -1612,11 +1653,17 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si
* the block size derived from the cctx, and sequences may be split. This is the default setting.
*
* If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
* valid block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
*
* When ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, it's possible to decide generating repcodes
* using the advanced parameter ZSTD_c_repcodeResolution. Repcodes will improve compression ratio, though the benefit
* can vary greatly depending on Sequences. On the other hand, repcode resolution is an expensive operation.
* By default, it's disabled at low (<10) compression levels, and enabled above the threshold (>=10).
* ZSTD_c_repcodeResolution makes it possible to directly manage this processing in either direction.
*
* If ZSTD_c_validateSequences == 0, this function blindly accepts the Sequences provided. Invalid Sequences cause undefined
* behavior. If ZSTD_c_validateSequences == 1, then the function will detect invalid Sequences (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements) and then bail out and return an error.
*
* In addition to the two adjustable experimental params, there are other important cctx params.
* - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
@@ -1624,15 +1671,42 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si
* - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
*
* Note: Repcodes are, as of now, always re-calculated within this function, ZSTD_Sequence.rep is effectively unused.
* Dev Note: Once ability to ingest repcodes become available, the explicit block delims mode must respect those repcodes exactly,
* and cannot emit an RLE block that disagrees with the repcode history.
* @return : final compressed size, or a ZSTD error code.
*/
ZSTDLIB_STATIC_API size_t
ZSTD_compressSequences(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize);
/*! ZSTD_compressSequencesAndLiterals() :
* This is a variant of ZSTD_compressSequences() which,
* instead of receiving (src,srcSize) as input parameter, receives (literals,litSize),
* aka all the literals, already extracted and laid out into a single continuous buffer.
* This can be useful if the process generating the sequences also happens to generate the buffer of literals,
* thus skipping an extraction + caching stage.
* It's a speed optimization, useful when the right conditions are met,
* but it also features the following limitations:
* - Only supports explicit delimiter mode
* - Currently does not support Sequences validation (so input Sequences are trusted)
* - Not compatible with frame checksum, which must be disabled
* - If any block is incompressible, will fail and return an error
* - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error.
* - @litBufCapacity is the size of the underlying buffer into which literals are written, starting at address @literals.
* @litBufCapacity must be at least 8 bytes larger than @litSize.
* - @decompressedSize must be correct, and correspond to the sum of all Sequences. Any discrepancy will generate an error.
* @return : final compressed size, or a ZSTD error code.
*/
ZSTDLIB_STATIC_API size_t
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t nbSequences,
const void* literals, size_t litSize, size_t litBufCapacity,
size_t decompressedSize);
/*! ZSTD_writeSkippableFrame() :
@@ -1640,8 +1714,8 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize,
*
* Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
* ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
* As such, the parameter magicVariant controls the exact skippable frame magic number variant used,
* so the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
*
* Returns an error if destination buffer is not large enough, if the source size is not representable
* with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
@@ -1649,26 +1723,28 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize,
* @return : number of bytes written or a ZSTD error.
*/
ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned magicVariant);
/*! ZSTD_readSkippableFrame() :
* Retrieves the content of a zstd skippable frame starting at @src, and writes it to @dst buffer.
*
* The parameter @magicVariant will receive the magicVariant that was supplied when the frame was written,
* i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START.
* This can be NULL if the caller is not interested in the magicVariant.
*
* Returns an error if destination buffer is not large enough, or if the frame is not skippable.
*
* @return : number of bytes written or a ZSTD error.
*/
ZSTDLIB_STATIC_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
unsigned* magicVariant,
const void* src, size_t srcSize);
/*! ZSTD_isSkippableFrame() :
* Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
*/
ZSTDLIB_STATIC_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
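A round-trip sketch for the three skippable-frame entry points above (editor's illustration; the 8-byte overhead mentioned earlier is the 4-byte magic plus the 4-byte size field, and the payload is a hypothetical example):

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#include <string.h>

/* Embed a small metadata blob in a skippable frame, then read it back. */
static int metadata_roundtrip(void)
{
    const char meta[] = "app-metadata";          /* hypothetical payload */
    char frame[sizeof meta + 8];
    size_t const written = ZSTD_writeSkippableFrame(frame, sizeof frame,
                                                    meta, sizeof meta, 7);
    if (ZSTD_isError(written)) return -1;
    if (!ZSTD_isSkippableFrame(frame, written)) return -1;

    char out[sizeof meta];
    unsigned variant = 0;
    size_t const got = ZSTD_readSkippableFrame(out, sizeof out, &variant,
                                               frame, written);
    if (ZSTD_isError(got)) return -1;
    return (variant == 7 && got == sizeof meta
            && memcmp(out, meta, got) == 0) ? 0 : -1;
}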
@@ -1796,7 +1872,15 @@ static
#ifdef __GNUC__
__attribute__((__unused__))
#endif
#if defined(__clang__) && __clang_major__ >= 5
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
#if defined(__clang__) && __clang_major__ >= 5
#pragma clang diagnostic pop
#endif
ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
@@ -1976,7 +2060,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* See the comments on that enum for an explanation of the feature. */
#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
/* Controlled with ZSTD_ParamSwitch_e enum.
* Default is ZSTD_ps_auto.
* Set to ZSTD_ps_disable to never compress literals.
* Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals
@@ -2117,22 +2201,46 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
/* ZSTD_c_validateSequences
* Default is 0 == disabled. Set to 1 to enable sequence validation.
*
* For use with sequence compression API: ZSTD_compressSequences*().
* Designates whether or not provided sequences are validated within ZSTD_compressSequences*()
* during function execution.
*
* When Sequence validation is disabled (default), Sequences are compressed as-is,
* so they must be correct, otherwise it would result in a corruption error.
*
* Sequence validation adds some protection, by ensuring that all values respect boundary conditions.
* If a Sequence is detected invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements) then the function will bail out and
* return an error.
*
*/
#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
/* ZSTD_c_blockSplitterLevel
* note: this parameter only influences the first splitter stage,
* which is active before producing the sequences.
* ZSTD_c_splitAfterSequences controls the next splitter stage,
* which is active after sequence production.
* Note that both can be combined.
* Allowed values are between 0 and ZSTD_BLOCKSPLITTER_LEVEL_MAX included.
* 0 means "auto", which will select a value depending on current ZSTD_c_strategy.
* 1 means no splitting.
* Then, values from 2 to 6 are sorted in increasing cpu load order.
*
* Note that currently the first block is never split,
* to ensure expansion guarantees in presence of incompressible data.
*/
#define ZSTD_BLOCKSPLITTER_LEVEL_MAX 6
#define ZSTD_c_blockSplitterLevel ZSTD_c_experimentalParam20
/* ZSTD_c_splitAfterSequences
* This is a stronger splitter algorithm,
* based on actual sequences previously produced by the selected parser.
* It's also slower, and as a consequence, mostly used for high compression levels.
* While the post-splitter does overlap with the pre-splitter,
* both can nonetheless be combined,
* notably with ZSTD_c_blockSplitterLevel at ZSTD_BLOCKSPLITTER_LEVEL_MAX,
* resulting in higher compression ratio than just one of them.
*
* Default is ZSTD_ps_auto.
* Set to ZSTD_ps_disable to never use block splitter.
* Set to ZSTD_ps_enable to always use block splitter.
@@ -2140,10 +2248,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
* block splitting based on the compression parameters.
*/
#define ZSTD_c_splitAfterSequences ZSTD_c_experimentalParam13
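As the notes above indicate, the two splitter stages can be combined; an editor's sketch of doing so through the regular parameter-setting entry point (error checks elided for brevity):

#define ZSTD_STATIC_LINKING_ONLY   /* experimental parameters live in this section */
#include <zstd.h>

/* Enable both splitter stages on a compression context. */
static void enable_both_splitters(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockSplitterLevel, ZSTD_BLOCKSPLITTER_LEVEL_MAX);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitAfterSequences, ZSTD_ps_enable);
}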
/* ZSTD_c_useRowMatchFinder
* Controlled with ZSTD_ParamSwitch_e enum.
* Default is ZSTD_ps_auto.
* Set to ZSTD_ps_disable to never use row-based matchfinder.
* Set to ZSTD_ps_enable to force usage of row-based matchfinder.
@@ -2175,7 +2283,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
/* ZSTD_c_prefetchCDictTables
* Controlled with ZSTD_ParamSwitch_e enum. Default is ZSTD_ps_auto.
*
* In some situations, zstd uses CDict tables in-place rather than copying them
* into the working context. (See docs on ZSTD_dictAttachPref_e above for details).
@@ -2219,19 +2327,21 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
* bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make
* compressBound() inaccurate). Only currently meant to be used for testing.
*/
#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18
/* ZSTD_c_repcodeResolution
* This parameter only has an effect if ZSTD_c_blockDelimiters is
* set to ZSTD_sf_explicitBlockDelimiters (may change in the future).
*
* This parameter affects how zstd parses external sequences,
* provided via the ZSTD_compressSequences*() API
* or from an external block-level sequence producer.
*
* If set to ZSTD_ps_enable, the library will check for repeated offsets within
* external sequences, even if those repcodes are not explicitly indicated in
* the "rep" field. Note that this is the only way to exploit repcode matches
* while using compressSequences*() or an external sequence producer, since zstd
* currently ignores the "rep" field of external sequences.
*
* If set to ZSTD_ps_disable, the library will not exploit repeated offsets in
@@ -2240,12 +2350,11 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* compression ratio.
*
* The default value is ZSTD_ps_auto, for which the library will enable/disable
* based on compression level (currently: level<10 disables, level>=10 enables).
*/
#define ZSTD_c_repcodeResolution ZSTD_c_experimentalParam19
#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19 /* older name */
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
@@ -2952,7 +3061,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
>0 : `srcSize` is too small, please provide at least result bytes on next attempt.
errorCode, which can be tested using ZSTD_isError().
It fills a ZSTD_FrameHeader structure with important information to correctly decode the frame,
such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
As a consequence, check that values remain within valid application range.
@@ -3082,8 +3191,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_
ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */

diff --git a/thirdparty/zstd/zstd_errors.h b/thirdparty/zstd/zstd_errors.h
--- a/thirdparty/zstd/zstd_errors.h
+++ b/thirdparty/zstd/zstd_errors.h

@@ -15,10 +15,6 @@
extern "C" {
#endif
/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
#ifndef ZSTDERRORLIB_VISIBLE
/* Backwards compatibility with old macro name */
@@ -80,6 +76,7 @@ typedef enum {
ZSTD_error_tableLog_tooLarge = 44,
ZSTD_error_maxSymbolValue_tooLarge = 46,
ZSTD_error_maxSymbolValue_tooSmall = 48,
ZSTD_error_cannotProduce_uncompressedBlock = 49,
ZSTD_error_stabilityCondition_notRespected = 50,
ZSTD_error_stage_wrong = 60,
ZSTD_error_init_missing = 62,
@@ -100,10 +97,6 @@ typedef enum {
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
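With ZSTD_getErrorCode() now declared in zstd.h itself, the two headers combine as follows when an application wants to branch on a specific failure rather than merely detect one (an editor's sketch):

#include <stdio.h>
#include <zstd.h>
#include <zstd_errors.h>

/* Distinguish "output buffer too small" from every other failure. */
static void report(size_t result)
{
    if (!ZSTD_isError(result)) return;
    ZSTD_ErrorCode const code = ZSTD_getErrorCode(result);
    if (code == ZSTD_error_dstSize_tooSmall)
        fprintf(stderr, "zstd: output buffer too small\n");
    else
        fprintf(stderr, "zstd: %s\n", ZSTD_getErrorString(code));
}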