Rémi Verschelde
2023-05-22 14:32:14 +02:00
parent 65fa775ff6
commit 6100b4bd33
65 changed files with 5170 additions and 3249 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,6 +1,6 @@
/* ******************************************************************
* FSE : Finite State Entropy encoder
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -26,6 +26,7 @@
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
#include "../common/bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@ -90,7 +91,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
assert(tableLog < 16); /* required for threshold strategy to work */
/* For explanations on how to distribute symbol values over the table :
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
* https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
#ifdef __clang_analyzer__
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
@ -191,7 +192,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
break;
default :
assert(normalizedCounter[s] > 1);
{ U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
{ U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
@ -342,21 +343,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
* FSE Compression Code
****************************************************************/
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
size_t size;
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
return (FSE_CTable*)ZSTD_malloc(size);
}
void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */
return minBits;
@ -364,7 +355,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
U32 tableLog = maxTableLog;
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
assert(srcSize > 1); /* Not supported, RLE should be used instead */
@ -532,40 +523,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
return tableLog;
}
/* fake FSE_CTable, for raw (uncompressed) input */
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
{
const unsigned tableSize = 1 << nbBits;
const unsigned tableMask = tableSize - 1;
const unsigned maxSymbolValue = tableMask;
void* const ptr = ct;
U16* const tableU16 = ( (U16*) ptr) + 2;
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
unsigned s;
/* Sanity checks */
if (nbBits < 1) return ERROR(GENERIC); /* min size */
/* header */
tableU16[-2] = (U16) nbBits;
tableU16[-1] = (U16) maxSymbolValue;
/* Build table */
for (s=0; s<tableSize; s++)
tableU16[s] = (U16)(tableSize + s);
/* Build Symbol Transformation Table */
{ const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
for (s=0; s<=maxSymbolValue; s++) {
symbolTT[s].deltaNbBits = deltaNbBits;
symbolTT[s].deltaFindState = s-1;
} }
return 0;
}
/* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{
@ -664,78 +621,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` size must be `(1<<tableLog)`.
*/
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const oend = ostart + dstSize;
unsigned count[FSE_MAX_SYMBOL_VALUE+1];
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
FSE_CTable* CTable = (FSE_CTable*)workSpace;
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
void* scratchBuffer = (void*)(CTable + CTableSize);
size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
/* init conditions */
if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
if (srcSize <= 1) return 0; /* Not compressible */
if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
/* Scan input and build symbol stats */
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
}
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
/* Write table description header */
{ CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
op += nc_err;
}
/* Compress */
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
/* check compressibility */
if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
return op-ostart;
}
typedef struct {
FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
union {
U32 hist_wksp[HIST_WKSP_SIZE_U32];
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
} workspace;
} fseWkspMax_t;
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
{
fseWkspMax_t scratchBuffer;
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
}
size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
}
#endif
#endif /* FSE_COMMONDEFS_ONLY */

View File

@ -1,7 +1,7 @@
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy

View File

@ -1,7 +1,7 @@
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy

View File

@ -1,6 +1,6 @@
/* ******************************************************************
* Huffman encoder, part of New Generation Entropy library
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -29,9 +29,9 @@
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
#include "../common/fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"
#include "../common/bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@ -42,13 +42,67 @@
/* **************************************************************
* Utils
* Required declarations
****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
typedef struct nodeElt_s {
U32 count;
U16 parent;
BYTE byte;
BYTE nbBits;
} nodeElt;
/* **************************************************************
* Debug Traces
****************************************************************/
#if DEBUGLEVEL >= 2
static size_t showU32(const U32* arr, size_t size)
{
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %u", arr[u]); (void)arr;
}
RAWLOG(6, " \n");
return size;
}
static size_t HUF_getNbBits(HUF_CElt elt);
static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
{
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
}
RAWLOG(6, " \n");
return size;
}
static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
{
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
}
RAWLOG(6, " \n");
return size;
}
static size_t showHNodeBits(const nodeElt* hnode, size_t size)
{
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
}
RAWLOG(6, " \n");
return size;
}
#endif
/* *******************************************************
* HUF : Huffman block compression
@ -89,7 +143,10 @@ typedef struct {
S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;
static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
static size_t
HUF_compressWeights(void* dst, size_t dstSize,
const void* weightTable, size_t wtSize,
void* workspace, size_t workspaceSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
@ -140,7 +197,7 @@ static size_t HUF_getNbBitsFast(HUF_CElt elt)
static size_t HUF_getValue(HUF_CElt elt)
{
return elt & ~0xFF;
return elt & ~(size_t)0xFF;
}
static size_t HUF_getValueFast(HUF_CElt elt)
@ -178,6 +235,8 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
U32 n;
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
/* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
@ -207,16 +266,6 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
return ((maxSymbolValue+1)/2) + 1;
}
/*! HUF_writeCTable() :
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
HUF_WriteCTableWksp wksp;
return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
}
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
@ -272,68 +321,64 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
const HUF_CElt* ct = CTable + 1;
const HUF_CElt* const ct = CTable + 1;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
return (U32)HUF_getNbBits(ct[symbolValue]);
}
typedef struct nodeElt_s {
U32 count;
U16 parent;
BYTE byte;
BYTE nbBits;
} nodeElt;
/**
* HUF_setMaxHeight():
* Enforces maxNbBits on the Huffman tree described in huffNode.
* Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
*
* It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
* the tree to so that it is a valid canonical Huffman tree.
* It attempts to convert all nodes with nbBits > @targetNbBits
* to employ @targetNbBits instead. Then it adjusts the tree
* so that it remains a valid canonical Huffman tree.
*
* @pre The sum of the ranks of each symbol == 2^largestBits,
* where largestBits == huffNode[lastNonNull].nbBits.
* @post The sum of the ranks of each symbol == 2^largestBits,
* where largestBits is the return value <= maxNbBits.
* where largestBits is the return value (expected <= targetNbBits).
*
* @param huffNode The Huffman tree modified in place to enforce maxNbBits.
* @param huffNode The Huffman tree modified in place to enforce targetNbBits.
* It's presumed sorted, from most frequent to rarest symbol.
* @param lastNonNull The symbol with the lowest count in the Huffman tree.
* @param maxNbBits The maximum allowed number of bits, which the Huffman tree
* @param targetNbBits The allowed number of bits, which the Huffman tree
* may not respect. After this function the Huffman tree will
* respect maxNbBits.
* @return The maximum number of bits of the Huffman tree after adjustment,
* necessarily no more than maxNbBits.
* respect targetNbBits.
* @return The maximum number of bits of the Huffman tree after adjustment.
*/
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
{
const U32 largestBits = huffNode[lastNonNull].nbBits;
/* early exit : no elt > maxNbBits, so the tree is already valid. */
if (largestBits <= maxNbBits) return largestBits;
/* early exit : no elt > targetNbBits, so the tree is already valid. */
if (largestBits <= targetNbBits) return largestBits;
DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
/* there are several too large elements (at least >= 2) */
{ int totalCost = 0;
const U32 baseCost = 1 << (largestBits - maxNbBits);
const U32 baseCost = 1 << (largestBits - targetNbBits);
int n = (int)lastNonNull;
/* Adjust any ranks > maxNbBits to maxNbBits.
/* Adjust any ranks > targetNbBits to targetNbBits.
* Compute totalCost, which is how far the sum of the ranks is
* we are over 2^largestBits after adjust the offending ranks.
*/
while (huffNode[n].nbBits > maxNbBits) {
while (huffNode[n].nbBits > targetNbBits) {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
huffNode[n].nbBits = (BYTE)maxNbBits;
huffNode[n].nbBits = (BYTE)targetNbBits;
n--;
}
/* n stops at huffNode[n].nbBits <= maxNbBits */
assert(huffNode[n].nbBits <= maxNbBits);
/* n end at index of smallest symbol using < maxNbBits */
while (huffNode[n].nbBits == maxNbBits) --n;
/* n stops at huffNode[n].nbBits <= targetNbBits */
assert(huffNode[n].nbBits <= targetNbBits);
/* n end at index of smallest symbol using < targetNbBits */
while (huffNode[n].nbBits == targetNbBits) --n;
/* renorm totalCost from 2^largestBits to 2^maxNbBits
/* renorm totalCost from 2^largestBits to 2^targetNbBits
* note : totalCost is necessarily a multiple of baseCost */
assert((totalCost & (baseCost - 1)) == 0);
totalCost >>= (largestBits - maxNbBits);
assert(((U32)totalCost & (baseCost - 1)) == 0);
totalCost >>= (largestBits - targetNbBits);
assert(totalCost > 0);
/* repay normalized cost */
@ -342,19 +387,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
/* Get pos of last (smallest = lowest cum. count) symbol per rank */
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
{ U32 currentNbBits = maxNbBits;
{ U32 currentNbBits = targetNbBits;
int pos;
for (pos=n ; pos >= 0; pos--) {
if (huffNode[pos].nbBits >= currentNbBits) continue;
currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
rankLast[maxNbBits-currentNbBits] = (U32)pos;
currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
rankLast[targetNbBits-currentNbBits] = (U32)pos;
} }
while (totalCost > 0) {
/* Try to reduce the next power of 2 above totalCost because we
* gain back half the rank.
*/
U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
U32 const highPos = rankLast[nBitsToDecrease];
U32 const lowPos = rankLast[nBitsToDecrease-1];
@ -394,7 +439,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
rankLast[nBitsToDecrease] = noSymbol;
else {
rankLast[nBitsToDecrease]--;
if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
}
} /* while (totalCost > 0) */
@ -406,11 +451,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
* TODO.
*/
while (totalCost < 0) { /* Sometimes, cost correction overshoot */
/* special case : no rank 1 symbol (using maxNbBits-1);
* let's create one from largest rank 0 (using maxNbBits).
/* special case : no rank 1 symbol (using targetNbBits-1);
* let's create one from largest rank 0 (using targetNbBits).
*/
if (rankLast[1] == noSymbol) {
while (huffNode[n].nbBits == maxNbBits) n--;
while (huffNode[n].nbBits == targetNbBits) n--;
huffNode[n+1].nbBits--;
assert(n >= 0);
rankLast[1] = (U32)(n+1);
@ -424,7 +469,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
} /* repay normalized cost */
} /* there are several too large elements (at least >= 2) */
return maxNbBits;
return targetNbBits;
}
typedef struct {
@ -432,7 +477,7 @@ typedef struct {
U16 curr;
} rankPos;
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192
@ -451,8 +496,8 @@ typedef struct {
* Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
*/
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
/* Return the appropriate bucket index for a given count. See definition of
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
@ -460,7 +505,7 @@ typedef struct {
static U32 HUF_getIndex(U32 const count) {
return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
? count
: BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
: ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}
/* Helper swap function for HUF_quickSortPartition() */
@ -583,7 +628,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
/* Sort each bucket. */
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
U32 const bucketStartIdx = rankPosition[n].base;
if (bucketSize > 1) {
assert(bucketStartIdx < maxSymbolValue1);
@ -594,6 +639,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
assert(HUF_isSorted(huffNode, maxSymbolValue1));
}
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
@ -614,6 +660,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
int lowS, lowN;
int nodeNb = STARTNODE;
int n, nodeRoot;
DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
/* init for parents */
nonNullRank = (int)maxSymbolValue;
while(huffNode[nonNullRank].count == 0) nonNullRank--;
@ -640,6 +687,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
for (n=0; n<=nonNullRank; n++)
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
return nonNullRank;
}
@ -677,28 +726,36 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
CTable[0] = maxNbBits;
}
size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
size_t
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
void* workSpace, size_t wkspSize)
{
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
HUF_buildCTable_wksp_tables* const wksp_tables =
(HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
nodeElt* const huffNode = huffNode0+1;
int nonNullRank;
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
/* safety checks */
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
return ERROR(workSpace_tooSmall);
return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
return ERROR(maxSymbolValue_tooLarge);
return ERROR(maxSymbolValue_tooLarge);
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
/* build tree */
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
/* enforce maxTableLog */
/* determine and enforce maxTableLog */
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
@ -807,7 +864,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id
#if DEBUGLEVEL >= 1
{
size_t const nbBits = HUF_getNbBits(elt);
size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
(void)dirtyBits;
/* Middle bits are 0. */
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
@ -887,7 +944,7 @@ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
size_t const nbBits = bitC->bitPos[0] & 0xFF;
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
}
}
@ -1048,9 +1105,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2)
const HUF_CElt* CTable, const int flags)
{
if (bmi2) {
if (flags & HUF_flags_bmi2) {
return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
}
return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
@ -1061,28 +1118,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2)
const HUF_CElt* CTable, const int flags)
{
(void)bmi2;
(void)flags;
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}
#endif
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable, int bmi2)
const HUF_CElt* CTable, int flags)
{
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
const BYTE* ip = (const BYTE*) src;
@ -1096,7 +1148,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
op += 6; /* jumpTable */
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart, (U16)cSize);
op += cSize;
@ -1104,7 +1156,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+2, (U16)cSize);
op += cSize;
@ -1112,7 +1164,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+4, (U16)cSize);
op += cSize;
@ -1121,7 +1173,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
assert(ip <= iend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
op += cSize;
}
@ -1129,14 +1181,9 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
return (size_t)(op-ostart);
}
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@ -1144,11 +1191,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
static size_t HUF_compressCTable_internal(
BYTE* const ostart, BYTE* op, BYTE* const oend,
const void* src, size_t srcSize,
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
{
size_t const cSize = (nbStreams==HUF_singleStream) ?
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* uncompressible */
op += cSize;
@ -1171,6 +1218,79 @@ typedef struct {
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
{
unsigned cardinality = 0;
unsigned i;
for (i = 0; i < maxSymbolValue + 1; i++) {
if (count[i] != 0) cardinality += 1;
}
return cardinality;
}
unsigned HUF_minTableLog(unsigned symbolCardinality)
{
U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
return minBitsSymbols;
}
unsigned HUF_optimalTableLog(
unsigned maxTableLog,
size_t srcSize,
unsigned maxSymbolValue,
void* workSpace, size_t wkspSize,
HUF_CElt* table,
const unsigned* count,
int flags)
{
assert(srcSize > 1); /* Not supported, RLE should be used instead */
assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
if (!(flags & HUF_flags_optimalDepth)) {
/* cheap evaluation, based on FSE */
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
size_t maxBits, hSize, newSize;
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
size_t optSize = ((size_t) ~0) - 1;
unsigned optLog = maxTableLog, optLogGuess;
DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
/* Search until size increases */
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
if (ERR_isError(maxBits)) continue;
if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
if (ERR_isError(hSize)) continue;
newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
if (newSize > optSize + 1) {
break;
}
if (newSize < optSize) {
optSize = newSize;
optLog = optLogGuess;
}
}
assert(optLog <= HUF_TABLELOG_MAX);
return optLog;
}
}
/* HUF_compress_internal() :
* `workSpace_align4` must be aligned on 4-bytes boundaries,
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
@ -1180,14 +1300,14 @@ HUF_compress_internal (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog,
HUF_nbStreams_e nbStreams,
void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
const int bmi2, unsigned suspectUncompressible)
HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
{
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
/* checks & inits */
@ -1201,16 +1321,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
/* Heuristic : If old table is valid, use it for small inputs */
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
nbStreams, oldHufTable, bmi2);
nbStreams, oldHufTable, flags);
}
/* If uncompressible data is suspected, do a smaller sampling first */
DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
size_t largestTotal = 0;
DEBUGLOG(5, "input suspected incompressible : sampling to check");
{ unsigned maxSymbolValueBegin = maxSymbolValue;
CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
largestTotal += largestBegin;
@ -1227,6 +1348,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
}
DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
/* Check validity of previous table */
if ( repeat
@ -1235,19 +1357,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
*repeat = HUF_repeat_none;
}
/* Heuristic : use existing table for small inputs */
if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
nbStreams, oldHufTable, bmi2);
nbStreams, oldHufTable, flags);
}
/* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog,
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
CHECK_F(maxBits);
huffLog = (U32)maxBits;
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
}
/* Zero unused symbols in CTable, so we can check it for validity */
{
@ -1266,7 +1389,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
nbStreams, oldHufTable, bmi2);
nbStreams, oldHufTable, flags);
} }
/* Use the new huffman table */
@ -1278,46 +1401,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
}
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
nbStreams, table->CTable, bmi2);
}
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/, 0);
nbStreams, table->CTable, flags);
}
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
int bmi2, unsigned suspectUncompressible)
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize, hufTable,
repeat, preferRepeat, bmi2, suspectUncompressible);
}
/* HUF_compress4X_repeat():
* compress input using 4 streams.
* provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/, 0);
repeat, flags);
}
/* HUF_compress4X_repeat():
@ -1328,43 +1425,11 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
hufTable, repeat, flags);
}
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/** HUF_buildCTable() :
* @return : maxNbBits
* Note : count is used before tree is written, so they can safely overlap
*/
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
HUF_buildCTable_wksp_tables workspace;
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
}
size_t HUF_compress1X (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
U64 workSpace[HUF_WORKSPACE_SIZE_U64];
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress2 (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
U64 workSpace[HUF_WORKSPACE_SIZE_U64];
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -23,6 +23,7 @@
#ifdef ZSTD_MULTITHREAD
# include "zstdmt_compress.h"
#endif
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
#if defined (__cplusplus)
extern "C" {
@ -117,12 +118,13 @@ typedef struct {
/** ZSTD_buildBlockEntropyStats() :
* Builds entropy for the block.
* @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize);
size_t ZSTD_buildBlockEntropyStats(
const seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize);
/*********************************
* Compression internals structs *
@ -148,6 +150,12 @@ typedef struct {
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
typedef struct {
U32 idx; /* Index in array of ZSTD_Sequence */
U32 posInSequence; /* Position within sequence at idx */
size_t posInSrc; /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct {
@ -218,8 +226,10 @@ struct ZSTD_matchState_t {
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
U32* hashTable;
U32* hashTable3;
@ -234,6 +244,18 @@ struct ZSTD_matchState_t {
const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
const rawSeqStore_t* ldmSeqStore;
/* Controls prefetching in some dictMatchState matchfinders.
* This behavior is controlled from the cctx ms.
* This parameter has no effect in the cdict ms. */
int prefetchCDictTables;
/* When == 0, lazy match finders insert every position.
* When != 0, lazy match finders only insert positions they search.
* This allows them to skip much faster over incompressible data,
* at a small cost to compression ratio.
*/
int lazySkipping;
};
typedef struct {
@ -330,6 +352,24 @@ struct ZSTD_CCtx_params_s {
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;
/* Controls prefetching in some dictMatchState matchfinders */
ZSTD_paramSwitch_e prefetchCDictTables;
/* Controls whether zstd will fall back to an internal matchfinder
* if the external matchfinder returns an error code. */
int enableMatchFinderFallback;
/* Indicates whether an external matchfinder has been referenced.
* Users can't set this externally.
* It is set internally in ZSTD_registerSequenceProducer(). */
int useSequenceProducer;
/* Adjust the max block size*/
size_t maxBlockSize;
/* Controls repcode search in external sequence parsing */
ZSTD_paramSwitch_e searchForExternalRepcodes;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
@ -361,6 +401,14 @@ typedef struct {
ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;
/* Context for block-level external matchfinder API */
typedef struct {
void* mState;
ZSTD_sequenceProducer_F* mFinder;
ZSTD_Sequence* seqBuffer;
size_t seqBufferCapacity;
} ZSTD_externalMatchCtx;
struct ZSTD_CCtx_s {
ZSTD_compressionStage_e stage;
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@ -410,6 +458,7 @@ struct ZSTD_CCtx_s {
/* Stable in/out buffer verification */
ZSTD_inBuffer expectedInBuffer;
size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
size_t expectedOutBufferSize;
/* Dictionary */
@ -429,9 +478,13 @@ struct ZSTD_CCtx_s {
/* Workspace for block splitter */
ZSTD_blockSplitCtx blockSplitCtx;
/* Workspace for external matchfinder */
ZSTD_externalMatchCtx externalMatchCtx;
};
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
typedef enum {
ZSTD_noDict = 0,
@ -453,7 +506,7 @@ typedef enum {
* In this mode we take both the source size and the dictionary size
* into account when selecting and adjusting the parameters.
*/
ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
ZSTD_cpm_unknown = 3 /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
* We don't know what these parameters are for. We default to the legacy
* behavior of taking both the source size and the dict size into account
* when selecting and adjusting parameters.
@ -512,9 +565,11 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
/* ZSTD_noCompressBlock() :
* Writes uncompressed block to dst buffer from given src.
* Returns the size of the block */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
MEM_STATIC size_t
ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
dstSize_tooSmall, "dst buf too small for uncompressed block");
MEM_writeLE24(dst, cBlockHeader24);
@ -522,7 +577,8 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
return ZSTD_blockHeaderSize + srcSize;
}
MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
MEM_STATIC size_t
ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
BYTE* const op = (BYTE*)dst;
U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
@ -541,7 +597,7 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
return (srcSize >> minlog) + 2;
}
@ -577,29 +633,27 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
while (ip < iend) *op++ = *ip++;
}
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
#define STORE_REPCODE_1 STORE_REPCODE(1)
#define STORE_REPCODE_2 STORE_REPCODE(2)
#define STORE_REPCODE_3 STORE_REPCODE(3)
#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)
#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE)
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */
#define STORED_TO_OFFBASE(o) ((o)+1)
#define OFFBASE_TO_STORED(o) ((o)-1)
#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
#define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM)
#define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM)
#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
#define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */
/*! ZSTD_storeSeq() :
* Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
* @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
* Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
* @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
* @matchLength : must be >= MINMATCH
* Allowed to overread literals up to litLimit.
* Allowed to over-read literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr,
size_t litLength, const BYTE* literals, const BYTE* litLimit,
U32 offBase_minus1,
U32 offBase,
size_t matchLength)
{
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
@ -608,8 +662,8 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
static const BYTE* g_start = NULL;
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
pos, (U32)litLength, (U32)matchLength, (U32)offBase);
}
#endif
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
@ -619,9 +673,9 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
assert(literals + litLength <= litLimit);
if (litEnd <= litLimit_w) {
/* Common case we can use wildcopy.
* First copy 16 bytes, because literals are likely short.
*/
assert(WILDCOPY_OVERLENGTH >= 16);
* First copy 16 bytes, because literals are likely short.
*/
ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
ZSTD_copy16(seqStorePtr->lit, literals);
if (litLength > 16) {
ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
@ -640,7 +694,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */
seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
seqStorePtr->sequences[0].offBase = offBase;
/* match Length */
assert(matchLength >= MINMATCH);
@ -658,17 +712,17 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
/* ZSTD_updateRep() :
* updates in-place @rep (array of repeat offsets)
* @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
* @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
*/
MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
{
if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */
if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */
rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = STORED_OFFSET(offBase_minus1);
rep[0] = OFFBASE_TO_OFFSET(offBase);
} else { /* repcode */
U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
if (repCode > 0) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
rep[2] = (repCode >= 2) ? rep[1] : rep[2];
@ -685,11 +739,11 @@ typedef struct repcodes_s {
} repcodes_t;
MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
{
repcodes_t newReps;
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
ZSTD_updateRep(newReps.rep, offBase, ll0);
return newReps;
}
@ -697,103 +751,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0
/*-*************************************
* Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return _tzcnt_u64(val) >> 3;
# else
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, (U64)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_ctzll((U64)val) >> 3);
# else
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
0, 3, 1, 3, 1, 4, 2, 7,
0, 2, 3, 6, 1, 5, 3, 5,
1, 3, 4, 4, 2, 5, 6, 7,
7, 0, 1, 2, 3, 3, 4, 6,
2, 6, 5, 5, 3, 4, 5, 6,
7, 1, 2, 4, 6, 4, 4, 5,
7, 2, 6, 5, 7, 6, 7, 7 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
if (val != 0) {
unsigned long r;
_BitScanForward(&r, (U32)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
# else
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
3, 2, 2, 1, 3, 2, 0, 1,
3, 3, 1, 2, 2, 2, 2, 0,
3, 1, 2, 0, 1, 0, 1, 1 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
} else { /* Big Endian CPU */
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return _lzcnt_u64(val) >> 3;
# else
if (val != 0) {
unsigned long r;
_BitScanReverse64(&r, (U64)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_clzll(val) >> 3);
# else
unsigned r;
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
r += (!val);
return r;
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, (unsigned long)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_clz((U32)val) >> 3);
# else
unsigned r;
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
r += (!val);
return r;
# endif
} }
}
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
const BYTE* const pStart = pIn;
@ -839,32 +796,43 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
* Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
static const U32 prime4bytes = 2654435761U;
static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
/* Although some of these hashes do support hBits up to 64, some do not.
* To be on the safe side, always avoid hBits > 32. */
assert(hBits <= 32);
switch(mls)
{
default:
@ -876,6 +844,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
}
}
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
/* Although some of these hashes do support hBits up to 64, some do not.
* To be on the safe side, always avoid hBits > 32. */
assert(hBits <= 32);
switch(mls)
{
default:
case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
}
}
/** ZSTD_ipow() :
* Return base^exponent.
*/
@ -1223,10 +1209,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
assert(blockEndIdx >= loadedDictEnd);
if (blockEndIdx > loadedDictEnd + maxDist) {
if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
/* On reaching window size, dictionaries are invalidated.
* For simplification, if window size is reached anywhere within next block,
* the dictionary is invalidated for the full block.
*
* We also have to invalidate the dictionary if ZSTD_window_update() has detected
* non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
* loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
* dictMatchState, so setting it to NULL is not a problem.
*/
DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
*loadedDictEndPtr = 0;
@ -1358,6 +1349,42 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
#endif
/* Short Cache */
/* Normally, zstd matchfinders follow this flow:
* 1. Compute hash at ip
* 2. Load index from hashTable[hash]
* 3. Check if *ip == *(base + index)
* In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
*
* Short cache is an optimization which allows us to avoid step 3 most of the time
* when the data doesn't actually match. With short cache, the flow becomes:
* 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
* 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
* 3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
*
* Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
* dictMatchState matchfinders.
*/
#define ZSTD_SHORT_CACHE_TAG_BITS 8
#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
* Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
}
/* Helper function for short cache matchfinders.
* Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
return tag1 == tag2;
}
#if defined (__cplusplus)
}
@ -1455,4 +1482,51 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
*/
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
* ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
* Note that the block delimiter must include the last literals of the block.
*/
size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
/* Returns the number of bytes to move the current read position back by.
* Only non-zero if we ended up splitting a sequence.
* Otherwise, it may return a ZSTD error if something went wrong.
*
* This function will attempt to scan through blockSize bytes
* represented by the sequences in @inSeqs,
* storing any (partial) sequences.
*
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
* avoid splitting a match, or to avoid splitting a match such that it would produce a match
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
*/
size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
/* ===============================================================
* Deprecated definitions that are still used internally to avoid
* deprecation warnings. These functions are exactly equivalent to
* their public variants, but avoid the deprecation warnings.
* =============================================================== */
size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
#endif /* ZSTD_COMPRESS_H */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -13,11 +13,36 @@
***************************************/
#include "zstd_compress_literals.h"
/* **************************************************************
* Debug Traces
****************************************************************/
#if DEBUGLEVEL >= 2
static size_t showHexa(const void* src, size_t srcSize)
{
const BYTE* const ip = (const BYTE*)src;
size_t u;
for (u=0; u<srcSize; u++) {
RAWLOG(5, " %02X", ip[u]); (void)ip;
}
RAWLOG(5, " \n");
return srcSize;
}
#endif
/* **************************************************************
* Literals compression - special cases
****************************************************************/
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
switch(flSize)
@ -36,16 +61,30 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
}
ZSTD_memcpy(ostart + flSize, src, srcSize);
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
return srcSize + flSize;
}
static int allBytesIdentical(const void* src, size_t srcSize)
{
assert(srcSize >= 1);
assert(src != NULL);
{ const BYTE b = ((const BYTE*)src)[0];
size_t p;
for (p=1; p<srcSize; p++) {
if (((const BYTE*)src)[p] != b) return 0;
}
return 1;
}
}
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
assert(dstCapacity >= 4); (void)dstCapacity;
assert(allBytesIdentical(src, srcSize));
switch(flSize)
{
@ -63,28 +102,51 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
}
ostart[flSize] = *(const BYTE*)src;
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
return flSize+1;
}
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2,
unsigned suspectUncompressible)
/* ZSTD_minLiteralsToCompress() :
* returns minimal amount of literals
* for literal compression to even be attempted.
* Minimum is made tighter as compression strategy increases.
*/
static size_t
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
{
assert((int)strategy >= 0);
assert((int)strategy <= 9);
/* btultra2 : min 8 bytes;
* then 2x larger for each successive compression strategy
* max threshold 64 bytes */
{ int const shift = MIN(9-(int)strategy, 3);
size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
return mintc;
}
}
size_t ZSTD_compressLiterals (
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy,
int disableLiteralCompression,
int suspectUncompressible,
int bmi2)
{
size_t const minGain = ZSTD_minGain(srcSize, strategy);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256;
symbolEncodingType_e hType = set_compressed;
size_t cLitSize;
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
disableLiteralCompression, (U32)srcSize);
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
disableLiteralCompression, (U32)srcSize, dstCapacity);
DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
/* Prepare nextEntropy assuming reusing the existing table */
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
@ -92,40 +154,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
/* if too small, don't even attempt compression (speed opt) */
if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
int const flags = 0
| (bmi2 ? HUF_flags_bmi2 : 0)
| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
huf_compress_f huf_compress;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ?
HUF_compress1X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
HUF_compress4X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
src, srcSize,
HUF_SYMBOLVALUE_MAX, LitHufLog,
entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable,
&repeat, flags);
DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
DEBUGLOG(5, "Reusing previous huffman table");
DEBUGLOG(5, "reusing statistics from previous huffman block");
hType = set_repeat;
}
}
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
{ size_t const minGain = ZSTD_minGain(srcSize, strategy);
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
} }
if (cLitSize==1) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
/* A return value of 1 signals that the alphabet consists of a single symbol.
* However, in some rare circumstances, it could be the compressed size (a single byte).
* For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
* (it's also necessary to not generate statistics).
* Therefore, in such a case, actively check that all bytes are identical. */
if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
} }
if (hType == set_compressed) {
/* using a newly constructed table */
@ -136,16 +209,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
case 4: /* 2 - 2 - 14 - 14 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc);
break;
}
case 5: /* 2 - 2 - 18 - 18 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -16,16 +16,24 @@
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* ZSTD_compressRleLiteralsBlock() :
* Conditions :
* - All bytes in @src are identical
* - dstCapacity >= 4 */
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
/* ZSTD_compressLiterals():
* @entropyWorkspace: must be aligned on 4-bytes boundaries
* @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
* @suspectUncompressible: sampling checks, to potentially skip huffman coding
*/
size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2,
unsigned suspectUncompressible);
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
int suspectUncompressible,
int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -58,7 +58,7 @@ static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
{
/* Heuristic: This should cover most blocks <= 16K and
* start to fade out after 16K to about 32K depending on
* comprssibility.
* compressibility.
*/
return nbSeq >= 2048;
}
@ -166,7 +166,7 @@ ZSTD_selectEncodingType(
if (mostFrequent == nbSeq) {
*repeatMode = FSE_repeat_none;
if (isDefaultAllowed && nbSeq <= 2) {
/* Prefer set_basic over set_rle when there are 2 or less symbols,
/* Prefer set_basic over set_rle when there are 2 or fewer symbols,
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
* If basic encoding isn't possible, always choose RLE.
*/

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -36,13 +36,14 @@
* If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
* and the following sub-blocks' literals sections will be Treeless_Literals_Block.
* @return : compressed size of literals section of a sub-block
* Or 0 if it unable to compress.
* Or 0 if unable to compress.
* Or error code */
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
const ZSTD_hufCTablesMetadata_t* hufMetadata,
const BYTE* literals, size_t litSize,
void* dst, size_t dstSize,
const int bmi2, int writeEntropy, int* entropyWritten)
static size_t
ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
const ZSTD_hufCTablesMetadata_t* hufMetadata,
const BYTE* literals, size_t litSize,
void* dst, size_t dstSize,
const int bmi2, int writeEntropy, int* entropyWritten)
{
size_t const header = writeEntropy ? 200 : 0;
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
@ -53,8 +54,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
size_t cLitSize = 0;
(void)bmi2; /* TODO bmi2... */
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
*entropyWritten = 0;
@ -76,9 +75,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
}
/* TODO bmi2 */
{ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0;
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
op += cSize;
cLitSize += cSize;
if (cSize == 0 || ERR_isError(cSize)) {
@ -126,7 +125,11 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
return op-ostart;
}
static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
static size_t
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
const seqDef* sequences, size_t nbSeq,
size_t litSize, int lastSequence)
{
const seqDef* const sstart = sequences;
const seqDef* const send = sequences + nbSeq;
const seqDef* sp = sstart;
@ -156,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
* @return : compressed size of sequences section of a sub-block
* Or 0 if it is unable to compress
* Or error code. */
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
const ZSTD_fseCTablesMetadata_t* fseMetadata,
const seqDef* sequences, size_t nbSeq,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
const int bmi2, int writeEntropy, int* entropyWritten)
static size_t
ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
const ZSTD_fseCTablesMetadata_t* fseMetadata,
const seqDef* sequences, size_t nbSeq,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
const int bmi2, int writeEntropy, int* entropyWritten)
{
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
BYTE* const ostart = (BYTE*)dst;
@ -539,7 +543,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) {
ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
}
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -14,7 +14,9 @@
/*-*************************************
* Dependencies
***************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_internal.h"
#include "../common/portability_macros.h"
#if defined (__cplusplus)
extern "C" {
@ -44,8 +46,9 @@ extern "C" {
***************************************/
typedef enum {
ZSTD_cwksp_alloc_objects,
ZSTD_cwksp_alloc_buffers,
ZSTD_cwksp_alloc_aligned
ZSTD_cwksp_alloc_aligned_init_once,
ZSTD_cwksp_alloc_aligned,
ZSTD_cwksp_alloc_buffers
} ZSTD_cwksp_alloc_phase_e;
/**
@ -98,8 +101,8 @@ typedef enum {
*
* Workspace Layout:
*
* [ ... workspace ... ]
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
* [ ... workspace ... ]
* [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
*
* The various objects that live in the workspace are divided into the
* following categories, and are allocated separately:
@ -123,9 +126,18 @@ typedef enum {
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams. These tables are 64-byte aligned.
*
* - Aligned: these buffers are used for various purposes that require 4 byte
* alignment, but don't require any initialization before they're used. These
* buffers are each aligned to 64 bytes.
* - Init once: these buffers require to be initialized at least once before
* use. They should be used when we want to skip memory initialization
* while not triggering memory checkers (like Valgrind) when reading from
* from this memory without writing to it first.
* These buffers should be used carefully as they might contain data
* from previous compressions.
* Buffers are aligned to 64 bytes.
*
* - Aligned: these buffers don't require any initialization before they're
* used. The user of the buffer should make sure they write into a buffer
* location before reading from it.
* Buffers are aligned to 64 bytes.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
@ -137,8 +149,9 @@ typedef enum {
* correctly packed into the workspace buffer. That order is:
*
* 1. Objects
* 2. Buffers
* 3. Aligned/Tables
* 2. Init once / Tables
* 3. Aligned / Tables
* 4. Buffers / Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
@ -150,6 +163,7 @@ typedef struct {
void* tableEnd;
void* tableValidEnd;
void* allocStart;
void* initOnceStart;
BYTE allocFailed;
int workspaceOversizedDuration;
@ -162,6 +176,7 @@ typedef struct {
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
(void)ws;
@ -171,6 +186,20 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
assert(ws->tableEnd <= ws->allocStart);
assert(ws->tableValidEnd <= ws->allocStart);
assert(ws->allocStart <= ws->workspaceEnd);
assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
assert(ws->workspace <= ws->initOnceStart);
#if ZSTD_MEMORY_SANITIZER
{
intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
(U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
#if defined(ZSTD_MSAN_PRINT)
if(offset!=-1) {
__msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
}
#endif
assert(offset==-1);
};
#endif
}
/**
@ -217,14 +246,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
* for internal purposes (currently only alignment).
*/
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
/* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
* to align the beginning of tables section, as well as another n_2=[0, 63] bytes
* to align the beginning of the aligned section.
*
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
* aligneds being sized in multiples of 64 bytes.
/* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
* bytes to align the beginning of tables section and end of buffers;
*/
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
return slackSpace;
}
@ -237,10 +262,18 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert((alignBytes & alignBytesMask) == 0);
assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
assert(bytes < alignBytes);
return bytes;
}
/**
* Returns the initial value for allocStart which is used to determine the position from
* which we can allocate from the end of the workspace.
*/
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
}
/**
* Internal function. Do not use directly.
* Reserves the given number of bytes within the aligned/buffer segment of the wksp,
@ -281,27 +314,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
{
assert(phase >= ws->phase);
if (phase > ws->phase) {
/* Going from allocating objects to allocating buffers */
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
phase >= ZSTD_cwksp_alloc_buffers) {
/* Going from allocating objects to allocating initOnce / tables */
if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
phase >= ZSTD_cwksp_alloc_aligned_init_once) {
ws->tableValidEnd = ws->objectEnd;
}
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
/* Going from allocating buffers to allocating aligneds/tables */
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
phase >= ZSTD_cwksp_alloc_aligned) {
{ /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
size_t const bytesToAlign =
ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
memory_allocation, "aligned phase - alignment initial allocation failed!");
}
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
void* const alloc = ws->objectEnd;
void *const alloc = ws->objectEnd;
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* const objectEnd = (BYTE*)alloc + bytesToAlign;
void *const objectEnd = (BYTE *) alloc + bytesToAlign;
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
"table phase - alignment initial allocation failed!");
@ -309,7 +331,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
ws->tableEnd = objectEnd; /* table area starts being empty */
if (ws->tableValidEnd < ws->tableEnd) {
ws->tableValidEnd = ws->tableEnd;
} } }
}
}
}
ws->phase = phase;
ZSTD_cwksp_assert_internal_consistency(ws);
}
@ -321,7 +345,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
*/
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
{
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
}
/**
@ -348,7 +372,9 @@ ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase
if (alloc) {
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
/* We need to keep the redzone poisoned while unpoisoning the bytes that
* are actually allocated. */
__asan_unpoison_memory_region(alloc, bytes - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE);
}
}
#endif
@ -364,6 +390,36 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
}
/**
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
* This memory has been initialized at least once in the past.
* This doesn't mean it has been initialized this time, and it might contain data from previous
* operations.
* The main usage is for algorithms that might need read access into uninitialized memory.
* The algorithm must maintain safety under these conditions and must make sure it doesn't
* leak any of the past data (directly or in side channels).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
{
size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
if(ptr && ptr < ws->initOnceStart) {
/* We assume the memory following the current allocation is either:
* 1. Not usable as initOnce memory (end of workspace)
* 2. Another initOnce buffer that has been allocated before (and so was previously memset)
* 3. An ASAN redzone, in which case we don't want to write on it
* For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
* Note that we assume here that MSAN and ASAN cannot run in the same time. */
ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
ws->initOnceStart = ptr;
}
#if ZSTD_MEMORY_SANITIZER
assert(__msan_test_shadow(ptr, bytes) == -1);
#endif
return ptr;
}
/**
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
*/
@ -382,13 +438,17 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
{
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
void* alloc;
void* end;
void* top;
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
return NULL;
/* We can only start allocating tables after we are done reserving space for objects at the
* start of the workspace */
if(ws->phase < phase) {
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
return NULL;
}
}
alloc = ws->tableEnd;
end = (BYTE *)alloc + bytes;
@ -467,11 +527,19 @@ MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty. */
* space every time we mark it dirty.
* Since tableValidEnd space and initOnce space may overlap we don't poison
* the initOnce portion as it break its promise. This means that this poisoning
* check isn't always applied fully. */
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
__msan_poison(ws->objectEnd, size);
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
__msan_poison(ws->objectEnd, size);
} else {
assert(ws->initOnceStart >= ws->objectEnd);
__msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
}
}
#endif
@ -499,7 +567,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) {
ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
}
ZSTD_cwksp_mark_tables_clean(ws);
}
@ -536,11 +604,14 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context re-use logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
* the workspace (or at least the non-static, non-table parts of it)
* every time we start a new compression. */
* the workspace except for the areas in which we expect memory re-use
* without initialization (objects, valid tables area and init once
* memory). */
{
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
__msan_poison(ws->tableValidEnd, size);
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd;
__msan_poison(ws->tableValidEnd, size);
}
}
#endif
@ -556,10 +627,10 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
#endif
ws->tableEnd = ws->objectEnd;
ws->allocStart = ws->workspaceEnd;
ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
ws->allocFailed = 0;
if (ws->phase > ZSTD_cwksp_alloc_buffers) {
ws->phase = ZSTD_cwksp_alloc_buffers;
if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
@ -576,6 +647,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
ws->workspaceEnd = (BYTE*)start + size;
ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd;
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
ws->phase = ZSTD_cwksp_alloc_objects;
ws->isStatic = isStatic;
ZSTD_cwksp_clear(ws);
@ -628,17 +700,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
* Returns if the estimated space needed for a wksp is within an acceptable limit of the
* actual amount of space used.
*/
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
size_t const estimatedSpace, int resizedWorkspace) {
if (resizedWorkspace) {
/* Resized/newly allocated wksp should have exact bounds */
return ZSTD_cwksp_used(ws) == estimatedSpace;
} else {
/* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
* than estimatedSpace. See the comments in zstd_cwksp.h for details.
*/
return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
}
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
/* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
* the alignment bytes difference between estimation and actual usage */
return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
ZSTD_cwksp_used(ws) <= estimatedSpace;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -11,8 +11,43 @@
#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashLarge = ms->hashTable;
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
U32 const mls = cParams->minMatch;
U32* const hashSmall = ms->chainTable;
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* const base = ms->window.base;
const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3;
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Always insert every fastHashFillStep position into the hash tables.
* Insert the other positions into the large hash table if their entry
* is empty.
*/
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
U32 const curr = (U32)(ip - base);
U32 i;
for (i = 0; i < fastHashFillStep; ++i) {
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
if (i == 0) {
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
}
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
}
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
} }
}
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -43,7 +78,19 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
} }
} }
}
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp)
{
if (tfp == ZSTD_tfp_forCDict) {
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
} else {
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
}
}
@ -67,7 +114,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
U32 offsetSaved1 = 0, offsetSaved2 = 0;
size_t mLength;
U32 offset;
@ -100,8 +147,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
U32 const current = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
}
/* Outer Loop: one iteration per match found and stored */
@ -131,7 +178,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
goto _match_stored;
}
@ -175,9 +222,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
} while (ip1 <= ilimit);
_cleanup:
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2 ? offset_2 : offsetSaved2;
/* Return the last literals size */
return (size_t)(iend - anchor);
@ -217,7 +268,7 @@ _match_found: /* requires ip, offset, mLength */
hashLong[hl1] = (U32)(ip1 - base);
}
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
_match_stored:
/* match found */
@ -243,7 +294,7 @@ _match_stored:
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
@ -275,7 +326,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
@ -286,8 +336,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dms->window.nextSrc;
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
const U32 dictHBitsL = dictCParams->hashLog;
const U32 dictHBitsS = dictCParams->chainLog;
const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
@ -295,6 +345,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
/* if a dictionary is attached, it must be within window range */
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
PREFETCH_AREA(dictHashLong, hashTableBytes)
PREFETCH_AREA(dictHashSmall, chainTableBytes)
}
/* init */
ip += (dictAndPrefixLength == 0);
@ -309,8 +366,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
U32 offset;
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
U32 const curr = (U32)(ip-base);
U32 const matchIndexL = hashLong[h2];
U32 matchIndexS = hashSmall[h];
@ -328,7 +389,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
goto _match_stored;
}
@ -340,9 +401,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found;
}
} else {
} else if (dictTagsMatchL) {
/* check dictMatchState long match */
U32 const dictMatchIndexL = dictHashLong[dictHL];
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
assert(dictMatchL < dictEnd);
@ -358,9 +419,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
if (MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
}
} else {
} else if (dictTagsMatchS) {
/* check dictMatchState short match */
U32 const dictMatchIndexS = dictHashSmall[dictHS];
U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
match = dictBase + dictMatchIndexS;
matchIndexS = dictMatchIndexS + dictIndexDelta;
@ -375,10 +436,11 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
continue;
_search_next_long:
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
const BYTE* matchL3 = base + matchIndexL3;
hashLong[hl3] = curr + 1;
@ -391,9 +453,9 @@ _search_next_long:
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found;
}
} else {
} else if (dictTagsMatchL3) {
/* check dict long +1 match */
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
assert(dictMatchL3 < dictEnd);
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
@ -419,7 +481,7 @@ _match_found:
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
_match_stored:
/* match found */
@ -448,7 +510,7 @@ _match_stored:
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@ -461,8 +523,8 @@ _match_stored:
} /* while (ip < ilimit) */
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
rep[0] = offset_1;
rep[1] = offset_2;
/* Return the last literals size */
return (size_t)(iend - anchor);
@ -585,7 +647,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@ -596,7 +658,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@ -621,7 +683,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
@ -653,7 +715,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -19,7 +19,8 @@ extern "C" {
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -11,8 +11,42 @@
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h"
static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
U32 const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
U32 const mls = cParams->minMatch;
const BYTE* const base = ms->window.base;
const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3;
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
/* Currently, we always use ZSTD_dtlm_full for filling CDict tables.
* Feel free to remove this assert if there's a good reason! */
assert(dtlm == ZSTD_dtlm_full);
/* Always insert every fastHashFillStep position into the hash table.
* Insert the other positions if their hash entry is empty.
*/
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
U32 const curr = (U32)(ip - base);
{ size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls);
ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr); }
if (dtlm == ZSTD_dtlm_fast) continue;
/* Only load extra positions for ZSTD_dtlm_full */
{ U32 p;
for (p = 1; p < fastHashFillStep; ++p) {
size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */
ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
} } } }
}
static void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
@ -25,6 +59,10 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3;
/* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables.
* Feel free to remove this assert if there's a good reason! */
assert(dtlm == ZSTD_dtlm_fast);
/* Always insert every fastHashFillStep position into the hash table.
* Insert the other positions if their hash entry is empty.
*/
@ -42,6 +80,18 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
} } } }
}
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp)
{
if (tfp == ZSTD_tfp_forCDict) {
ZSTD_fillHashTableForCDict(ms, end, dtlm);
} else {
ZSTD_fillHashTableForCCtx(ms, end, dtlm);
}
}
/**
* If you squint hard enough (and ignore repcodes), the search operation at any
@ -117,7 +167,7 @@ ZSTD_compressBlock_fast_noDict_generic(
U32 rep_offset1 = rep[0];
U32 rep_offset2 = rep[1];
U32 offsetSaved = 0;
U32 offsetSaved1 = 0, offsetSaved2 = 0;
size_t hash0; /* hash for ip0 */
size_t hash1; /* hash for ip1 */
@ -141,8 +191,8 @@ ZSTD_compressBlock_fast_noDict_generic(
{ U32 const curr = (U32)(ip0 - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
U32 const maxRep = curr - windowLow;
if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0;
if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0;
}
/* start each op */
@ -180,8 +230,14 @@ _start: /* Requires: ip0 */
mLength = ip0[-1] == match0[-1];
ip0 -= mLength;
match0 -= mLength;
offcode = STORE_REPCODE_1;
offcode = REPCODE1_TO_OFFBASE;
mLength += 4;
/* First write next hash table entry; we've already calculated it.
* This write is known to be safe because the ip1 is before the
* repcode (ip2). */
hashTable[hash1] = (U32)(ip1 - base);
goto _match;
}
@ -195,6 +251,12 @@ _start: /* Requires: ip0 */
/* check match at ip[0] */
if (MEM_read32(ip0) == mval) {
/* found a match! */
/* First write next hash table entry; we've already calculated it.
* This write is known to be safe because the ip1 == ip0 + 1, so
* we know we will resume searching after ip1 */
hashTable[hash1] = (U32)(ip1 - base);
goto _offset;
}
@ -224,6 +286,21 @@ _start: /* Requires: ip0 */
/* check match at ip[0] */
if (MEM_read32(ip0) == mval) {
/* found a match! */
/* first write next hash table entry; we've already calculated it */
if (step <= 4) {
/* We need to avoid writing an index into the hash table >= the
* position at which we will pick up our searching after we've
* taken this match.
*
* The minimum possible match has length 4, so the earliest ip0
* can be after we take this match will be the current ip0 + 4.
* ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
* write this position.
*/
hashTable[hash1] = (U32)(ip1 - base);
}
goto _offset;
}
@ -254,9 +331,24 @@ _cleanup:
* However, it seems to be a meaningful performance hit to try to search
* them. So let's not. */
/* When the repcodes are outside of the prefix, we set them to zero before the loop.
* When the offsets are still zero, we need to restore them after the block to have a correct
* repcode history. If only one offset was invalid, it is easy. The tricky case is when both
* offsets were invalid. We need to figure out which offset to refill with.
* - If both offsets are zero they are in the same order.
* - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`.
* - If only one is zero, we need to decide which offset to restore.
* - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1.
* - It is impossible for rep_offset2 to be non-zero.
*
* So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then
* set rep[0] = rep_offset1 and rep[1] = offsetSaved1.
*/
offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1;
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2;
/* Return the last literals size */
return (size_t)(iend - anchor);
@ -267,7 +359,7 @@ _offset: /* Requires: ip0, idx */
match0 = base + idx;
rep_offset2 = rep_offset1;
rep_offset1 = (U32)(ip0-match0);
offcode = STORE_OFFSET(rep_offset1);
offcode = OFFSET_TO_OFFBASE(rep_offset1);
mLength = 4;
/* Count the backwards match length. */
@ -287,11 +379,6 @@ _match: /* Requires: ip0, match0, offcode */
ip0 += mLength;
anchor = ip0;
/* write next hash table entry */
if (ip1 < ip0) {
hashTable[hash1] = (U32)(ip1 - base);
}
/* Fill table and check for immediate repcode. */
if (ip0 <= ilimit) {
/* Fill Table */
@ -306,7 +393,7 @@ _match: /* Requires: ip0, match0, offcode */
{ U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += rLength;
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
anchor = ip0;
continue; /* faster when present (confirmed on gcc-8) ... (?) */
} } }
@ -380,14 +467,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* ip0 = istart;
const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */
const BYTE* anchor = istart;
const U32 prefixStartIndex = ms->window.dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
@ -397,13 +484,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dms->window.nextSrc;
const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog;
const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart);
const U32 dictHBits = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
/* if a dictionary is still attached, it necessarily means that
* it is within window size. So we just check it. */
const U32 maxDistance = 1U << cParams->windowLog;
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
@ -413,106 +500,155 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
* when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
PREFETCH_AREA(dictHashTable, hashTableBytes)
}
/* init */
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
ip += (dictAndPrefixLength == 0);
ip0 += (dictAndPrefixLength == 0);
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength);
/* Main Search Loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
/* Outer search loop */
assert(stepSize >= 1);
while (ip1 <= ilimit) { /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */
size_t mLength;
size_t const h = ZSTD_hashPtr(ip, hlog, mls);
U32 const curr = (U32)(ip-base);
U32 const matchIndex = hashTable[h];
const BYTE* match = base + matchIndex;
const U32 repIndex = curr + 1 - offset_1;
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
hashTable[h] = curr; /* update hash table */
size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls);
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
} else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash];
const BYTE* dictMatch = dictBase + dictMatchIndex;
if (dictMatchIndex <= dictStartIndex ||
MEM_read32(dictMatch) != MEM_read32(ip)) {
assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
} else {
/* found a dict match */
U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
while (((ip>anchor) & (dictMatch>dictStart))
&& (ip[-1] == dictMatch[-1])) {
ip--; dictMatch--; mLength++;
size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls);
U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS];
int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0);
U32 matchIndex = hashTable[hash0];
U32 curr = (U32)(ip0 - base);
size_t step = stepSize;
const size_t kStepIncr = 1 << kSearchStrength;
const BYTE* nextStep = ip0 + kStepIncr;
/* Inner search loop */
while (1) {
const BYTE* match = base + matchIndex;
const U32 repIndex = curr + 1 - offset_1;
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls);
size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
hashTable[hash0] = curr; /* update hash table */
if (((U32) ((prefixStartIndex - 1) - repIndex) >=
3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
&& (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
ip0++;
ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
break;
}
if (dictTagsMatch) {
/* Found a possible dict match */
const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatch = dictBase + dictMatchIndex;
if (dictMatchIndex > dictStartIndex &&
MEM_read32(dictMatch) == MEM_read32(ip0)) {
/* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */
if (matchIndex <= prefixStartIndex) {
U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta);
mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4;
while (((ip0 > anchor) & (dictMatch > dictStart))
&& (ip0[-1] == dictMatch[-1])) {
ip0--;
dictMatch--;
mLength++;
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
break;
}
}
}
if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) {
/* found a regular match */
U32 const offset = (U32) (ip0 - match);
mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
while (((ip0 > anchor) & (match > prefixStart))
&& (ip0[-1] == match[-1])) {
ip0--;
match--;
mLength++;
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
break;
}
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */
assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
} else {
/* found a regular match */
U32 const offset = (U32)(ip-match);
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
while (((ip>anchor) & (match>prefixStart))
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
}
/* Prepare for next iteration */
dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS];
dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1);
matchIndex = hashTable[hash1];
if (ip1 >= nextStep) {
step++;
nextStep += kStepIncr;
}
ip0 = ip1;
ip1 = ip1 + step;
if (ip1 > ilimit) goto _cleanup;
curr = (U32)(ip0 - base);
hash0 = hash1;
} /* end inner search loop */
/* match found */
ip += mLength;
anchor = ip;
assert(mLength);
ip0 += mLength;
anchor = ip0;
if (ip <= ilimit) {
if (ip0 <= ilimit) {
/* Fill Table */
assert(base+curr+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
/* check immediate repcode */
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
while (ip0 <= ilimit) {
U32 const current2 = (U32)(ip0-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
dictBase - dictIndexDelta + repIndex2 :
base + repIndex2;
if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
&& (MEM_read32(repMatch2) == MEM_read32(ip0))) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2;
ip0 += repLength2;
anchor = ip0;
continue;
}
break;
}
}
/* Prepare for next iteration */
assert(ip0 == anchor);
ip1 = ip0 + stepSize;
}
_cleanup:
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
rep[0] = offset_1;
rep[1] = offset_2;
/* Return the last literals size */
return (size_t)(iend - anchor);
@ -553,11 +689,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
U32* const hashTable = ms->hashTable;
U32 const hlog = cParams->hashLog;
/* support stepSize of 0 */
U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
const BYTE* const base = ms->window.base;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
@ -570,6 +705,28 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved1 = 0, offsetSaved2 = 0;
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* ip2;
const BYTE* ip3;
U32 current0;
size_t hash0; /* hash for ip0 */
size_t hash1; /* hash for ip1 */
U32 idx; /* match idx for ip0 */
const BYTE* idxBase; /* base pointer for idx */
U32 offcode;
const BYTE* match0;
size_t mLength;
const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */
size_t step;
const BYTE* nextStep;
const size_t kStepIncr = (1 << (kSearchStrength - 1));
(void)hasStep; /* not currently specialized on whether it's accelerated */
@ -579,75 +736,202 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hlog, mls);
const U32 matchIndex = hashTable[h];
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
const BYTE* match = matchBase + matchIndex;
const U32 curr = (U32)(ip-base);
const U32 repIndex = curr + 1 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
hashTable[h] = curr; /* update hash table */
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
{ U32 const curr = (U32)(ip0 - base);
U32 const maxRep = curr - dictStartIndex;
if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0;
if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0;
}
if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
& (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
ip += rLength;
anchor = ip;
} else {
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
/* start each op */
_start: /* Requires: ip0 */
step = stepSize;
nextStep = ip0 + kStepIncr;
/* calculate positions, ip0 - anchor == 0, so we skip step calc */
ip1 = ip0 + 1;
ip2 = ip0 + step;
ip3 = ip2 + 1;
if (ip3 >= ilimit) {
goto _cleanup;
}
hash0 = ZSTD_hashPtr(ip0, hlog, mls);
hash1 = ZSTD_hashPtr(ip1, hlog, mls);
idx = hashTable[hash0];
idxBase = idx < prefixStartIndex ? dictBase : base;
do {
{ /* load repcode match for ip[2] */
U32 const current2 = (U32)(ip2 - base);
U32 const repIndex = current2 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
U32 rval;
if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */
& (offset_1 > 0) ) {
rval = MEM_read32(repBase + repIndex);
} else {
rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */
}
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
U32 const offset = curr - matchIndex;
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1; offset_1 = offset; /* update offset history */
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
ip += mLength;
anchor = ip;
/* write back hash table entry */
current0 = (U32)(ip0 - base);
hashTable[hash0] = current0;
/* check repcode at ip[2] */
if (MEM_read32(ip2) == rval) {
ip0 = ip2;
match0 = repBase + repIndex;
matchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
assert((match0 != prefixStart) & (match0 != dictStart));
mLength = ip0[-1] == match0[-1];
ip0 -= mLength;
match0 -= mLength;
offcode = REPCODE1_TO_OFFBASE;
mLength += 4;
goto _match;
} }
if (ip <= ilimit) {
/* Fill Table */
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
continue;
}
break;
} } }
{ /* load match for ip[0] */
U32 const mval = idx >= dictStartIndex ?
MEM_read32(idxBase + idx) :
MEM_read32(ip0) ^ 1; /* guaranteed not to match */
/* check match at ip[0] */
if (MEM_read32(ip0) == mval) {
/* found a match! */
goto _offset;
} }
/* lookup ip[1] */
idx = hashTable[hash1];
idxBase = idx < prefixStartIndex ? dictBase : base;
/* hash ip[2] */
hash0 = hash1;
hash1 = ZSTD_hashPtr(ip2, hlog, mls);
/* advance to next positions */
ip0 = ip1;
ip1 = ip2;
ip2 = ip3;
/* write back hash table entry */
current0 = (U32)(ip0 - base);
hashTable[hash0] = current0;
{ /* load match for ip[0] */
U32 const mval = idx >= dictStartIndex ?
MEM_read32(idxBase + idx) :
MEM_read32(ip0) ^ 1; /* guaranteed not to match */
/* check match at ip[0] */
if (MEM_read32(ip0) == mval) {
/* found a match! */
goto _offset;
} }
/* lookup ip[1] */
idx = hashTable[hash1];
idxBase = idx < prefixStartIndex ? dictBase : base;
/* hash ip[2] */
hash0 = hash1;
hash1 = ZSTD_hashPtr(ip2, hlog, mls);
/* advance to next positions */
ip0 = ip1;
ip1 = ip2;
ip2 = ip0 + step;
ip3 = ip1 + step;
/* calculate step */
if (ip2 >= nextStep) {
step++;
PREFETCH_L1(ip1 + 64);
PREFETCH_L1(ip1 + 128);
nextStep += kStepIncr;
}
} while (ip3 < ilimit);
_cleanup:
/* Note that there are probably still a couple positions we could search.
* However, it seems to be a meaningful performance hit to try to search
* them. So let's not. */
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */
rep[0] = offset_1;
rep[1] = offset_2;
rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2 ? offset_2 : offsetSaved2;
/* Return the last literals size */
return (size_t)(iend - anchor);
_offset: /* Requires: ip0, idx, idxBase */
/* Compute the offset code. */
{ U32 const offset = current0 - idx;
const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart;
matchEnd = idx < prefixStartIndex ? dictEnd : iend;
match0 = idxBase + idx;
offset_2 = offset_1;
offset_1 = offset;
offcode = OFFSET_TO_OFFBASE(offset);
mLength = 4;
/* Count the backwards match length. */
while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) {
ip0--;
match0--;
mLength++;
} }
_match: /* Requires: ip0, match0, offcode, matchEnd */
/* Count the forward length. */
assert(matchEnd != 0);
mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart);
ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
ip0 += mLength;
anchor = ip0;
/* write next hash table entry */
if (ip1 < ip0) {
hashTable[hash1] = (U32)(ip1 - base);
}
/* Fill table and check for immediate repcode. */
if (ip0 <= ilimit) {
/* Fill Table */
assert(base+current0+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
while (ip0 <= ilimit) {
U32 const repIndex2 = (U32)(ip0-base) - offset_2;
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += repLength2;
anchor = ip0;
continue;
}
break;
} }
goto _start;
}
ZSTD_GEN_FAST_FN(extDict, 4, 0)
@ -660,6 +944,7 @@ size_t ZSTD_compressBlock_fast_extDict(
void const* src, size_t srcSize)
{
U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState == NULL);
switch(mls)
{
default: /* includes case 3 */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -19,7 +19,8 @@ extern "C" {
#include "zstd_compress_internal.h"
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -25,6 +25,8 @@ extern "C" {
*/
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
@ -116,7 +118,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -242,11 +242,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
switch(ms->cParams.strategy)
{
case ZSTD_fast:
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
break;
case ZSTD_dfast:
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
break;
case ZSTD_greedy:
@ -549,7 +549,7 @@ size_t ZSTD_ldm_generateSequences(
* the window through early invalidation.
* TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the
* the offset against maxDist directly.
* offset against maxDist directly.
*
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
* that any offset used is valid at the END of the sequence, since it may
@ -711,7 +711,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
rep[0] = sequence.offset;
/* Store the sequence */
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
STORE_OFFSET(sequence.offset),
OFFSET_TO_OFFBASE(sequence.offset),
sequence.matchLength);
ip += sequence.matchLength;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -16,7 +16,7 @@
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE (1<<30)
#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
#define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
/*-*************************************
@ -26,27 +26,35 @@
#if 0 /* approximation at bit level (for tests) */
# define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0 /* fractional bit accuracy (for tests) */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else /* opt==approx, ultra==accurate */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif
/* ZSTD_bitWeight() :
* provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}
/* ZSTD_fracWeight() :
* provide fractional-bit "cost" of a stat,
* using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
U32 const stat = rawStat + 1;
U32 const hb = ZSTD_highbit32(stat);
U32 const BWeight = hb * BITCOST_MULTIPLIER;
/* Fweight was meant for "Fractional weight"
* but it's effectively a value between 1 and 2
* using fixed point arithmetic */
U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
U32 const weight = BWeight + FWeight;
assert(hb + BITCOST_ACCURACY < 31);
@ -57,7 +65,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
/* debugging function,
* @return price in bytes as fractional value
* for debug messages only */
MEM_STATIC double ZSTD_fCost(U32 price)
MEM_STATIC double ZSTD_fCost(int price)
{
return (double)price / (BITCOST_MULTIPLIER*8);
}
@ -88,20 +96,26 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
return total;
}
static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{
U32 s, sum=0;
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
(unsigned)lastEltIndex+1, (unsigned)shift );
assert(shift < 30);
for (s=0; s<lastEltIndex+1; s++) {
table[s] = 1 + (table[s] >> shift);
sum += table[s];
unsigned const base = base1 ? 1 : (table[s]>0);
unsigned const newStat = base + (table[s] >> shift);
sum += newStat;
table[s] = newStat;
}
return sum;
}
/* ZSTD_scaleStats() :
* reduce all elements in table is sum too large
* reduce all elt frequencies in table if sum too large
* return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
@ -110,7 +124,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
assert(logTarget < 30);
if (factor <= 1) return prevsum;
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
}
/* ZSTD_rescaleFreqs() :
@ -129,18 +143,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
optPtr->priceType = zop_dynamic;
if (optPtr->litLengthSum == 0) { /* first block : init */
if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
/* heuristic: use pre-defined stats for too small inputs */
if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
optPtr->priceType = zop_predef;
}
assert(optPtr->symbolCosts != NULL);
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
/* huffman table presumed generated by dictionary */
/* huffman stats covering the full value set : table presumed generated by dictionary */
optPtr->priceType = zop_dynamic;
if (compressedLiterals) {
/* generate literals statistics from huffman table */
unsigned lit;
assert(optPtr->litFreq != NULL);
optPtr->litSum = 0;
@ -188,13 +206,14 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
optPtr->offCodeSum += optPtr->offCodeFreq[of];
} }
} else { /* not a dictionary */
} else { /* first block, no dictionary */
assert(optPtr->litFreq != NULL);
if (compressedLiterals) {
/* base initial cost of literals on direct frequency within src */
unsigned lit = MaxLit;
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
}
{ unsigned const baseLLfreqs[MaxLL+1] = {
@ -224,10 +243,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
}
}
} else { /* new block : re-use previous statistics, scaled down */
} else { /* new block : scale down accumulated statistics */
if (compressedLiterals)
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
@ -255,11 +273,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
/* dynamic statistics */
{ U32 price = litLength * optPtr->litSumBasePrice;
{ U32 price = optPtr->litSumBasePrice * litLength;
U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
U32 u;
assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
for (u=0; u < litLength; u++) {
assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
price -= litPrice;
}
return price;
}
@ -272,10 +293,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
assert(litLength <= ZSTD_BLOCKSIZE_MAX);
if (optPtr->priceType == zop_predef)
return WEIGHT(litLength, optLevel);
/* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
* because it isn't representable in the zstd format. So instead just
* call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
* would be all literals.
/* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
* because it isn't representable in the zstd format.
* So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
* In such a case, the block would be all literals.
*/
if (litLength == ZSTD_BLOCKSIZE_MAX)
return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
@ -289,24 +311,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
}
/* ZSTD_getMatchPrice() :
* Provides the cost of the match part (offset + matchLength) of a sequence
* Provides the cost of the match part (offset + matchLength) of a sequence.
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
* @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
* @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
* @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
*/
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offcode,
ZSTD_getMatchPrice(U32 const offBase,
U32 const matchLength,
const optState_t* const optPtr,
int const optLevel)
{
U32 price;
U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
U32 const offCode = ZSTD_highbit32(offBase);
U32 const mlBase = matchLength - MINMATCH;
assert(matchLength >= MINMATCH);
if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
return WEIGHT(mlBase, optLevel)
+ ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
/* dynamic statistics */
price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
@ -325,10 +348,10 @@ ZSTD_getMatchPrice(U32 const offcode,
}
/* ZSTD_updateStats() :
* assumption : literals + litLengtn <= iend */
* assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
U32 litLength, const BYTE* literals,
U32 offsetCode, U32 matchLength)
U32 offBase, U32 matchLength)
{
/* literals */
if (ZSTD_compressedLiterals(optPtr)) {
@ -344,8 +367,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
optPtr->litLengthSum++;
}
/* offset code : expected to follow storeSeq() numeric representation */
{ U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
/* offset code : follows storeSeq() numeric representation */
{ U32 const offCode = ZSTD_highbit32(offBase);
assert(offCode <= MaxOff);
optPtr->offCodeFreq[offCode]++;
optPtr->offCodeSum++;
@ -552,16 +575,17 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
const U32 rep[ZSTD_REP_NUM],
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
const U32 lengthToBeat,
U32 const mls /* template */)
FORCE_INLINE_TEMPLATE U32
ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip, const BYTE* const iLimit,
const ZSTD_dictMode_e dictMode,
const U32 rep[ZSTD_REP_NUM],
const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
const U32 lengthToBeat,
const U32 mls /* template */)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
@ -644,7 +668,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
repCode, ll0, repOffset, repLen);
bestLength = repLen;
matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */
matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
matches[mnum].len = (U32)repLen;
mnum++;
if ( (repLen > sufficient_len)
@ -673,7 +697,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
bestLength = mlen;
assert(curr > matchIndex3);
assert(mnum==0); /* no prior solution */
matches[0].off = STORE_OFFSET(curr - matchIndex3);
matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
matches[0].len = (U32)mlen;
mnum = 1;
if ( (mlen > sufficient_len) |
@ -706,13 +730,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
}
if (matchLength > bestLength) {
DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
(U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
(U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
assert(matchEndIdx > matchIndex);
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength;
matches[mnum].off = STORE_OFFSET(curr - matchIndex);
matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
matches[mnum].len = (U32)matchLength;
mnum++;
if ( (matchLength > ZSTD_OPT_NUM)
@ -754,12 +778,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
if (matchLength > bestLength) {
matchIndex = dictMatchIndex + dmsIndexDelta;
DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
(U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
(U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength;
matches[mnum].off = STORE_OFFSET(curr - matchIndex);
matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
matches[mnum].len = (U32)matchLength;
mnum++;
if ( (matchLength > ZSTD_OPT_NUM)
@ -960,7 +984,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
/* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
/* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
/* Ensure that current block position is not outside of the match */
@ -971,11 +995,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
}
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
candidateOffCode, candidateMatchLength, currPosInBlock);
U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
candidateOffBase, candidateMatchLength, currPosInBlock);
matches[*nbMatches].len = candidateMatchLength;
matches[*nbMatches].off = candidateOffCode;
matches[*nbMatches].off = candidateOffBase;
(*nbMatches)++;
}
}
@ -1062,6 +1086,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_optimal_t lastSequence;
ZSTD_optLdm_t optLdm;
ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
@ -1098,14 +1124,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len;
U32 const maxOffcode = matches[nbMatches-1].off;
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
U32 const maxOffBase = matches[nbMatches-1].off;
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
if (maxML > sufficient_len) {
lastSequence.litlen = litlen;
lastSequence.mlen = maxML;
lastSequence.off = maxOffcode;
lastSequence.off = maxOffBase;
DEBUGLOG(6, "large match (%u>%u), immediate encoding",
maxML, sufficient_len);
cur = 0;
@ -1122,15 +1148,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
}
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offcode = matches[matchNb].off;
U32 const offBase = matches[matchNb].off;
U32 const end = matches[matchNb].len;
for ( ; pos <= end ; pos++ ) {
U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
U32 const sequencePrice = literalsPrice + matchPrice;
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
pos, ZSTD_fCost(sequencePrice));
pos, ZSTD_fCost((int)sequencePrice));
opt[pos].mlen = pos;
opt[pos].off = offcode;
opt[pos].off = offBase;
opt[pos].litlen = litlen;
opt[pos].price = (int)sequencePrice;
} }
@ -1230,7 +1256,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
U32 mlen;
DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
matchNb, matches[matchNb].off, lastML, litlen);
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
@ -1296,7 +1322,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
for (storePos=storeStart; storePos <= storeEnd; storePos++) {
U32 const llen = opt[storePos].litlen;
U32 const mlen = opt[storePos].mlen;
U32 const offCode = opt[storePos].off;
U32 const offBase = opt[storePos].off;
U32 const advance = llen + mlen;
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
anchor - istart, (unsigned)llen, (unsigned)mlen);
@ -1308,8 +1334,8 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
}
assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
anchor += advance;
ip = anchor;
} }
@ -1349,7 +1375,7 @@ size_t ZSTD_compressBlock_btopt(
/* ZSTD_initStats_ultra():
* make a first compression pass, just to seed stats with more accurate starting values.
* only works on first block, with no dictionary and no ldm.
* this function cannot error, hence its contract must be respected.
* this function cannot error out, its narrow contract must be respected.
*/
static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
@ -1368,7 +1394,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
/* invalidate first scan from history */
/* invalidate first scan from history, only keep entropy stats */
ZSTD_resetSeqStore(seqStore);
ms->window.base -= srcSize;
ms->window.dictLimit += (U32)srcSize;
@ -1392,20 +1418,20 @@ size_t ZSTD_compressBlock_btultra2(
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
/* 2-pass strategy:
/* 2-passes strategy:
* this strategy makes a first pass over first block to collect statistics
* and seed next round's statistics with it.
* After 1st pass, function forgets everything, and starts a new block.
* in order to seed next round's statistics with it.
* After 1st pass, function forgets history, and starts a new block.
* Consequently, this can only work if no data has been previously loaded in tables,
* aka, no dictionary, no prefix, no ldm preprocessing.
* The compression ratio gain is generally small (~0.5% on first block),
* the cost is 2x cpu time on first block. */
** the cost is 2x cpu time on first block. */
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
if ( (ms->opt.litLengthSum==0) /* first block */
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
&& (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
) {
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -20,6 +20,7 @@
/* ====== Dependencies ====== */
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
#include "../common/mem.h" /* MEM_STATIC */
#include "../common/pool.h" /* threadpool */
@ -266,11 +267,11 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
* 1 buffer for input loading
* 1 buffer for "next input" when submitting current one
* 1 buffer stuck in queue */
#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) 2*nbWorkers + 3
#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
/* After a worker releases its rawSeqStore, it is immediately ready for reuse.
* So we only need one seq buffer per worker. */
#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) nbWorkers
#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
/* ===== Seq Pool Wrapper ====== */
@ -719,7 +720,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
ZSTD_invalidateRepCodes(cctx);
@ -737,7 +738,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
assert(job->cSize == 0);
for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
ip += chunkSize;
op += cSize; assert(op < oend);
@ -757,8 +758,8 @@ static void ZSTDMT_compressionJob(void* jobDescription)
size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
size_t const cSize = (job->lastJob) ?
ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
lastCBlockSize = cSize;
} }
@ -1734,7 +1735,7 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
}
} else {
/* We have enough bytes buffered to initialize the hash,
* and are have processed enough bytes to find a sync point.
* and have processed enough bytes to find a sync point.
* Start scanning at the beginning of the input.
*/
assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
@ -1761,17 +1762,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
* then a block will be emitted anyways, but this is okay, since if we
* are already synchronized we will remain synchronized.
*/
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
for (; pos < syncPoint.toLoad; ++pos) {
BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
/* This assert is very expensive, and Debian compiles with asserts enabled.
* So disable it for now. We can get similar coverage by checking it at the
* beginning & end of the loop.
* assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
*/
hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
if ((hash & hitMask) == hitMask) {
syncPoint.toLoad = pos + 1;
syncPoint.flush = 1;
++pos; /* for assert */
break;
}
}
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
return syncPoint;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the