103 #include <sys/types.h> 126 unsigned short a = 0;
127 unsigned short b = 0;
132 rsum rcksum_calc_rsum_block(
const unsigned char *data,
size_t len) {
133 unsigned short a = 0;
134 unsigned short b = 0;
137 unsigned char c = *data++;
146 #define UPDATE_RSUM(a, b, oldc, newc, bshift) do { (a) += ((unsigned char)(newc)) - ((unsigned char)(oldc)); (b) += (a) - ((oldc) << (bshift)); } while (0) 152 void inline truncateRsum (
unsigned int &rs,
const int rsumlen )
172 MediaBlockList::MediaBlockList(off_t size)
197 memcpy(&
fsum[0], c, cl);
222 if (dig.empty() || dig.size() <
fsum.size())
224 return memcmp(&dig[0], &
fsum[0],
fsum.size()) ?
false :
true;
243 memcpy(&
chksums[csl * blkno], cs, csl);
280 size_t size =
blocks[blkno].size;
290 if (dig.empty() || dig.size() < size_t(
chksumlen))
301 unsigned short s = 0, m = 0;
302 s = (rs >> 16) & 65535;
304 for (; len > 0 ; len--)
306 unsigned short c = (
unsigned char)*bytes++;
310 return (s & 65535) << 16 | (m & 65535);
318 size_t size =
blocks[blkno].size;
323 unsigned short s = 0, m = 0;
324 s = (rs >> 16) & 65535;
327 rs = (s & 65535) << 16 | (m & 65535);
330 return rs ==
rsums[blkno];
336 if (blkno >=
blocks.size() || bufl <
blocks[blkno].size)
345 if (blkno >=
blocks.size() || bufl <
blocks[blkno].size)
378 if (blkno >=
blocks.size() || bufl <
blocks[blkno].size)
385 size_t size =
blocks[blkno].size;
386 size_t len = bufl - start > size ? size : bufl - start;
387 dig.
update((
const char *)buf + start, len);
389 dig.
update((
const char *)buf, size - len);
397 if (blkno >=
blocks.size() || bufl <
blocks[blkno].size)
399 off_t off =
blocks[blkno].off;
400 size_t size =
blocks[blkno].size;
401 if (fseeko(fp, off, SEEK_SET))
405 size_t len = bufl - start > size ? size : bufl - start;
406 if (fwrite(buf + start, len, 1, fp) != 1)
408 if (size > len && fwrite(buf, size - len, 1, fp) != 1)
411 found[
blocks.size()] =
true;
415 fetchnext(
FILE *fp,
unsigned char *bp,
size_t blksize,
size_t pushback,
unsigned char *pushbackp)
423 memmove(bp, pushbackp, pushback);
446 DBG <<
"Delta XFER: Can not reuse blocks because we have no chksumlen" << std::endl;
450 if ( (fp = fopen(filename.c_str(),
"r")) == 0 ) {
451 DBG <<
"Delta XFER: Can not reuse blocks, unable to open file "<< filename << std::endl;
455 size_t nblks =
blocks.size();
456 std::vector<bool> found( nblks + 1 );
459 const auto rsumAMask =
rsumlen < 3 ? 0 :
rsumlen == 3 ? 0xff : 0xffff;
465 auto zsyncRsumsData = std::make_unique<rsum[]>(
rsums.size() +
rsumseq );
466 auto zsyncRsums = zsyncRsumsData.get();
467 for ( std::size_t i = 0; i <
rsums.size(); i++ ) {
468 const auto &rs =
rsums[i];
469 unsigned short s = 0, m = 0;
470 s = (rs >> 16) & 65535;
472 zsyncRsums[i] = rsum{ s, m };
476 const auto & calc_rhash = [&](
const rsum* e ) ->
unsigned {
479 for ( uint i = 1; i <
rsumseq; i++ ) {
483 h ^= ( e[0].a & rsumAMask ) << 3;
488 size_t blksize =
blocks[0].size;
494 uint rsumHashMask = 0;
499 while ((2 << (i - 1)) > nblks && i > 4)
503 rsumHashMask = (2 << i) - 1;
507 auto rsumHashTableData = std::make_unique<std::vector<size_t>[]>( rsumHashMask + 1 );
508 auto rsumHashTable = rsumHashTableData.get();
511 for (
size_t id = 0;
id < nblks;
id++) {
512 const auto hash = calc_rhash( &zsyncRsums[
id] );
513 auto &hashList = rsumHashTable[ hash & rsumHashMask ];
514 hashList.push_back(
id);
518 constexpr
auto BLOCKCNT = 16;
522 const auto readBufSize = blksize *
rsumseq * BLOCKCNT;
525 auto readBufData = std::make_unique<unsigned char[]>( readBufSize );
526 memset(readBufData.get(), 0, blksize);
529 auto readBuf = readBufData.get();
532 auto seqRsumsData = std::make_unique<rsum[]> (
rsumseq );
533 auto seqRsums = seqRsumsData.get();
538 if ((blksize & (blksize - 1)) == 0)
539 for (bshift = 0; size_t(1 << bshift) != blksize; bshift++)
545 std::optional<size_t> nextReqMatchInSequence;
546 off_t dataOffset = 0;
550 const auto &tryWriteMatchingBlocks = [&](
const std::vector<size_t> &list,
const u_char *currBuf, uint reqMatches ){
552 int targetBlocksWritten = 0;
555 nextReqMatchInSequence.reset();
557 for (
const auto blkno : list ) {
562 const auto blockRsum = &zsyncRsums[blkno];
564 uint weakMatches = 0;
568 if ( (seqRsums[0].
a & rsumAMask) != blockRsum[0].
a ||
569 seqRsums[0].
b != blockRsum[0].
b )
574 for ( uint i = 1; i < reqMatches; i++ ) {
575 if ( (seqRsums[i].
a & rsumAMask) != blockRsum[i].
a ||
576 seqRsums[i].
b != blockRsum[i].
b )
581 if ( weakMatches < reqMatches )
585 uint realMatches = 0;
586 for( uint i = 0; i < reqMatches; i++ ) {
587 if ( !
checkChecksum(blkno + i, currBuf + ( i * blksize ), blksize ) ) {
594 if( realMatches < reqMatches )
599 const auto nextPossibleMatch = blkno + realMatches;
600 if ( !found[nextPossibleMatch] )
601 nextReqMatchInSequence = nextPossibleMatch;
603 for( uint i = 0; i < realMatches; i++ ) {
604 writeBlock( blkno + i, wfp, currBuf + ( i * blksize ), blksize, 0, found );
605 targetBlocksWritten++;
608 return targetBlocksWritten;
614 const off_t seqMatchLen = ( blksize *
rsumseq );
616 while (! feof(fp) ) {
619 dataLen = fread( readBuf, 1, readBufSize, fp );
623 const auto remainLen = dataLen-dataOffset;
625 memmove( readBuf, readBuf+dataOffset, remainLen );
627 dataLen = fread( readBuf+remainLen, 1, readBufSize-remainLen, fp );
628 dataLen += remainLen;
634 memset( readBuf + dataLen, 0, readBufSize - dataLen );
635 dataLen = readBufSize;
638 if ( dataLen < seqMatchLen )
642 for( uint i = 0; i <
rsumseq; i++ )
643 seqRsums[i] = rcksum_calc_rsum_block( readBuf + ( i * blksize ), blksize );
648 if ( dataOffset + seqMatchLen > dataLen )
651 u_char *currBuf = readBuf + dataOffset;
655 uint deltaBlocksMatched = 0;
657 if ( nextReqMatchInSequence.has_value() ) {
658 if ( tryWriteMatchingBlocks( { *nextReqMatchInSequence }, currBuf, 1 ) > 0 )
659 deltaBlocksMatched = 1;
662 const auto hash = calc_rhash( seqRsums );
665 auto &blockListForHash = rsumHashTable[ hash & rsumHashMask ];
666 if ( blockListForHash.size() ) {
667 if ( tryWriteMatchingBlocks( blockListForHash, currBuf,
rsumseq ) > 0 )
672 if ( deltaBlocksMatched > 0 ) {
674 dataOffset += ( deltaBlocksMatched * blksize );
676 if ( dataOffset + seqMatchLen > dataLen )
679 if ( deltaBlocksMatched <
rsumseq ) {
683 for( uint i = 0; i <
rsumseq; i++ )
684 seqRsums[i] = rcksum_calc_rsum_block( readBuf + dataOffset + ( i * blksize ), blksize );
690 if ( dataOffset + seqMatchLen > dataLen )
692 for ( uint i = 0; i <
rsumseq; i++ ) {
693 const auto blkOff = ( i*blksize );
694 u_char oldC = (currBuf + blkOff)[0];
695 u_char newC = (currBuf + blkOff)[blksize];
696 UPDATE_RSUM( seqRsums[i].
a, seqRsums[i].
b, oldC, newC, bshift );
707 auto buf = std::make_unique<unsigned char[]>( bufl );
708 for (
size_t blkno = 0; blkno <
blocks.size(); ++blkno)
710 if (off >
blocks[blkno].off)
712 size_t blksize =
blocks[blkno].size;
716 buf = std::make_unique<unsigned char[]>( bufl );
718 size_t skip =
blocks[blkno].off - off;
721 size_t l = skip > bufl ? bufl : skip;
722 if (fread(buf.get(), l, 1, fp) != 1)
727 if (fread(buf.get(), blksize, 1, fp) != 1)
730 writeBlock(blkno, wfp, buf.get(), blksize, 0, found);
735 DBG <<
"Delta XFER: No reusable blocks found for " << filename << std::endl;
739 std::vector<MediaBlock> nblocks;
740 std::vector<unsigned char> nchksums;
741 std::vector<unsigned int> nrsums;
743 size_t originalSize = 0;
745 for (
size_t blkno = 0; blkno <
blocks.size(); ++blkno)
747 const auto &blk =
blocks[blkno];
748 originalSize += blk.size;
752 nblocks.push_back(blk);
756 nchksums.resize(nblocks.size() *
chksumlen);
760 nrsums.push_back(
rsums[blkno]);
763 DBG <<
"Delta XFER: Found blocks to reuse, " <<
blocks.size() <<
" vs " << nblocks.size() <<
", resused blocks: " <<
blocks.size() - nblocks.size() <<
"\n" 764 <<
"Old transfer size: " << originalSize <<
" new size: " << newSize << std::endl;
776 DBG <<
"Delta XFER: Can not reuse blocks because we have no chksumlen" << std::endl;
780 if ( (fp = fopen(filename.c_str(),
"r")) == 0 ) {
781 DBG <<
"Delta XFER: Can not reuse blocks, unable to open file "<< filename << std::endl;
784 size_t nblks =
blocks.size();
785 std::vector<bool> found;
786 found.resize(nblks + 1);
789 size_t blksize =
blocks[0].size;
798 unsigned int hm =
rsums.size() * 2;
799 while (hm & (hm - 1)) {
809 auto rsumHashTable = std::make_unique<unsigned int[]>( hm+1 );
810 memset(rsumHashTable.get(), 0, (hm + 1) *
sizeof(
unsigned int));
813 for (
unsigned int i = 0; i <
rsums.size(); i++)
815 if (
blocks[i].size != blksize && (i != nblks - 1 ||
rsumpad != blksize))
817 unsigned int r =
rsums[i];
818 unsigned int h = r & hm;
820 while (rsumHashTable[h])
822 rsumHashTable[h] = i + 1;
831 auto ringBuf = std::make_unique<unsigned char[]>( blksize );
834 auto buf2 = std::make_unique<unsigned char[]>( blksize );
840 unsigned char *pushbackp = 0;
845 if ((blksize & (blksize - 1)) == 0)
846 for (bshift = 0; size_t(1 << bshift) != blksize; bshift++)
852 unsigned short a = 0,
b = 0;
854 memset(ringBuf.get(), 0, blksize);
863 for (
size_t i = 0; i < blksize; i++)
902 b +=
a - ( oc << bshift );
910 if (
size_t(i) != blksize - 1)
915 unsigned int r = ((
unsigned int)
a & 65535) << 16 | ((
unsigned int)
b & 65535);
918 unsigned int h = r & hm;
922 for (; rsumHashTable[h]; h = (h + hh++) & hm)
924 size_t blkno = rsumHashTable[h] - 1;
927 if (
rsums[blkno] != r)
936 if (eof || blkno + 1 >= nblks)
938 pushback =
fetchnext(fp, buf2.get(), blksize, pushback, pushbackp);
939 pushbackp = buf2.get();
943 if (!
checkRsum(blkno + 1, buf2.get(), blksize))
956 writeBlock(blkno, wfp, ringBuf.get(), blksize, i + 1, found);
959 writeBlock(blkno + 1, wfp, buf2.get(), blksize, 0, found);
968 pushback =
fetchnext(fp, buf2.get(), blksize, pushback, pushbackp);
969 pushbackp = buf2.get();
973 if (!
checkRsum(blkno, buf2.get(), blksize))
979 writeBlock(blkno, wfp, buf2.get(), blksize, 0, found);
986 memset(ringBuf.get(), 0, blksize);
999 auto buf = std::make_unique<unsigned char[]>( bufl );
1000 for (
size_t blkno = 0; blkno <
blocks.size(); ++blkno)
1002 if (off >
blocks[blkno].off)
1004 size_t blksize =
blocks[blkno].size;
1008 buf = std::make_unique<unsigned char[]>( bufl );
1010 size_t skip =
blocks[blkno].off - off;
1013 size_t l = skip > bufl ? bufl : skip;
1014 if (fread(buf.get(), l, 1, fp) != 1)
1019 if (fread(buf.get(), blksize, 1, fp) != 1)
1022 writeBlock(blkno, wfp, buf.get(), blksize, 0, found);
1026 if (!found[nblks]) {
1027 DBG <<
"Delta XFER: No reusable blocks found for " << filename << std::endl;
1031 std::vector<MediaBlock> nblocks;
1032 std::vector<unsigned char> nchksums;
1033 std::vector<unsigned int> nrsums;
1035 size_t originalSize = 0;
1037 for (
size_t blkno = 0; blkno <
blocks.size(); ++blkno)
1039 const auto &blk =
blocks[blkno];
1040 originalSize += blk.size;
1044 nblocks.push_back(blk);
1045 newSize += blk.size;
1048 nchksums.resize(nblocks.size() *
chksumlen);
1052 nrsums.push_back(
rsums[blkno]);
1055 DBG <<
"Delta XFER: Found blocks to reuse, " <<
blocks.size() <<
" vs " << nblocks.size() <<
", resused blocks: " <<
blocks.size() - nblocks.size() <<
"\n" 1056 <<
"Old transfer size: " << originalSize <<
" new size: " << newSize << std::endl;
1066 size_t i = 0, j = 0;
1074 s =
"[ BlockList, filesize unknown\n";
1076 s +=
" No block information\n";
1081 for (i = 0; i <
blocks.size(); ++i)
1083 long long off=
blocks[i].off;
1084 long long size=
blocks[i].size;
UByteArray digestVector()
get vector of unsigned char representation of the digest
Compute Message Digests (MD5, SHA1 etc)
std::string form(const char *format,...) __attribute__((format(printf
Printf style construction of std::string.
bool create(const std::string &name)
initialize creation of a new message digest
constexpr std::string_view FILE("file")
AutoDispose<FILE*> calling ::fclose
Easy-to use interface to the ZYPP dependency resolver.
bool update(const char *bytes, size_t len)
feed data into digest computation algorithm