psy0rz · kyle0r · Nov 22, 2023 · May 13, 2024
diff --git a/zfs_autobackup/BlockHasher.py b/zfs_autobackup/BlockHasher.py
@@ -1,6 +1,11 @@
 import hashlib
 import os
 
+xxhash = None  # optional dependency: stays None when the xxhash module cannot be imported
+try:
+    import xxhash
+except ImportError:
+    pass
 
 class BlockHasher():
     """This class was created to checksum huge files and blockdevices (TB's)
@@ -16,7 +21,7 @@ class BlockHasher():
 
     """
 
-    def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1, skip=0):
+    def __init__(self, count=10000, bs=4096, hash_class=None, skip=0):
         self.count = count
         self.bs = bs
         self.chunk_size=bs*count
@@ -28,6 +33,28 @@ def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1, skip=0):
 
         self.stats_total_bytes=0
 
+    def hash_factory(self):
+        """Return a new, empty hash object for the configured algorithm.
+
+        self.hash_class may be None (sha1 is used, the default before
+        algorithm names were introduced), a callable that returns a hash
+        object (e.g. hashlib.sha1), a name listed in
+        hashlib.algorithms_available, or the name of an xxhash constructor
+        such as 'xxh3_64'.
+
+        Raises ValueError when the requested algorithm cannot be provided.
+        """
+        hash_class = self.hash_class
+        if hash_class is None:
+            return hashlib.sha1()
+        if callable(hash_class):
+            return hash_class()
+        if hash_class in hashlib.algorithms_available:
+            return hashlib.new(hash_class)
+        # xxhash is None when the optional module could not be imported
+        if xxhash is not None and hash_class.startswith('xxh') and hasattr(xxhash, hash_class):
+            return getattr(xxhash, hash_class)()
+        raise ValueError('Unsupported hash algorithm: ' + str(hash_class))
 
     def _seek_next_chunk(self, fh, fsize):
         """seek fh to next chunk and update skip counter.
@@ -80,7 +90,7 @@ def generate(self, fname):
                     return
 
                 #read chunk
-                hash = self.hash_class()
+                hash = self.hash_factory()
                 block_nr = 0
                 while block_nr != self.count:
                     block=fh.read(self.bs)
@@ -105,7 +115,7 @@ def compare(self, fname, generator):
                     try:
 
                         checked = checked + 1
-                        hash = self.hash_class()
+                        hash = self.hash_factory()
                         f.seek(int(chunk_nr) * self.bs * self.count)
                         block_nr = 0
                         for block in iter(lambda: f.read(self.bs), b""):
@@ -124,4 +134,4 @@ def compare(self, fname, generator):
                         yield ( chunk_nr , hexdigest, 'ERROR: '+str(e))
 
         except Exception as e:
-            yield ( '-', '-', 'ERROR: '+ str(e))
+            yield ( '-', '-', 'ERROR: '+ str(e))
diff --git a/zfs_autobackup/ZfsCheck.py b/zfs_autobackup/ZfsCheck.py
@@ -10,6 +10,13 @@
 from .util import *
 from .CliBase import CliBase
 
+from hashlib import algorithms_available
+from copy import copy
+xxhash = None  # optional dependency: stays None when the xxhash module cannot be imported
+try:
+    import xxhash
+except ImportError:
+    pass
 
 class ZfsCheck(CliBase):
 
@@ -20,7 +27,31 @@ def __init__(self, argv, print_arguments=True):
 
         self.node = ZfsNode(self.log, utc=self.args.utc, readonly=self.args.test, debug_output=self.args.debug_output)
 
-        self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size, skip=self.args.skip)
+        self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size, skip=self.args.skip, hash_class=self.args.hash)
+
+    def determine_algorithms_available(self):
+        """Work out which hash algorithm names can be selected, and the default.
+
+        Sets self.algorithms_available to a copy of hashlib's
+        algorithms_available plus the xxhash variants that the imported xxhash
+        module provides, and sets self.hash_default to 'xxh3_64' when that
+        variant is available, otherwise to 'sha1'.
+        """
+        # work on a copy, so the set owned by hashlib is never modified
+        self.algorithms_available = copy(algorithms_available)
+        self.hash_default = 'sha1'
+
+        # xxhash is None when the optional module could not be imported
+        if xxhash is None:
+            return
+
+        for value in ('xxh128', 'xxh32', 'xxh3_128', 'xxh3_64', 'xxh64'):
+            # only offer constructors that this xxhash module really has
+            if hasattr(xxhash, value):
+                self.algorithms_available.add(value)
+
+        if 'xxh3_64' in self.algorithms_available:
+            self.hash_default = 'xxh3_64'
 
     def get_parser(self):
 
@@ -42,6 +59,9 @@ def get_parser(self):
         group.add_argument('--skip', '-s', metavar="NUMBER", default=0, type=int,
                            help="Skip this number of chunks after every hash. %(default)s")
 
+        self.determine_algorithms_available()
+        group.add_argument('--hash', default=self.hash_default,
+                           help="Specify the hashing algorithm to use", choices=sorted([item for item in self.algorithms_available]))
         return parser
 
     def parse_args(self, argv):