Poor performance on big file

We might cause weak hash collisions by switching over, but that should be fine.

Yeah, exactly. Dgryski has another ten hash functions or so over here, but I haven’t checked if any are rolling and fast: https://github.com/dgryski?utf8=✓&tab=repositories&q=hash&type=&language=

diff --git a/lib/weakhash/benchmark_test.go b/lib/weakhash/benchmark_test.go
index 79f5ce9..1ab3cb6 100644
--- a/lib/weakhash/benchmark_test.go
+++ b/lib/weakhash/benchmark_test.go
@@ -9,9 +9,14 @@ package weakhash
 import (
        "os"
        "testing"
+
+       "github.com/chmduquesne/rollinghash/adler32"
+       "github.com/chmduquesne/rollinghash/buzhash"
+       "github.com/chmduquesne/rollinghash/rabinkarp32"
 )

 const testFile = "../model/testdata/~syncthing~file.tmp"
+const size = 128 << 10

 func BenchmarkFind1MFile(b *testing.B) {
        b.ReportAllocs()
@@ -28,3 +33,163 @@ func BenchmarkFind1MFile(b *testing.B) {
                fd.Close()
        }
 }
+
+func BenchmarkFind1MFileRabinKarp(b *testing.B) {
+       b.ReportAllocs()
+       b.SetBytes(1 << 20)
+       for i := 0; i < b.N; i++ {
+               fd, err := os.Open(testFile)
+               if err != nil {
+                       b.Fatal(err)
+               }
+               _, err = FindCustom(fd, []uint32{0, 1, 2}, 128<<10, rabinkarp32.New)
+               if err != nil {
+                       b.Fatal(err)
+               }
+               fd.Close()
+       }
+}
+
+func BenchmarkFind1MFileAdler32(b *testing.B) {
+       b.ReportAllocs()
+       b.SetBytes(1 << 20)
+       for i := 0; i < b.N; i++ {
+               fd, err := os.Open(testFile)
+               if err != nil {
+                       b.Fatal(err)
+               }
+               _, err = FindCustom(fd, []uint32{0, 1, 2}, 128<<10, adler32.New)
+               if err != nil {
+                       b.Fatal(err)
+               }
+               fd.Close()
+       }
+}
+
+func BenchmarkFind1MFileBuzHash(b *testing.B) {
+       b.ReportAllocs()
+       b.SetBytes(1 << 20)
+       for i := 0; i < b.N; i++ {
+               fd, err := os.Open(testFile)
+               if err != nil {
+                       b.Fatal(err)
+               }
+               _, err = FindCustom(fd, []uint32{0, 1, 2}, 128<<10, buzhash.New)
+               if err != nil {
+                       b.Fatal(err)
+               }
+               fd.Close()
+       }
+}
+
+func BenchmarkWeakHashOurs(b *testing.B) {
+       data := make([]byte, size)
+       hf := NewHash(size)
+
+       for i := 0; i < b.N; i++ {
+               hf.Write(data)
+       }
+
+       _ = hf.Sum32()
+       b.SetBytes(size)
+}
+
+func BenchmarkWeakHashRabinKarp(b *testing.B) {
+       data := make([]byte, size)
+       hf := rabinkarp32.New()
+
+       for i := 0; i < b.N; i++ {
+               hf.Write(data)
+       }
+
+       _ = hf.Sum32()
+       b.SetBytes(size)
+}
+
+func BenchmarkWeakHashAdler32(b *testing.B) {
+       data := make([]byte, size)
+       hf := adler32.New()
+
+       for i := 0; i < b.N; i++ {
+               hf.Write(data)
+       }
+
+       _ = hf.Sum32()
+       b.SetBytes(size)
+}
+
+func BenchmarkWeakHashBuzHash(b *testing.B) {
+       data := make([]byte, size)
+       hf := buzhash.New()
+
+       for i := 0; i < b.N; i++ {
+               hf.Write(data)
+       }
+
+       _ = hf.Sum32()
+       b.SetBytes(size)
+}
+
+func BenchmarkWeakHashOursRoll(b *testing.B) {
+       data := make([]byte, size)
+       hf := NewHash(size)
+       hf.Write(data)
+
+       b.ResetTimer()
+
+       for i := 0; i < b.N; i++ {
+               for j := 0; j < size; j++ {
+                       hf.Write([]byte{'a'})
+               }
+       }
+
+       b.SetBytes(size)
+}
+
+func BenchmarkWeakHashRabinKarpRoll(b *testing.B) {
+       data := make([]byte, size)
+       hf := rabinkarp32.New()
+       hf.Write(data)
+
+       b.ResetTimer()
+
+       for i := 0; i < b.N; i++ {
+               for j := 0; j < size; j++ {
+                       hf.Roll('a')
+               }
+       }
+
+       b.SetBytes(size)
+}
+
+func BenchmarkWeakHashAdler32Roll(b *testing.B) {
+       data := make([]byte, size)
+       hf := adler32.New()
+       hf.Write(data)
+
+       b.ResetTimer()
+
+       for i := 0; i < b.N; i++ {
+               for j := 0; j < size; j++ {
+                       hf.Roll('a')
+               }
+       }
+
+       b.SetBytes(size)
+}
+
+func BenchmarkWeakHashBuzHashRoll(b *testing.B) {
+       data := make([]byte, size)
+       hf := buzhash.New()
+       hf.Write(data)
+
+       b.ResetTimer()
+
+       for i := 0; i < b.N; i++ {
+               for j := 0; j < size; j++ {
+                       hf.Roll('a')
+               }
+       }
+
+       b.SetBytes(size)
+}
diff --git a/lib/weakhash/weakhash.go b/lib/weakhash/weakhash.go
index 5c210a5..9411b54 100644
--- a/lib/weakhash/weakhash.go
+++ b/lib/weakhash/weakhash.go
@@ -11,6 +11,8 @@ import (
        "hash"
        "io"
        "os"
+
+       "github.com/chmduquesne/rollinghash"
 )

 const (
@@ -71,6 +73,53 @@ func Find(ir io.Reader, hashesToFind []uint32, size int) (map[uint32][]int64, er
        return offsets, nil
 }

+// FindCustom finds all the blocks of the given size within the io.Reader
+// that match the hashes provided, using the given rolling hash, and returns
+// a map from each hash to the offsets at which it occurs in the reader.
+func FindCustom(ir io.Reader, hashesToFind []uint32, size int, hashMaker func() rollinghash.Hash32) (map[uint32][]int64, error) {
+       if ir == nil {
+               return nil, nil
+       }
+
+       r := bufio.NewReader(ir)
+       hf := hashMaker()
+
+       n, err := io.CopyN(hf, r, int64(size))
+       if err == io.EOF {
+               return nil, nil
+       }
+       if err != nil {
+               return nil, err
+       }
+       if n != int64(size) {
+               return nil, io.ErrShortBuffer
+       }
+
+       offsets := make(map[uint32][]int64)
+       for _, hashToFind := range hashesToFind {
+               offsets[hashToFind] = nil
+       }
+
+       var i int64
+       var hash uint32
+       for {
+               hash = hf.Sum32()
+               if existing, ok := offsets[hash]; ok {
+                       offsets[hash] = append(existing, i)
+               }
+               i++
+
+               bt, err := r.ReadByte()
+               if err == io.EOF {
+                       break
+               } else if err != nil {
+                       return offsets, err
+               }
+               hf.Roll(bt)
+       }
+       return offsets, nil
+}
+
BenchmarkFind1MFile-4              	      20	  59880225 ns/op	  17.51 MB/s	 6952674 B/op	  917549 allocs/op
BenchmarkFind1MFileRabinKarp-4     	      50	  28291430 ns/op	  37.06 MB/s	  168810 B/op	      18 allocs/op
BenchmarkFind1MFileAdler32-4       	      50	  33253970 ns/op	  31.53 MB/s	  168816 B/op	      21 allocs/op
BenchmarkFind1MFileBuzHash-4       	      30	  43864000 ns/op	  23.91 MB/s	14800502 B/op	      52 allocs/op
BenchmarkWeakHashOurs-4            	    1000	   2108324 ns/op	  62.17 MB/s
BenchmarkWeakHashRabinKarp-4       	    3000	    468214 ns/op	 279.94 MB/s
BenchmarkWeakHashAdler32-4         	   20000	     89862 ns/op	1458.58 MB/s
BenchmarkWeakHashBuzHash-4         	    5000	    368838 ns/op	 355.36 MB/s
BenchmarkWeakHashOursRoll-4        	     300	   4396461 ns/op	  29.81 MB/s
BenchmarkWeakHashRabinKarpRoll-4   	    2000	   1029349 ns/op	 127.33 MB/s
BenchmarkWeakHashAdler32Roll-4     	    1000	   1489260 ns/op	  88.01 MB/s
BenchmarkWeakHashBuzHashRoll-4     	    1000	   1154916 ns/op	 113.49 MB/s
PASS
ok  	github.com/syncthing/syncthing/lib/weakhash	21.007s

But the actual roll is quite shit overall

Yeah. Anything we need to do byte by byte for an entire file is going to suck. But that part of it can at least be switched off by configuration, as opposed to what we do per block in the scanner.

If we wanted to go the complicated route we could add some of the heuristics we talked about when introducing the weakhash - only do the rolling if we see a lot of blocks that differ after a certain point in the file. That would avoid us weakhashing a 75 gig file at 88 MB/s just to see if we can find the one 128 KiB block we are looking for instead of transferring it over the network in like a millisecond…
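
For illustration, here is a toy sketch of that heuristic in Go, with a made-up function name and threshold, not Syncthing's actual code: only do the rolling pass when enough blocks differ that it is likely to pay for itself. A percentage threshold like this is essentially what weakHashThresholdPct (discussed further down) expresses.

package main

import "fmt"

// shouldRollWeakHash is hypothetical: given how many of a file's blocks
// differ, decide whether a full rolling weak-hash pass over the file is
// likely to be cheaper than just fetching the blocks over the network.
func shouldRollWeakHash(blocksTotal, blocksChanged int) bool {
	if blocksTotal == 0 {
		return false
	}
	// Made-up threshold: below 10% changed blocks, skip the rolling pass.
	return blocksChanged*100/blocksTotal >= 10
}

func main() {
	// One changed 128 KiB block in a ~75 GiB file: not worth it.
	fmt.Println(shouldRollWeakHash(600000, 1))
	// A third of the file changed: probably worth it.
	fmt.Println(shouldRollWeakHash(600000, 200000))
}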

I think we could optimize it a fair bit by hooking into the digest directly as well, for the rolling case. We’re doing the same amount of work really, it’s just that we have a bunch of function calls for every byte hashed. If we instead passed the entire block to Write (or a function like it) and let it return 128k weak hashes in one pass it would probably be about the same speed.
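
A minimal sketch of that idea, assuming direct access to the digest state; the adler-style rolling state below is illustrative, not Syncthing's actual weakhash internals. RollAll rolls the window across a whole buffer and returns the hash at every offset in one pass, so the per-byte cost is one loop iteration rather than one exported-method call:

package main

import "fmt"

const mod = 65521 // largest prime below 2^16, as in adler-32

type rolling struct {
	a, b   uint32
	window []byte // current window contents
	pos    int    // index of the oldest byte in the window
}

// RollAll replaces the oldest window byte with each byte of in, in turn,
// and records the 32-bit hash after every step. Compared to calling a
// Roll method once per byte, the call overhead is paid once per buffer.
func (r *rolling) RollAll(in []byte) []uint32 {
	out := make([]uint32, len(in))
	n := uint32(len(r.window))
	for i, c := range in {
		old := uint32(r.window[r.pos])
		r.window[r.pos] = c
		r.pos = (r.pos + 1) % len(r.window)
		r.a = (r.a + mod - old + uint32(c)) % mod
		r.b = (r.b + 2*mod - (n*old)%mod + r.a - 1) % mod
		out[i] = r.b<<16 | r.a
	}
	return out
}

func main() {
	win := make([]byte, 4) // zero-filled starting window
	r := &rolling{a: 1, b: uint32(len(win)) % mod, window: win}
	fmt.Println(r.RollAll([]byte("hello world"))[:3])
}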

I actually also encountered doubled scanning progress a few days ago when I changed .stignore mid-scan. I assumed (apparently incorrectly) that this was expected behaviour, however weird (which is par for the course with .stignore).

Hi! Hopefully this helps: where I work, a few years ago, we needed to hash a LOT of data, and we spent some time investigating which algorithm would work best for us (I wasn’t involved in this myself, so I don’t know exactly whether our needs are similar to Syncthing’s.)

BUT: We ended up using Murmurhash; I just checked, and there is a Go implementation of it. I googled around a bit for benchmarks comparing adler32 to murmurhash, and found one (comparing Java implementations, but still):

http://jpountz.github.io/lz4-java/1.3.0/xxhash-benchmark/

It indicated that murmur(2) is faster (at least in Java) than adler32.

BUT: it also pointed to xxHash being even faster.

It also has a Go implementation, and its webpage is here:

http://cyan4973.github.io/xxHash/

Hope this helps!

With 0.14.19 we are back in business 🙂

25.8 GiB VDI file, Core i5-4590S (“sha256Perf”: 361.6), VM and Syncthing DB on the system SSD (960 EVO NVMe). Single thread hash performance is 385 MB/s using crypto/sha256 (379 MB/s using minio/sha256-simd).

Scan time: ~1:40, scan speed: 271 MB/s

Still ~100 MB/s less than the test on startup, but a huge improvement over the 47 MB/s 😉

There is some overhead that brings the scan speed down a bit from the theoretical maximum.

One part we can calculate, although it’s not displayed at startup or in the GUI: the weak hash checksum. Each block is hashed by SHA-256 (385 MB/s above) and then by the weak hash (~1200 MB/s on my computer). The resulting theoretical hash speed is 1/(1/385 + 1/1200) = 291 MB/s. We could potentially do these two in parallel, in the future…
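
As a quick check of that arithmetic (the two hash passes run serially per block, so the rates combine harmonically; numbers as quoted above):

package main

import "fmt"

func main() {
	sha256Rate := 385.0 // MB/s, single thread, from the startup test above
	weakRate := 1200.0  // MB/s, the weak hash on the same machine
	// Serial stages combine harmonically: the total time per byte is the
	// sum of the per-byte times of each stage.
	combined := 1 / (1/sha256Rate + 1/weakRate)
	fmt.Printf("theoretical combined rate: %.0f MB/s\n", combined) // ~291 MB/s
}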

Then there is some memory allocation, shuffling, waiting for disk/SSD, etc. that I think accounts for the rest. 🙂

I got around 15:40 for 75 GB, which is around 80 MB/s. That more or less doubled the speed for me. Note that the printed hashing speed is still what it was in 0.14.18-1:

INFO: Single thread hash performance is 156 MB/s using minio/sha256-simd (117 MB/s using crypto/sha256).

As already mentioned by @wweich, that’s a pretty good improvement. Still, 80 MB/s is about 10% less than half the speed my disk can deliver. Is there a way to let at least two threads work on a single file, maybe by letting one run from top to bottom and the other from bottom to top, meeting in the middle?

That explains my 271 MB/s.

I doubt that Syncthing has to wait for my SSD, as it has 2000 MB/s read speed 🙂 and very low access times (NVMe SSD).

Is there a way to disable the weak hash? I set weakHashThresholdPct to 101 but it is still used on scan.

Raise an issue, we should add a way to do that.

There’s currently no way to disable weak hashing on the hashing side. The setting that exists controls its use on the syncing side.

Indeed, and I have that same situation. But I think what we may end up waiting for is the IO system calls and so on, which in Go can mean a bit more context switching than usual. One of those per 128 KiB block, during which no hashing happens, probably accounts for the majority of the remaining loss.

I had a couple of branches a long time ago that did this concurrently - one routine to read data and put it in a queue, another one or several that hashed data from the queue. This was not a win at the time due to the overhead of managing that and the locking and communication it needs - the current hashing loop is quite well optimized to run fast and never allocate etc for a single file.

But it does mean we’re limited to a single core for a single file, while running two hashing algorithms. It might be worth it to try again and parallelize it - but I think it’s quite far down on the current list of things that need to happen.
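
For reference, a minimal sketch of that reader/hasher pipeline, assuming a 128 KiB block size; the queue depth and the per-block allocation are illustrative, not what Syncthing’s hashing loop actually does:

package main

import (
	"crypto/sha256"
	"fmt"
	"io"
	"os"
)

const blockSize = 128 << 10

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: pipeline <file>")
		os.Exit(1)
	}
	fd, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}
	defer fd.Close()

	blocks := make(chan []byte, 4) // small queue between reader and hasher

	// Reader goroutine: fills the queue with blocks.
	go func() {
		defer close(blocks)
		for {
			// This per-block allocation is exactly the kind of
			// overhead discussed above.
			buf := make([]byte, blockSize)
			n, err := io.ReadFull(fd, buf)
			if n > 0 {
				blocks <- buf[:n]
			}
			if err != nil { // io.EOF or io.ErrUnexpectedEOF ends the stream
				return
			}
		}
	}()

	// Hasher: drains the queue; several of these could run in parallel.
	count := 0
	for blk := range blocks {
		_ = sha256.Sum256(blk)
		count++
	}
	fmt.Println("hashed", count, "blocks")
}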

The weak hash seems to be somewhat broken. After the first test yesterday with version 0.14.19, both devices reported 0% sync and 0 B/s speed, even after 4 hours (for 36 GB in 2 files, Gbit LAN). Then I set weakHashThresholdPct to 101 on both devices to disable the weak hash for the puller. After that, they did sync and got to “in sync” after a while.

In my munin stats overview from yesterday (for the receiving device), I saw constant 100% CPU usage in the time between the first test and the weakHashThresholdPct change, while disk read throughput was only 8-9 MB/s for the first hour and then 0 until the setting change. After the change there was only ~25% CPU load for about an hour, which is probably the time Syncthing took to sync the VM images. (RAM usage was also much higher during those 4 hours.)

It would be interesting if you could grab stack traces while it’s happening, or a CPU profile.

@wucke13: Bugs aside, you could set the VM sync folder to a very high scan interval, and use a script in your VM manager that calls the Syncthing API to ask for a rescan once the VM closes. This should help prevent scanning while the VM is being used and make sure a scan is started as soon as it’s necessary.
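
For example, something along these lines; the /rest/db/scan endpoint and the X-API-Key header are Syncthing’s standard REST interface, while the folder ID and API key below are placeholders:

package main

import (
	"fmt"
	"net/http"
)

func main() {
	// Ask Syncthing to rescan one folder. Replace the folder ID and the
	// API key (shown in the GUI settings) with your own values.
	req, err := http.NewRequest("POST",
		"http://localhost:8384/rest/db/scan?folder=vm-images", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("X-API-Key", "your-api-key-here")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("rescan requested:", resp.Status)
}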

@ProactiveServices That is indeed a solid suggestion, thank you. I will try to modify my KVM script so that it does not modify the last-changed attribute on the file during use, but does a touch on shutdown instead; that should have the same effect, since a scan is only triggered when the last-changed date changes.

I was in the same situation, except that my file was only 250 MB.

I set the weak hash setting to 101 and it synced fine then. Something is broken there for sure.

I had been battling with this cancerous sync file for over a week; finally it is resolved and I can sleep well.

This could be https://github.com/syncthing/syncthing/pull/3925

That may also explain some CPU usage (GC and map shuffling) but is probably not the whole truth.
