Skip to content
This repository was archived by the owner on Apr 29, 2020. It is now read-only.

Commit d4021db

Browse files
hugomrdias authored and achingbrain committed
feat: use a rabin chunker in wasm (#31)
* feat: use a rabin chunker in wasm
* chore: fix package.json
1 parent f024451 commit d4021db

File tree

2 files changed

+7
-193
lines changed

2 files changed

+7
-193
lines changed

package.json

+3-6
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
"leadMaintainer": "Alex Potsides <[email protected]>",
66
"main": "src/index.js",
77
"browser": {
8-
"fs": false,
9-
"rabin": false
8+
"fs": false
109
},
1110
"scripts": {
1211
"test": "aegir test",
@@ -65,10 +64,8 @@
6564
"long": "^4.0.0",
6665
"multicodec": "~0.5.1",
6766
"multihashing-async": "~0.7.0",
68-
"superstruct": "~0.6.1"
69-
},
70-
"optionalDependencies": {
71-
"rabin": "^1.6.0"
67+
"superstruct": "~0.6.1",
68+
"rabin-wasm": "~0.0.4"
7269
},
7370
"contributors": [
7471
"Alan Shaw <[email protected]>",

src/chunker/rabin.js

+4-187
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,10 @@
11
'use strict'
22

3-
const errCode = require('err-code')
4-
const Long = require('long')
53
const BufferList = require('bl')
6-
let rabin
4+
const { create } = require('rabin-wasm')
75

86
module.exports = async function * rabinChunker (source, options) {
9-
if (!rabin) {
10-
try {
11-
rabin = nativeRabin()
12-
} catch (_) {
13-
// fallback to js implementation
14-
rabin = jsRabin()
15-
}
16-
}
7+
const rabin = jsRabin()
178

189
let min, max, avg
1910

@@ -40,191 +31,17 @@ module.exports = async function * rabinChunker (source, options) {
4031
}
4132
}
4233

43-
const nativeRabin = () => {
44-
const createRabin = require('rabin')
45-
46-
if (typeof rabin !== 'function') {
47-
throw errCode(new Error(`rabin was not a function`), 'ERR_UNSUPPORTED')
48-
}
49-
50-
return async function * (source, options) {
51-
const rabin = createRabin(options)
52-
53-
// TODO: rewrite rabin using node streams v3
54-
for await (const chunk of source) {
55-
rabin.buffers.append(chunk)
56-
rabin.pending.push(chunk)
57-
58-
const sizes = []
59-
60-
rabin.rabin.fingerprint(rabin.pending, sizes)
61-
rabin.pending = []
62-
63-
for (let i = 0; i < sizes.length; i++) {
64-
const size = sizes[i]
65-
const buf = rabin.buffers.slice(0, size)
66-
rabin.buffers.consume(size)
67-
68-
yield buf
69-
}
70-
}
71-
72-
if (rabin.buffers.length) {
73-
yield rabin.buffers.slice(0)
74-
}
75-
}
76-
}
77-
7834
const jsRabin = () => {
79-
// see https://github.com./datproject/rabin/blob/c0378395dc0a125ab21ac176ec504f9995b34e62/src/rabin.cc
80-
class Rabin {
81-
constructor (options) {
82-
this.window = new Array(options.window || 64).fill(Long.fromInt(0))
83-
this.wpos = 0
84-
this.count = 0
85-
this.digest = Long.fromInt(0)
86-
this.chunkLength = 0
87-
this.polynomial = options.polynomial
88-
this.polynomialDegree = 53
89-
this.polynomialShift = this.polynomialDegree - 8
90-
this.averageBits = options.bits || 12
91-
this.minSize = options.min || 8 * 1024
92-
this.maxSize = options.max || 32 * 1024
93-
this.mask = Long.fromInt(1).shiftLeft(this.averageBits).subtract(1)
94-
this.modTable = []
95-
this.outTable = []
96-
97-
this.calculateTables()
98-
}
99-
100-
calculateTables () {
101-
for (let i = 0; i < 256; i++) {
102-
let hash = Long.fromInt(0, true)
103-
104-
hash = this.appendByte(hash, i)
105-
106-
for (let j = 0; j < this.window.length - 1; j++) {
107-
hash = this.appendByte(hash, 0)
108-
}
109-
110-
this.outTable[i] = hash
111-
}
112-
113-
const k = this.deg(this.polynomial)
114-
115-
for (let i = 0; i < 256; i++) {
116-
const b = Long.fromInt(i, true)
117-
118-
this.modTable[i] = b.shiftLeft(k)
119-
.modulo(this.polynomial)
120-
.or(b.shiftLeft(k))
121-
}
122-
}
123-
124-
deg (p) {
125-
let mask = Long.fromString('0x8000000000000000', true, 16)
126-
127-
for (let i = 0; i < 64; i++) {
128-
if (mask.and(p).greaterThan(0)) {
129-
return Long.fromInt(63 - i)
130-
}
131-
132-
mask = mask.shiftRight(1)
133-
}
134-
135-
return Long.fromInt(-1)
136-
}
137-
138-
appendByte (hash, b) {
139-
hash = hash.shiftLeft(8)
140-
hash = hash.or(b)
141-
142-
return hash.modulo(this.polynomial)
143-
}
144-
145-
getFingerprints (bufs) {
146-
const lengths = []
147-
148-
for (let i = 0; i < bufs.length; i++) {
149-
let buf = bufs[i]
150-
151-
while (true) {
152-
const remaining = this.nextChunk(buf)
153-
154-
if (remaining < 0) {
155-
break
156-
}
157-
158-
buf = buf.slice(remaining)
159-
160-
lengths.push(this.chunkLength)
161-
}
162-
}
163-
164-
return lengths
165-
}
166-
167-
nextChunk (buf) {
168-
for (let i = 0; i < buf.length; i++) {
169-
const val = Long.fromInt(buf[i])
170-
171-
this.slide(val)
172-
173-
this.count++
174-
175-
if ((this.count >= this.minSize && this.digest.and(this.mask).equals(0)) || this.count >= this.maxSize) {
176-
this.chunkLength = this.count
177-
178-
this.reset()
179-
180-
return i + 1
181-
}
182-
}
183-
184-
return -1
185-
}
186-
187-
slide (value) {
188-
const out = this.window[this.wpos].toInt() & 255
189-
this.window[this.wpos] = value
190-
this.digest = this.digest.xor(this.outTable[out])
191-
this.wpos = (this.wpos + 1) % this.window.length
192-
193-
this.append(value)
194-
}
195-
196-
reset () {
197-
this.window = this.window.map(() => Long.fromInt(0))
198-
this.wpos = 0
199-
this.count = 0
200-
this.digest = Long.fromInt(0)
201-
202-
this.slide(Long.fromInt(1))
203-
}
204-
205-
append (value) {
206-
const index = this.digest.shiftRight(this.polynomialShift).toInt() & 255
207-
this.digest = this.digest.shiftLeft(8)
208-
this.digest = this.digest.or(value)
209-
210-
const entry = this.modTable[index]
211-
212-
if (entry) {
213-
this.digest = this.digest.xor(entry)
214-
}
215-
}
216-
}
217-
21835
return async function * (source, options) {
219-
const r = new Rabin(options)
36+
const r = await create(options.bits, options.min, options.max, options.window)
22037
const buffers = new BufferList()
22138
let pending = []
22239

22340
for await (const chunk of source) {
22441
buffers.append(chunk)
22542
pending.push(chunk)
22643

227-
const sizes = r.getFingerprints(pending)
44+
const sizes = r.fingerprint(Buffer.concat(pending))
22845
pending = []
22946

23047
for (let i = 0; i < sizes.length; i++) {

0 commit comments

Comments
 (0)