Skip to content
This repository was archived by the owner on Apr 29, 2020. It is now read-only.

Commit 542b3e4

Browse files
authored
feat: adds js implementation of rabin chunker for windows and browser (#30)
1 parent c849359 commit 542b3e4

File tree

6 files changed

+232
-79
lines changed

6 files changed

+232
-79
lines changed

package.json

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
"hamt-sharding": "~0.0.2",
6363
"ipfs-unixfs": "~0.1.16",
6464
"ipld-dag-pb": "~0.17.2",
65+
"long": "^4.0.0",
6566
"multicodec": "~0.5.1",
6667
"multihashing-async": "~0.7.0",
6768
"superstruct": "~0.6.1"

src/chunker/rabin.js

+208-24
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
'use strict'
22

33
const errCode = require('err-code')
4-
5-
let createRabin
4+
const Long = require('long')
5+
const BufferList = require('bl')
6+
let rabin
67

78
module.exports = async function * rabinChunker (source, options) {
8-
if (!createRabin) {
9+
if (!rabin) {
910
try {
10-
createRabin = require('rabin')
11-
12-
if (typeof createRabin !== 'function') {
13-
throw errCode(new Error(`createRabin was not a function`), 'ERR_UNSUPPORTED')
14-
}
15-
} catch (err) {
16-
throw errCode(new Error(`Rabin chunker not available, it may have failed to install or not be supported on this platform`), 'ERR_UNSUPPORTED')
11+
rabin = nativeRabin()
12+
} catch (_) {
13+
// fallback to js implementation
14+
rabin = jsRabin()
1715
}
1816
}
1917

@@ -30,30 +28,216 @@ module.exports = async function * rabinChunker (source, options) {
3028
}
3129

3230
const sizepow = Math.floor(Math.log2(avg))
33-
const rabin = createRabin({
31+
32+
for await (const chunk of rabin(source, {
3433
min: min,
3534
max: max,
3635
bits: sizepow,
3736
window: options.window,
3837
polynomial: options.polynomial
39-
})
38+
})) {
39+
yield chunk
40+
}
41+
}
42+
43+
const nativeRabin = () => {
44+
const createRabin = require('rabin')
45+
46+
if (typeof rabin !== 'function') {
47+
throw errCode(new Error(`rabin was not a function`), 'ERR_UNSUPPORTED')
48+
}
49+
50+
return async function * (source, options) {
51+
const rabin = createRabin(options)
52+
53+
// TODO: rewrite rabin using node streams v3
54+
for await (const chunk of source) {
55+
rabin.buffers.append(chunk)
56+
rabin.pending.push(chunk)
57+
58+
const sizes = []
59+
60+
rabin.rabin.fingerprint(rabin.pending, sizes)
61+
rabin.pending = []
62+
63+
for (let i = 0; i < sizes.length; i++) {
64+
const size = sizes[i]
65+
const buf = rabin.buffers.slice(0, size)
66+
rabin.buffers.consume(size)
67+
68+
yield buf
69+
}
70+
}
71+
72+
if (rabin.buffers.length) {
73+
yield rabin.buffers.slice(0)
74+
}
75+
}
76+
}
77+
78+
/**
 * Builds a chunker backed by a pure JavaScript Rabin fingerprinter, used when
 * the native module cannot be loaded.
 *
 * @returns {Function} an async generator function `(source, options)` that
 *   yields one buffer per detected chunk boundary, followed by any bytes left
 *   over once `source` is exhausted. `options` is read for `window`,
 *   `polynomial`, `bits`, `min` and `max`.
 */
const jsRabin = () => {
  // see https://github.com/datproject/rabin/blob/c0378395dc0a125ab21ac176ec504f9995b34e62/src/rabin.cc
  class Rabin {
    /**
     * @param {object} options
     * @param {number} [options.window] - sliding window length in bytes (default 64)
     * @param {*} options.polynomial - modulus handed to the `Long` operations below
     * @param {number} [options.bits] - number of low digest bits that must be zero at a boundary (default 12)
     * @param {number} [options.min] - minimum chunk size in bytes (default 8 KiB)
     * @param {number} [options.max] - maximum chunk size in bytes (default 32 KiB)
     */
    constructor (options) {
      // Long values are immutable, so sharing one zero across all slots is safe
      this.window = new Array(options.window || 64).fill(Long.fromInt(0))
      this.wpos = 0 // next window slot to overwrite
      this.count = 0 // bytes consumed since the last boundary
      this.digest = Long.fromInt(0) // rolling fingerprint
      this.chunkLength = 0 // length of the most recently completed chunk
      this.polynomial = options.polynomial
      this.polynomialDegree = 53
      this.polynomialShift = this.polynomialDegree - 8
      this.averageBits = options.bits || 12
      this.minSize = options.min || 8 * 1024
      this.maxSize = options.max || 32 * 1024
      // low `averageBits` bits set
      this.mask = Long.fromInt(1).shiftLeft(this.averageBits).subtract(1)
      this.modTable = []
      this.outTable = []

      this.calculateTables()
    }

    /**
     * Fills the two 256-entry lookup tables: `outTable`, which `slide` uses
     * for the byte leaving the window, and `modTable`, which `append` uses to
     * reduce the digest.
     */
    calculateTables () {
      for (let i = 0; i < 256; i++) {
        let hash = Long.fromInt(0, true)

        hash = this.appendByte(hash, i)

        // follow byte `i` with zero bytes for the rest of the window
        for (let j = 0; j < this.window.length - 1; j++) {
          hash = this.appendByte(hash, 0)
        }

        this.outTable[i] = hash
      }

      const k = this.deg(this.polynomial)

      for (let i = 0; i < 256; i++) {
        const shifted = Long.fromInt(i, true).shiftLeft(k)

        this.modTable[i] = shifted
          .modulo(this.polynomial)
          .or(shifted)
      }
    }

    /**
     * Finds the index of the highest set bit of `p`.
     *
     * @param {*} p - value to inspect
     * @returns {Long} bit index (0-63), or -1 when no bit is set
     */
    deg (p) {
      let mask = Long.fromString('0x8000000000000000', true, 16)

      for (let i = 0; i < 64; i++) {
        if (mask.and(p).greaterThan(0)) {
          return Long.fromInt(63 - i)
        }

        mask = mask.shiftRight(1)
      }

      return Long.fromInt(-1)
    }

    /**
     * Shifts `b` into the low byte of `hash` and reduces the result modulo
     * the polynomial.
     *
     * @param {Long} hash
     * @param {number} b - byte value
     * @returns {Long}
     */
    appendByte (hash, b) {
      hash = hash.shiftLeft(8)
      hash = hash.or(b)

      return hash.modulo(this.polynomial)
    }

    /**
     * Feeds every buffer through the fingerprinter.
     *
     * @param {Array} bufs - buffers to scan, in order
     * @returns {number[]} lengths of the chunks completed while scanning
     */
    getFingerprints (bufs) {
      const lengths = []

      for (let i = 0; i < bufs.length; i++) {
        let buf = bufs[i]

        while (true) {
          const remaining = this.nextChunk(buf)

          // no further boundary in this buffer; state carries over to the next
          if (remaining < 0) {
            break
          }

          buf = buf.slice(remaining)

          lengths.push(this.chunkLength)
        }
      }

      return lengths
    }

    /**
     * Consumes bytes from `buf` until a chunk boundary is hit.
     *
     * @param {*} buf - indexable byte sequence
     * @returns {number} count of bytes of `buf` consumed up to and including
     *   the boundary, or -1 when `buf` ends first
     */
    nextChunk (buf) {
      for (let i = 0; i < buf.length; i++) {
        const val = Long.fromInt(buf[i])

        this.slide(val)

        this.count++

        // boundary: digest matches the mask once past minSize, or maxSize reached
        if ((this.count >= this.minSize && this.digest.and(this.mask).equals(0)) || this.count >= this.maxSize) {
          this.chunkLength = this.count

          this.reset()

          return i + 1
        }
      }

      return -1
    }

    /**
     * Replaces the oldest byte in the window with `value` and updates the
     * digest accordingly.
     *
     * @param {Long} value
     */
    slide (value) {
      const out = this.window[this.wpos].toInt() & 255
      this.window[this.wpos] = value
      this.digest = this.digest.xor(this.outTable[out])
      this.wpos = (this.wpos + 1) % this.window.length

      this.append(value)
    }

    /**
     * Clears the window, position, counter and digest, then slides in a
     * single 1 byte. `chunkLength` is left untouched.
     */
    reset () {
      this.window = this.window.map(() => Long.fromInt(0))
      this.wpos = 0
      this.count = 0
      this.digest = Long.fromInt(0)

      this.slide(Long.fromInt(1))
    }

    /**
     * Shifts `value` into the digest and folds in the `modTable` entry
     * selected by the digest's previous top byte.
     *
     * @param {Long} value
     */
    append (value) {
      const index = this.digest.shiftRight(this.polynomialShift).toInt() & 255
      this.digest = this.digest.shiftLeft(8)
      this.digest = this.digest.or(value)

      const entry = this.modTable[index]

      if (entry) {
        this.digest = this.digest.xor(entry)
      }
    }
  }

  return async function * (source, options) {
    const r = new Rabin(options)
    const buffers = new BufferList()

    for await (const chunk of source) {
      // keep the raw bytes so they can be sliced once chunk sizes are known
      buffers.append(chunk)

      for (const size of r.getFingerprints([chunk])) {
        const buf = buffers.slice(0, size)
        buffers.consume(size)

        yield buf
      }
    }

    // whatever is left after the last detected boundary forms the final chunk
    if (buffers.length) {
      yield buffers.slice(0)
    }
  }
}

src/index.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ const ChunkerOptions = struct({
1818
maxChunkSize: 'number?',
1919
avgChunkSize: 'number?',
2020
window: 'number?',
21-
polynomial: 'string?'
21+
polynomial: 'number?'
2222
}, {
2323
maxChunkSize: 262144,
2424
avgChunkSize: 262144,
2525
window: 16,
26-
polynomial: '0x3DF305DFB2A805'
26+
polynomial: 17437180132763653 // https://github.com./ipfs/go-ipfs-chunker/blob/d0125832512163708c0804a3cda060e21acddae4/rabin.go#L11
2727
})
2828

2929
const BuilderOptions = struct({

test/chunker-fixed-size.spec.js

-6
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,6 @@ const rawFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1MiB.txt'
1313
describe('chunker: fixed size', function () {
1414
this.timeout(30000)
1515

16-
before(function () {
17-
if (!isNode) {
18-
this.skip()
19-
}
20-
})
21-
2216
it('chunks non flat buffers', async () => {
2317
const b1 = Buffer.alloc(2 * 256)
2418
const b2 = Buffer.alloc(1 * 256)

test/chunker-rabin-browser.spec.js

-25
This file was deleted.

0 commit comments

Comments
 (0)