
Fluffy: Simplify and optimize ContentDb pruning #3163

Draft
wants to merge 1 commit into base: master

148 changes: 36 additions & 112 deletions fluffy/database/content_db.nim
@@ -37,17 +37,13 @@ declareCounter portal_pruning_counter,
"Number of pruning events which occured during the node's uptime",
labels = ["protocol_id"]

declareGauge portal_pruning_deleted_elements,
"Number of elements deleted in the last pruning", labels = ["protocol_id"]
declareGauge portal_pruning_used_size,
"Total used size after the last pruning", labels = ["protocol_id"]

const
contentDeletionFraction = 0.05 ## 5% of the content will be deleted when the
## storage capacity is hit and radius gets adjusted.
declareGauge portal_pruning_size,
"Total size after the last pruning", labels = ["protocol_id"]

type
RowInfo =
tuple[contentId: array[32, byte], payloadLength: int64, distance: array[32, byte]]

ContentDB* = ref object
backend: SqStoreRef
kv: KvStoreRef
@@ -60,7 +56,6 @@ type
vacuumStmt: SqliteStmt[NoParams, void]
contentCountStmt: SqliteStmt[NoParams, int64]
contentSizeStmt: SqliteStmt[NoParams, int64]
getAllOrderedByDistanceStmt: SqliteStmt[array[32, byte], RowInfo]
deleteOutOfRadiusStmt: SqliteStmt[(array[32, byte], array[32, byte]), void]
largestDistanceStmt: SqliteStmt[array[32, byte], array[32, byte]]

@@ -234,12 +229,6 @@ proc new*(
let contentCountStmt =
db.prepareStmt("SELECT COUNT(key) FROM kvstore;", NoParams, int64)[]

let getAllOrderedByDistanceStmt = db.prepareStmt(
"SELECT key, length(value), xorDistance(?, key) as distance FROM kvstore ORDER BY distance DESC",
array[32, byte],
RowInfo,
)[]

let deleteOutOfRadiusStmt = db.prepareStmt(
"DELETE FROM kvstore WHERE isInRadius(?, key, ?) == 0",
(array[32, byte], array[32, byte]),
@@ -261,7 +250,6 @@
vacuumStmt: vacuumStmt,
contentSizeStmt: contentSizeStmt,
contentCountStmt: contentCountStmt,
getAllOrderedByDistanceStmt: getAllOrderedByDistanceStmt,
deleteOutOfRadiusStmt: deleteOutOfRadiusStmt,
largestDistanceStmt: largestDistanceStmt,
)
@@ -280,7 +268,6 @@ proc close*(db: ContentDB) =
db.vacuumStmt.disposeSafe()
db.contentCountStmt.disposeSafe()
db.contentSizeStmt.disposeSafe()
db.getAllOrderedByDistanceStmt.disposeSafe()
db.deleteOutOfRadiusStmt.disposeSafe()
db.largestDistanceStmt.disposeSafe()
discard db.kv.close()
@@ -325,36 +312,6 @@ proc del*(db: ContentDB, key: ContentId) =

## Pruning related calls

proc deleteContentFraction*(
db: ContentDB, target: UInt256, fraction: float64
): (UInt256, int64, int64, int64) =
## Deletes at most a `fraction` of the content from the database.
## The content furthest from the provided `target` is deleted first.
# TODO: The usage of `db.contentSize()` for the deletion calculation versus
# `db.usedSize()` for the pruning threshold sometimes leads to unexpected
# results in how much content gets deleted.
doAssert(fraction > 0 and fraction < 1, "Deleted fraction should be > 0 and < 1")

let totalContentSize = db.contentSize()
let bytesToDelete = int64(fraction * float64(totalContentSize))
var deletedElements: int64 = 0

var ri: RowInfo
var deletedBytes: int64 = 0
let targetBytes = target.toBytesBE()
for e in db.getAllOrderedByDistanceStmt.exec(targetBytes, ri):
if deletedBytes + ri.payloadLength <= bytesToDelete:
db.del(ri.contentId)
deletedBytes = deletedBytes + ri.payloadLength
inc deletedElements
else:
return (
UInt256.fromBytesBE(ri.distance),
deletedBytes,
totalContentSize,
deletedElements,
)

proc reclaimSpace*(db: ContentDB): void =
## Runs sqlite VACUUM commands which rebuild the db, repacking it into a
## minimal amount of disk space.
@@ -390,9 +347,33 @@ proc forcePrune*(db: ContentDB, localId: UInt256, radius: UInt256) =
db.reclaimAndTruncate()
notice "Finished database pruning"

proc putAndPrune*(db: ContentDB, key: ContentId, value: openArray[byte]): PutResult =
db.put(key, value)
proc prune*(db: ContentDB) =
## Decrease the radius by `radiusDecreasePercentage` percent and prune the
## content outside of the new radius.
const radiusDecreasePercentage = 5
# The percentage here is somewhat arbitrary but should be big enough to not
# constantly require pruning. If it is too small, the radius would be adjusted
# so often that the network might not be able to keep up with the node's
# current radius, and the content would also be iterated over far too often.
# If the percentage is too big, pruning could render the node unresponsive for
# too long.

let newRadius = db.dataRadius div 100 * (100 - radiusDecreasePercentage)

info "Pruning content outside of radius",
oldRadius = db.dataRadius, newRadius = newRadius
db.deleteContentOutOfRadius(db.localId, newRadius)
db.dataRadius = newRadius

let usedSize = db.usedSize()
let size = db.size()
portal_pruning_counter.inc()
portal_pruning_used_size.set(usedSize)
portal_pruning_size.set(size)

info "Finished pruning content", usedSize, size, storageCapacity = db.storageCapacity

proc putAndPrune*(db: ContentDB, key: ContentId, value: openArray[byte]) =
# The used size is used as the pruning threshold. This means that the database
# size will reach the size specified in db.storageCapacity and will stay
# around that size throughout the node's lifetime, as after content deletion
@@ -404,55 +385,12 @@ proc putAndPrune*(db: ContentDB, key: ContentId, value: openArray[byte]): PutRes
# static radius.
# When not using the `forcePrune` functionality, pruning to the required
# capacity will not be very effective and free pages will not be returned.
let dbSize = db.usedSize()

if dbSize < int64(db.storageCapacity):
return PutResult(kind: ContentStored)
else:
# Note:
# An approach of deleting a full fraction is chosen here, in an attempt
# to not continuously require radius updates, which could have a negative
# impact on the network. However this should be further investigated, as
# doing a large fraction deletion could cause a temporary node performance
# degradation. The `contentDeletionFraction` might need further tuning or
# one could opt for a much more granular approach using sql statement
# in the trend of:
# "SELECT key FROM kvstore ORDER BY xorDistance(?, key) DESC LIMIT 1"
# Potentially adjusting the LIMIT for how many items require deletion.
let (distanceOfFurthestElement, deletedBytes, totalContentSize, deletedElements) =
db.deleteContentFraction(db.localId, contentDeletionFraction)

let deletedFraction = float64(deletedBytes) / float64(totalContentSize)
info "Deleted content fraction", deletedBytes, deletedElements, deletedFraction

return PutResult(
kind: DbPruned,
distanceOfFurthestElement: distanceOfFurthestElement,
deletedFraction: deletedFraction,
deletedElements: deletedElements,
)
db.put(key, value)

proc adjustRadius(
db: ContentDB, deletedFraction: float64, distanceOfFurthestElement: UInt256
) =
# Invert fraction as the UInt256 implementation does not support
# multiplication by float
let invertedFractionAsInt = int64(1.0 / deletedFraction)
let scaledRadius = db.dataRadius div u256(invertedFractionAsInt)

# Choose the larger value to avoid the situation where the
# `distanceOfFurthestElement` is very close to the local id, so that the local
# radius would end up too small to accept any more data into the database.
# If scaledRadius is the larger one, it will still contain all elements.
let newRadius = max(scaledRadius, distanceOfFurthestElement)

info "Database radius adjusted",
oldRadius = db.dataRadius, newRadius = newRadius, distanceOfFurthestElement

# Both scaledRadius and distanceOfFurthestElement are smaller than the current
# dataRadius, so the radius will constantly decrease throughout the node's
# lifetime.
db.dataRadius = newRadius
while db.usedSize() >= int64(db.storageCapacity):
[Inline review comment, Contributor Author] I don't fully like that we have to do a while loop here, but otherwise there might be situations where nothing or not enough content gets deleted.

# Note: This should typically only happen once, but if the content is not
# distributed uniformly over the id range, it could happen multiple times.
db.prune()

proc createGetHandler*(db: ContentDB): DbGetHandler =
return (
@@ -477,21 +415,7 @@ proc createStoreHandler*(db: ContentDB, cfg: RadiusConfig): DbStoreHandler =
of Dynamic:
# In case of a dynamic radius, the radius gets adjusted based on the
# storage capacity and content gets pruned accordingly.
let res = db.putAndPrune(contentId, content)
if res.kind == DbPruned:
portal_pruning_counter.inc()
portal_pruning_deleted_elements.set(res.deletedElements.int64)

if res.deletedFraction > 0.0:
db.adjustRadius(res.deletedFraction, res.distanceOfFurthestElement)
else:
# Note:
# This can occur when the furthest content is bigger than the fraction
# size. This is unlikely to happen as it would require either very
# small storage capacity or a very small `contentDeletionFraction`
# combined with some big content.
info "Database pruning attempt resulted in no content deleted"
return
db.putAndPrune(contentId, content)
of Static:
# If the radius is static, it may never be adjusted, database capacity
# is disabled and no pruning is ever done.
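To summarize the new control flow outside the diff context: `putAndPrune` now simply stores the content and then calls `prune` in a loop until the used size drops below the configured capacity, and each `prune` call lowers the radius by 5% and deletes everything outside the new radius (after n rounds the radius is roughly 0.95^n of its starting value, so about 14 rounds halve it). Below is a dependency-free sketch of that flow, not code from this PR: `MockDb`, the `uint64` ids and the in-memory `Table` are illustrative stand-ins for the real `UInt256`-based, SQLite-backed `ContentDB`.

import std/tables

type
  MockDb = object
    dataRadius: uint64        # stand-in for the UInt256 radius
    localId: uint64
    storageCapacity: uint64
    content: Table[uint64, seq[byte]]

proc usedSize(db: MockDb): uint64 =
  for v in db.content.values:
    result += uint64(v.len)

proc deleteContentOutOfRadius(db: var MockDb, localId, radius: uint64) =
  # xor distance to the local id decides what stays, as in the Portal DHT.
  var toDelete: seq[uint64]
  for id in db.content.keys:
    if (id xor localId) > radius:
      toDelete.add(id)
  for id in toDelete:
    db.content.del(id)

proc prune(db: var MockDb) =
  const radiusDecreasePercentage = 5
  let newRadius = db.dataRadius div 100 * (100 - radiusDecreasePercentage)
  db.deleteContentOutOfRadius(db.localId, newRadius)
  db.dataRadius = newRadius

proc putAndPrune(db: var MockDb, key: uint64, value: seq[byte]) =
  db.content[key] = value
  # One prune typically suffices; the loop guards against skewed key
  # distributions where a single 5% radius cut frees too little space.
  while db.usedSize() >= db.storageCapacity:
    db.prune()

var db = MockDb(dataRadius: uint64.high, localId: 0'u64,
                storageCapacity: 100_000'u64)
for i in 0'u64 ..< 40'u64:
  db.putAndPrune(uint64.high div 39 * i, newSeq[byte](10_000))
echo db.usedSize() < db.storageCapacity   # true: pruning kept the size in check
echo db.dataRadius < uint64.high          # true: the radius was lowered
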
55 changes: 11 additions & 44 deletions fluffy/tests/test_content_db.nim
@@ -1,5 +1,5 @@
# Fluffy
# Copyright (c) 2021-2024 Status Research & Development GmbH
# Copyright (c) 2021-2025 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
@@ -36,7 +36,7 @@ suite "Content Database":
db.contains(key) == false

block:
discard db.putAndPrune(key, [byte 0, 1, 2, 3])
db.putAndPrune(key, [byte 0, 1, 2, 3])

var val = Opt.none(seq[byte])
proc onData(data: openArray[byte]) =
@@ -67,11 +67,11 @@ suite "Content Database":

let numBytes = 10000
let size1 = db.size()
discard db.putAndPrune(u256(1), genByteSeq(numBytes))
db.putAndPrune(u256(1), genByteSeq(numBytes))
let size2 = db.size()
discard db.putAndPrune(u256(2), genByteSeq(numBytes))
db.putAndPrune(u256(2), genByteSeq(numBytes))
let size3 = db.size()
discard db.putAndPrune(u256(2), genByteSeq(numBytes))
db.putAndPrune(u256(2), genByteSeq(numBytes))
let size4 = db.size()
let usedSize = db.usedSize()

@@ -104,53 +104,20 @@ suite "Content Database":
usedSize2 == size6

test "ContentDB pruning":
# TODO: This test is extremely breakable when changing
# `contentDeletionFraction` and/or the used test values.
# Need to rework either this test, or the pruning mechanism, or probably
# both.
let
storageCapacity = 100_000'u64
storageCapacity = 1_000_000'u64 # 1MB
db = ContentDB.new(
"", storageCapacity, RadiusConfig(kind: Dynamic), testId, inMemory = true
)
numBytes = 1_000
bytes = genByteSeq(numBytes)

furthestElement = u256(40)
secondFurthest = u256(30)
thirdFurthest = u256(20)

numBytes = 10_000
pr1 = db.putAndPrune(u256(1), genByteSeq(numBytes))
pr2 = db.putAndPrune(thirdFurthest, genByteSeq(numBytes))
pr3 = db.putAndPrune(u256(3), genByteSeq(numBytes))
pr4 = db.putAndPrune(u256(10), genByteSeq(numBytes))
pr5 = db.putAndPrune(u256(5), genByteSeq(numBytes))
pr6 = db.putAndPrune(u256(11), genByteSeq(numBytes))
pr7 = db.putAndPrune(furthestElement, genByteSeq(2000))
pr8 = db.putAndPrune(secondFurthest, genByteSeq(2000))
pr9 = db.putAndPrune(u256(2), genByteSeq(numBytes))
pr10 = db.putAndPrune(u256(4), genByteSeq(12000))

check:
pr1.kind == ContentStored
pr2.kind == ContentStored
pr3.kind == ContentStored
pr4.kind == ContentStored
pr5.kind == ContentStored
pr6.kind == ContentStored
pr7.kind == ContentStored
pr8.kind == ContentStored
pr9.kind == ContentStored
pr10.kind == DbPruned
for i in 0 .. 800:
let contentId = UInt256.high div 800 * i.u256
db.putAndPrune(contentId, bytes)

check:
pr10.deletedElements == 2
uint64(db.usedSize()) < storageCapacity
# With the current settings the 2 furthest elements will be deleted,
# i.e. keys 30 and 40. The furthest non-deleted one will have key 20.
pr10.distanceOfFurthestElement == thirdFurthest
not db.contains(furthestElement)
not db.contains(secondFurthest)
db.contains(thirdFurthest)

test "ContentDB force pruning":
const
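The rewritten "ContentDB pruning" test above no longer pins per-key expectations; it spreads 801 keys evenly over the id space so that each 5% radius cut removes a roughly proportional share of content, and then asserts only on the final used size. A minimal stand-alone check of that proportionality, with uint64 standing in for UInt256 and a local id of 0 chosen purely for illustration:

# Counts how many of the 801 evenly spaced keys fall inside a given radius,
# using the same spacing formula as the rewritten test.
func keysInRadius(localId, radius: uint64): int =
  for i in 0'u64 .. 800'u64:
    let contentId = uint64.high div 800 * i
    if (contentId xor localId) <= radius:
      inc result

let localId = 0'u64
echo keysInRadius(localId, uint64.high)               # 801: every key fits
echo keysInRadius(localId, uint64.high div 100 * 95)  # 761: about 95% remain
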
36 changes: 14 additions & 22 deletions fluffy/tests/wire_protocol_tests/test_portal_wire_protocol.nim
@@ -8,7 +8,7 @@
{.used.}

import
std/[algorithm, sequtils],
std/sequtils,
chronos,
testutils/unittests,
results,
@@ -386,10 +386,6 @@ procSuite "Portal Wire Protocol Tests":
await node2.stopPortalProtocol()

asyncTest "Adjusting radius after hitting full database":
# TODO: This test is extremely breakable when changing
# `contentDeletionFraction` and/or the used test values.
# Need to rework either this test, or the pruning mechanism, or probably
# both.
let
node1 = initDiscoveryNode(rng, PrivateKey.random(rng[]), localAddress(20303))

@@ -413,27 +409,23 @@
)

let item = genByteSeq(10_000)
var distances: seq[UInt256] = @[]
var contentIds: seq[UInt256] = @[]
let startRadius = db.dataRadius

for i in 0 ..< 40:
proto1.storeContent(ByteList[2048].init(@[uint8(i)]), u256(i), item)
distances.add(u256(i) xor proto1.localNode.id)
let contentId = UInt256.high div 39 * i.u256
proto1.storeContent(ByteList[2048].init(@[uint8(i)]), contentId, item)
contentIds.add(contentId)

distances.sort(order = SortOrder.Descending)
check db.dataRadius < startRadius

# With the selected db limit of 100_000 bytes and added elements of 10_000
# bytes each, the two furthest elements should be pruned, i.e. indexes 0 and 1.
# Index 2 should still be in the database and its distance should be <= the
# updated radius.
check:
not db.contains((distances[0] xor proto1.localNode.id))
not db.contains((distances[1] xor proto1.localNode.id))
not db.contains((distances[2] xor proto1.localNode.id))
db.contains((distances[3] xor proto1.localNode.id))
# The radius has been updated and is lower than the maximum start value.
proto1.dataRadius() < UInt256.high
# Yet higher than or equal to the furthest non deleted element.
proto1.dataRadius() >= distances[3]
for contentId in contentIds:
if db.dataRadius >= (contentId xor proto1.localNode.id):
check db.contains(contentId)
else:
check not db.contains(contentId)

check db.usedSize() < int64(dbLimit)

await proto1.stop()
await node1.closeWait()
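The check loop added above boils down to a single xor-distance predicate: a content id is kept if its xor distance to the local node id is within the data radius. A minimal stand-alone version of that predicate, with uint64 standing in for UInt256 and `inRadius` being an illustrative helper rather than the repo's `isInRadius` SQL function:

# Mirrors the test's `db.dataRadius >= (contentId xor proto1.localNode.id)` check.
func inRadius(localId, contentId, radius: uint64): bool =
  (localId xor contentId) <= radius

assert inRadius(0b1000'u64, 0b1001'u64, 0b0001'u64)      # distance 1 <= radius 1
assert not inRadius(0b1000'u64, 0b0000'u64, 0b0111'u64)  # distance 8 > radius 7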