Skip to content

Commit c0a5a21

Browse files
kkdwivediAlexei Starovoitov
authored and
Alexei Starovoitov
committed
bpf: Allow storing referenced kptr in map
Extending the code in previous commits, introduce referenced kptr support, which needs to be tagged using 'kptr_ref' tag instead. Unlike unreferenced kptr, referenced kptr have a lot more restrictions. In addition to the type matching, only a newly introduced bpf_kptr_xchg helper is allowed to modify the map value at that offset. This transfers the referenced pointer being stored into the map, releasing the references state for the program, and returning the old value and creating new reference state for the returned pointer. Similar to unreferenced pointer case, return value for this case will also be PTR_TO_BTF_ID_OR_NULL. The reference for the returned pointer must either be eventually released by calling the corresponding release function, otherwise it must be transferred into another map. It is also allowed to call bpf_kptr_xchg with a NULL pointer, to clear the value, and obtain the old value if any. BPF_LDX, BPF_STX, and BPF_ST cannot access referenced kptr. A future commit will permit using BPF_LDX for such pointers, but attempt at making it safe, since the lifetime of object won't be guaranteed. There are valid reasons to enforce the restriction of permitting only bpf_kptr_xchg to operate on referenced kptr. The pointer value must be consistent in face of concurrent modification, and any prior values contained in the map must also be released before a new one is moved into the map. To ensure proper transfer of this ownership, bpf_kptr_xchg returns the old value, which the verifier would require the user to either free or move into another map, and releases the reference held for the pointer being moved in. In the future, direct BPF_XCHG instruction may also be permitted to work like bpf_kptr_xchg helper. Note that process_kptr_func doesn't have to call check_helper_mem_access, since we already disallow rdonly/wronly flags for map, which is what check_map_access_type checks, and we already ensure the PTR_TO_MAP_VALUE refers to kptr by obtaining its off_desc, so check_map_access is also not required. Signed-off-by: Kumar Kartikeya Dwivedi <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 8f14852 commit c0a5a21

File tree

6 files changed

+151
-13
lines changed

6 files changed

+151
-13
lines changed

include/linux/bpf.h

+8
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,14 @@ enum {
160160
BPF_MAP_VALUE_OFF_MAX = 8,
161161
};
162162

163+
enum bpf_kptr_type {
164+
BPF_KPTR_UNREF,
165+
BPF_KPTR_REF,
166+
};
167+
163168
struct bpf_map_value_off_desc {
164169
u32 offset;
170+
enum bpf_kptr_type type;
165171
struct {
166172
struct btf *btf;
167173
u32 btf_id;
@@ -418,6 +424,7 @@ enum bpf_arg_type {
418424
ARG_PTR_TO_STACK, /* pointer to stack */
419425
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
420426
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
427+
ARG_PTR_TO_KPTR, /* pointer to referenced kptr */
421428
__BPF_ARG_TYPE_MAX,
422429

423430
/* Extended arg_types. */
@@ -427,6 +434,7 @@ enum bpf_arg_type {
427434
ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
428435
ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
429436
ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
437+
ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
430438

431439
/* This must be the last entry. Its purpose is to ensure the enum is
432440
* wide enough to hold the higher bits reserved for bpf_type_flag.

include/uapi/linux/bpf.h

+12
Original file line numberDiff line numberDiff line change
@@ -5143,6 +5143,17 @@ union bpf_attr {
51435143
* The **hash_algo** is returned on success,
51445144
* **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
51455145
* invalid arguments are passed.
5146+
*
5147+
* void *bpf_kptr_xchg(void *map_value, void *ptr)
5148+
* Description
5149+
* Exchange kptr at pointer *map_value* with *ptr*, and return the
5150+
* old value. *ptr* can be NULL, otherwise it must be a referenced
5151+
* pointer which will be released when this helper is called.
5152+
* Return
5153+
* The old value of kptr (which can be NULL). The returned pointer
5154+
* if not NULL, is a reference which must be released using its
5155+
* corresponding release function, or moved into a BPF map before
5156+
* program exit.
51465157
*/
51475158
#define __BPF_FUNC_MAPPER(FN) \
51485159
FN(unspec), \
@@ -5339,6 +5350,7 @@ union bpf_attr {
53395350
FN(copy_from_user_task), \
53405351
FN(skb_set_tstamp), \
53415352
FN(ima_file_hash), \
5353+
FN(kptr_xchg), \
53425354
/* */
53435355

53445356
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

kernel/bpf/btf.c

+9-1
Original file line numberDiff line numberDiff line change
@@ -3177,6 +3177,7 @@ enum {
31773177
struct btf_field_info {
31783178
u32 type_id;
31793179
u32 off;
3180+
enum bpf_kptr_type type;
31803181
};
31813182

31823183
static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
@@ -3193,6 +3194,7 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
31933194
static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
31943195
u32 off, int sz, struct btf_field_info *info)
31953196
{
3197+
enum bpf_kptr_type type;
31963198
u32 res_id;
31973199

31983200
/* For PTR, sz is always == 8 */
@@ -3205,7 +3207,11 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
32053207
/* Reject extra tags */
32063208
if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
32073209
return -EINVAL;
3208-
if (strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
3210+
if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
3211+
type = BPF_KPTR_UNREF;
3212+
else if (!strcmp("kptr_ref", __btf_name_by_offset(btf, t->name_off)))
3213+
type = BPF_KPTR_REF;
3214+
else
32093215
return -EINVAL;
32103216

32113217
/* Get the base type */
@@ -3216,6 +3222,7 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
32163222

32173223
info->type_id = res_id;
32183224
info->off = off;
3225+
info->type = type;
32193226
return BTF_FIELD_FOUND;
32203227
}
32213228

@@ -3420,6 +3427,7 @@ struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
34203427
}
34213428

34223429
tab->off[i].offset = info_arr[i].off;
3430+
tab->off[i].type = info_arr[i].type;
34233431
tab->off[i].kptr.btf_id = id;
34243432
tab->off[i].kptr.btf = kernel_btf;
34253433
}

kernel/bpf/helpers.c

+24
Original file line numberDiff line numberDiff line change
@@ -1374,6 +1374,28 @@ void bpf_timer_cancel_and_free(void *val)
13741374
kfree(t);
13751375
}
13761376

1377+
BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
1378+
{
1379+
unsigned long *kptr = map_value;
1380+
1381+
return xchg(kptr, (unsigned long)ptr);
1382+
}
1383+
1384+
/* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg()
1385+
* helper is determined dynamically by the verifier.
1386+
*/
1387+
#define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
1388+
1389+
const struct bpf_func_proto bpf_kptr_xchg_proto = {
1390+
.func = bpf_kptr_xchg,
1391+
.gpl_only = false,
1392+
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1393+
.ret_btf_id = BPF_PTR_POISON,
1394+
.arg1_type = ARG_PTR_TO_KPTR,
1395+
.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
1396+
.arg2_btf_id = BPF_PTR_POISON,
1397+
};
1398+
13771399
const struct bpf_func_proto bpf_get_current_task_proto __weak;
13781400
const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
13791401
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1452,6 +1474,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
14521474
return &bpf_timer_start_proto;
14531475
case BPF_FUNC_timer_cancel:
14541476
return &bpf_timer_cancel_proto;
1477+
case BPF_FUNC_kptr_xchg:
1478+
return &bpf_kptr_xchg_proto;
14551479
default:
14561480
break;
14571481
}

kernel/bpf/verifier.c

+86-12
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ struct bpf_call_arg_meta {
258258
struct btf *ret_btf;
259259
u32 ret_btf_id;
260260
u32 subprogno;
261+
struct bpf_map_value_off_desc *kptr_off_desc;
261262
};
262263

263264
struct btf *btf_vmlinux;
@@ -489,7 +490,8 @@ static bool is_acquire_function(enum bpf_func_id func_id,
489490
if (func_id == BPF_FUNC_sk_lookup_tcp ||
490491
func_id == BPF_FUNC_sk_lookup_udp ||
491492
func_id == BPF_FUNC_skc_lookup_tcp ||
492-
func_id == BPF_FUNC_ringbuf_reserve)
493+
func_id == BPF_FUNC_ringbuf_reserve ||
494+
func_id == BPF_FUNC_kptr_xchg)
493495
return true;
494496

495497
if (func_id == BPF_FUNC_map_lookup_elem &&
@@ -3514,6 +3516,12 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
35143516
/* We need to verify reg->type and reg->btf, before accessing reg->btf */
35153517
reg_name = kernel_type_name(reg->btf, reg->btf_id);
35163518

3519+
/* For ref_ptr case, release function check should ensure we get one
3520+
* referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
3521+
* normal store of unreferenced kptr, we must ensure var_off is zero.
3522+
* Since ref_ptr cannot be accessed directly by BPF insns, checks for
3523+
* reg->off and reg->ref_obj_id are not needed here.
3524+
*/
35173525
if (__check_ptr_off_reg(env, reg, regno, true))
35183526
return -EACCES;
35193527

@@ -3569,6 +3577,12 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
35693577
return -EACCES;
35703578
}
35713579

3580+
/* We cannot directly access kptr_ref */
3581+
if (off_desc->type == BPF_KPTR_REF) {
3582+
verbose(env, "accessing referenced kptr disallowed\n");
3583+
return -EACCES;
3584+
}
3585+
35723586
if (class == BPF_LDX) {
35733587
val_reg = reg_state(env, value_regno);
35743588
/* We can simply mark the value_regno receiving the pointer
@@ -5293,6 +5307,53 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
52935307
return 0;
52945308
}
52955309

5310+
static int process_kptr_func(struct bpf_verifier_env *env, int regno,
5311+
struct bpf_call_arg_meta *meta)
5312+
{
5313+
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5314+
struct bpf_map_value_off_desc *off_desc;
5315+
struct bpf_map *map_ptr = reg->map_ptr;
5316+
u32 kptr_off;
5317+
int ret;
5318+
5319+
if (!tnum_is_const(reg->var_off)) {
5320+
verbose(env,
5321+
"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
5322+
regno);
5323+
return -EINVAL;
5324+
}
5325+
if (!map_ptr->btf) {
5326+
verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
5327+
map_ptr->name);
5328+
return -EINVAL;
5329+
}
5330+
if (!map_value_has_kptrs(map_ptr)) {
5331+
ret = PTR_ERR(map_ptr->kptr_off_tab);
5332+
if (ret == -E2BIG)
5333+
verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name,
5334+
BPF_MAP_VALUE_OFF_MAX);
5335+
else if (ret == -EEXIST)
5336+
verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name);
5337+
else
5338+
verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
5339+
return -EINVAL;
5340+
}
5341+
5342+
meta->map_ptr = map_ptr;
5343+
kptr_off = reg->off + reg->var_off.value;
5344+
off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off);
5345+
if (!off_desc) {
5346+
verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
5347+
return -EACCES;
5348+
}
5349+
if (off_desc->type != BPF_KPTR_REF) {
5350+
verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
5351+
return -EACCES;
5352+
}
5353+
meta->kptr_off_desc = off_desc;
5354+
return 0;
5355+
}
5356+
52965357
static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
52975358
{
52985359
return base_type(type) == ARG_PTR_TO_MEM ||
@@ -5433,6 +5494,7 @@ static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
54335494
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
54345495
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
54355496
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
5497+
static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
54365498

54375499
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
54385500
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
@@ -5460,11 +5522,13 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
54605522
[ARG_PTR_TO_STACK] = &stack_ptr_types,
54615523
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
54625524
[ARG_PTR_TO_TIMER] = &timer_types,
5525+
[ARG_PTR_TO_KPTR] = &kptr_types,
54635526
};
54645527

54655528
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
54665529
enum bpf_arg_type arg_type,
5467-
const u32 *arg_btf_id)
5530+
const u32 *arg_btf_id,
5531+
struct bpf_call_arg_meta *meta)
54685532
{
54695533
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
54705534
enum bpf_reg_type expected, type = reg->type;
@@ -5517,8 +5581,11 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
55175581
arg_btf_id = compatible->btf_id;
55185582
}
55195583

5520-
if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5521-
btf_vmlinux, *arg_btf_id)) {
5584+
if (meta->func_id == BPF_FUNC_kptr_xchg) {
5585+
if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno))
5586+
return -EACCES;
5587+
} else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5588+
btf_vmlinux, *arg_btf_id)) {
55225589
verbose(env, "R%d is of type %s but %s is expected\n",
55235590
regno, kernel_type_name(reg->btf, reg->btf_id),
55245591
kernel_type_name(btf_vmlinux, *arg_btf_id));
@@ -5625,7 +5692,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
56255692
*/
56265693
goto skip_type_check;
56275694

5628-
err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
5695+
err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta);
56295696
if (err)
56305697
return err;
56315698

@@ -5801,6 +5868,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
58015868
verbose(env, "string is not zero-terminated\n");
58025869
return -EINVAL;
58035870
}
5871+
} else if (arg_type == ARG_PTR_TO_KPTR) {
5872+
if (process_kptr_func(env, regno, meta))
5873+
return -EACCES;
58045874
}
58055875

58065876
return err;
@@ -6143,10 +6213,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
61436213
int i;
61446214

61456215
for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
6146-
if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
6216+
if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
61476217
return false;
61486218

6149-
if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
6219+
if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
61506220
return false;
61516221
}
61526222

@@ -7012,21 +7082,25 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
70127082
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
70137083
}
70147084
} else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
7085+
struct btf *ret_btf;
70157086
int ret_btf_id;
70167087

70177088
mark_reg_known_zero(env, regs, BPF_REG_0);
70187089
regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
7019-
ret_btf_id = *fn->ret_btf_id;
7090+
if (func_id == BPF_FUNC_kptr_xchg) {
7091+
ret_btf = meta.kptr_off_desc->kptr.btf;
7092+
ret_btf_id = meta.kptr_off_desc->kptr.btf_id;
7093+
} else {
7094+
ret_btf = btf_vmlinux;
7095+
ret_btf_id = *fn->ret_btf_id;
7096+
}
70207097
if (ret_btf_id == 0) {
70217098
verbose(env, "invalid return type %u of func %s#%d\n",
70227099
base_type(ret_type), func_id_name(func_id),
70237100
func_id);
70247101
return -EINVAL;
70257102
}
7026-
/* current BPF helper definitions are only coming from
7027-
* built-in code with type IDs from vmlinux BTF
7028-
*/
7029-
regs[BPF_REG_0].btf = btf_vmlinux;
7103+
regs[BPF_REG_0].btf = ret_btf;
70307104
regs[BPF_REG_0].btf_id = ret_btf_id;
70317105
} else {
70327106
verbose(env, "unknown return type %u of func %s#%d\n",

tools/include/uapi/linux/bpf.h

+12
Original file line numberDiff line numberDiff line change
@@ -5143,6 +5143,17 @@ union bpf_attr {
51435143
* The **hash_algo** is returned on success,
51445144
* **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
51455145
* invalid arguments are passed.
5146+
*
5147+
* void *bpf_kptr_xchg(void *map_value, void *ptr)
5148+
* Description
5149+
* Exchange kptr at pointer *map_value* with *ptr*, and return the
5150+
* old value. *ptr* can be NULL, otherwise it must be a referenced
5151+
* pointer which will be released when this helper is called.
5152+
* Return
5153+
* The old value of kptr (which can be NULL). The returned pointer
5154+
* if not NULL, is a reference which must be released using its
5155+
* corresponding release function, or moved into a BPF map before
5156+
* program exit.
51465157
*/
51475158
#define __BPF_FUNC_MAPPER(FN) \
51485159
FN(unspec), \
@@ -5339,6 +5350,7 @@ union bpf_attr {
53395350
FN(copy_from_user_task), \
53405351
FN(skb_set_tstamp), \
53415352
FN(ima_file_hash), \
5353+
FN(kptr_xchg), \
53425354
/* */
53435355

53445356
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

0 commit comments

Comments
 (0)