* [PATCH 1/2] bpf: context casting for tail call
@ 2019-02-25 15:54 Kris Van Hees
2019-02-25 20:10 ` kbuild test robot
2019-02-25 20:10 ` kbuild test robot
0 siblings, 2 replies; 3+ messages in thread
From: Kris Van Hees @ 2019-02-25 15:54 UTC (permalink / raw)
To: netdev
Currently BPF programs are executed with a context that is provided by
code that initiates the execution. Tracing tools that want to make use
of existing probes and events that allow BPF programs to be attached to
them are thus limited to the context information provided by the probe
or event source. Often, more context is needed to allow tracing tools
the ability to implement more complex constructs (e.g. more stateful
tracing).
This patch extends the tail-call mechanism to allow a BPF program of
one type to call a BPF program of another type. E.g. a kprobe BPF
program (working with a struct pt_regs context) can call a BPF program
with a more extensive context. The BPF program type is being extended
to provide can_cast() and cast_context() callback functions to handle
the context conversion.
The program array holding BPF programs that you can tail-call into
continues to require that all programs are of the same type. But when
a compatibility check is made in a program that performs a tail-call,
the can_cast() function is called (if available) to allow the target
type to determine whether it can handle the conversion of a context
from the source type to the target type. If can_cast() is not provided
by the program type, casting is denied.
During execution, the cast_context() function is called (if available)
to perform the conversion of the current context to the context that the
target type expects. Since the program type of the executing BPF program
is not explicitly known during execution, the verifier inserts an
instruction right before the tail-call to assign the current BPF program
type to R4.
The interpreter calls cast_context() using the program type in R4 as
source program type, the program type associated with the program array
as target program type, and the context as provided in R1.
The bpf_prog_types array is now being exposed to the rest of the BPF
code (where before it was local to just the syscall handling) because
the can_cast and cast_context operations need to be accessible.
There is no noticeable effect on BPF program types that do not implement
this new feature.
A JIT implementation is not available yet in this first iteration.
Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
Reviewed-by: Nick Alcock <nick.alcock@oracle.com>
---
include/linux/bpf.h | 4 ++++
kernel/bpf/core.c | 27 ++++++++++++++++++++++++++-
kernel/bpf/syscall.c | 2 +-
kernel/bpf/verifier.c | 16 ++++++++++++----
4 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index de18227b3d95..117d2bae51b9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -281,6 +281,9 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
struct bpf_prog_ops {
int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+ bool (*can_cast)(enum bpf_prog_type stype, enum bpf_prog_type ttype);
+ void *(*cast_context)(enum bpf_prog_type stype,
+ enum bpf_prog_type ttype, void *ctx);
};
struct bpf_verifier_ops {
@@ -528,6 +531,7 @@ extern const struct file_operations bpf_prog_fops;
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
+extern const struct bpf_prog_ops * const bpf_prog_types[];
extern const struct bpf_prog_ops bpf_offload_prog_ops;
extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
extern const struct bpf_verifier_ops xdp_analyzer_ops;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ef88b167959d..1b7c718d4e9d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1426,10 +1426,12 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
CONT;
JMP_TAIL_CALL: {
+ void *ctx = (void *) (unsigned long) BPF_R1;
struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_prog *prog;
u32 index = BPF_R3;
+ u32 type = BPF_R4;
if (unlikely(index >= array->map.max_entries))
goto out;
@@ -1441,6 +1443,14 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
prog = READ_ONCE(array->ptrs[index]);
if (!prog)
goto out;
+ if (prog->aux->ops->cast_context) {
+ ctx = prog->aux->ops->cast_context(type, prog->type,
+ ctx);
+ if (!ctx)
+ goto out;
+
+ BPF_R1 = (u64) ctx;
+ }
/* ARG1 at this point is guaranteed to point to CTX from
* the verifier side due to the fact that the tail call is
@@ -1637,6 +1647,20 @@ bool bpf_prog_array_compatible(struct bpf_array *array,
array->owner_jited == fp->jited;
}
+bool bpf_prog_array_can_cast(struct bpf_array *array, const struct bpf_prog *fp)
+{
+ const struct bpf_prog_ops *ops;
+
+ if (array->owner_jited != fp->jited)
+ return false;
+
+ ops = bpf_prog_types[array->owner_prog_type];
+ if (ops->can_cast)
+ return ops->can_cast(fp->type, array->owner_prog_type);
+
+ return false;
+}
+
static int bpf_check_tail_call(const struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
@@ -1650,7 +1674,8 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
continue;
array = container_of(map, struct bpf_array, map);
- if (!bpf_prog_array_compatible(array, fp))
+ if (!bpf_prog_array_compatible(array, fp) &&
+ !bpf_prog_array_can_cast(array, fp))
return -EINVAL;
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ec7c552af76b..d558d979100f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1110,7 +1110,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
return err;
}
-static const struct bpf_prog_ops * const bpf_prog_types[] = {
+const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1b9496c41383..b49820e82cf8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7629,9 +7629,10 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->imm = 0;
insn->code = BPF_JMP | BPF_TAIL_CALL;
+ cnt = 0;
aux = &env->insn_aux_data[i + delta];
if (!bpf_map_ptr_unpriv(aux))
- continue;
+ goto privileged;
/* instead of changing every JIT dealing with tail_call
* emit two extra insns:
@@ -7646,13 +7647,20 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
map_ptr = BPF_MAP_PTR(aux->map_state);
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
- map_ptr->max_entries, 2);
+ map_ptr->max_entries, 3);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
container_of(map_ptr,
struct bpf_array,
map)->index_mask);
- insn_buf[2] = *insn;
- cnt = 3;
+ cnt = 2;
+
+privileged:
+ /* store the BPF program type of the current program in
+ * R4 so it is known in case this tail call requires
+ * casting the context to a different program type
+ */
+ insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_4, prog->type);
+ insn_buf[cnt++] = *insn;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
--
2.20.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH 1/2] bpf: context casting for tail call
2019-02-25 15:54 [PATCH 1/2] bpf: context casting for tail call Kris Van Hees
@ 2019-02-25 20:10 ` kbuild test robot
2019-02-25 20:10 ` kbuild test robot
1 sibling, 0 replies; 3+ messages in thread
From: kbuild test robot @ 2019-02-25 20:10 UTC (permalink / raw)
To: Kris Van Hees; +Cc: kbuild-all, netdev
[-- Attachment #1: Type: text/plain, Size: 8559 bytes --]
Hi Kris,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on bpf-next/master]
[also build test WARNING on v5.0-rc8 next-20190225]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Kris-Van-Hees/bpf-context-casting-for-tail-call-and-gtrace-prog-type/20190226-034827
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: xtensa-allyesconfig (attached as .config)
compiler: xtensa-linux-gcc (GCC) 8.2.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=8.2.0 make.cross ARCH=xtensa
All warnings (new ones prefixed by >>):
kernel//bpf/core.c: In function '___bpf_prog_run':
>> kernel//bpf/core.c:1452:13: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
BPF_R1 = (u64) ctx;
^
vim +1452 kernel//bpf/core.c
1290
1291 select_insn:
1292 goto *jumptable[insn->code];
1293
1294 /* ALU */
1295 #define ALU(OPCODE, OP) \
1296 ALU64_##OPCODE##_X: \
1297 DST = DST OP SRC; \
1298 CONT; \
1299 ALU_##OPCODE##_X: \
1300 DST = (u32) DST OP (u32) SRC; \
1301 CONT; \
1302 ALU64_##OPCODE##_K: \
1303 DST = DST OP IMM; \
1304 CONT; \
1305 ALU_##OPCODE##_K: \
1306 DST = (u32) DST OP (u32) IMM; \
1307 CONT;
1308
1309 ALU(ADD, +)
1310 ALU(SUB, -)
1311 ALU(AND, &)
1312 ALU(OR, |)
1313 ALU(LSH, <<)
1314 ALU(RSH, >>)
1315 ALU(XOR, ^)
1316 ALU(MUL, *)
1317 #undef ALU
1318 ALU_NEG:
1319 DST = (u32) -DST;
1320 CONT;
1321 ALU64_NEG:
1322 DST = -DST;
1323 CONT;
1324 ALU_MOV_X:
1325 DST = (u32) SRC;
1326 CONT;
1327 ALU_MOV_K:
1328 DST = (u32) IMM;
1329 CONT;
1330 ALU64_MOV_X:
1331 DST = SRC;
1332 CONT;
1333 ALU64_MOV_K:
1334 DST = IMM;
1335 CONT;
1336 LD_IMM_DW:
1337 DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
1338 insn++;
1339 CONT;
1340 ALU_ARSH_X:
1341 DST = (u64) (u32) ((*(s32 *) &DST) >> SRC);
1342 CONT;
1343 ALU_ARSH_K:
1344 DST = (u64) (u32) ((*(s32 *) &DST) >> IMM);
1345 CONT;
1346 ALU64_ARSH_X:
1347 (*(s64 *) &DST) >>= SRC;
1348 CONT;
1349 ALU64_ARSH_K:
1350 (*(s64 *) &DST) >>= IMM;
1351 CONT;
1352 ALU64_MOD_X:
1353 div64_u64_rem(DST, SRC, &AX);
1354 DST = AX;
1355 CONT;
1356 ALU_MOD_X:
1357 AX = (u32) DST;
1358 DST = do_div(AX, (u32) SRC);
1359 CONT;
1360 ALU64_MOD_K:
1361 div64_u64_rem(DST, IMM, &AX);
1362 DST = AX;
1363 CONT;
1364 ALU_MOD_K:
1365 AX = (u32) DST;
1366 DST = do_div(AX, (u32) IMM);
1367 CONT;
1368 ALU64_DIV_X:
1369 DST = div64_u64(DST, SRC);
1370 CONT;
1371 ALU_DIV_X:
1372 AX = (u32) DST;
1373 do_div(AX, (u32) SRC);
1374 DST = (u32) AX;
1375 CONT;
1376 ALU64_DIV_K:
1377 DST = div64_u64(DST, IMM);
1378 CONT;
1379 ALU_DIV_K:
1380 AX = (u32) DST;
1381 do_div(AX, (u32) IMM);
1382 DST = (u32) AX;
1383 CONT;
1384 ALU_END_TO_BE:
1385 switch (IMM) {
1386 case 16:
1387 DST = (__force u16) cpu_to_be16(DST);
1388 break;
1389 case 32:
1390 DST = (__force u32) cpu_to_be32(DST);
1391 break;
1392 case 64:
1393 DST = (__force u64) cpu_to_be64(DST);
1394 break;
1395 }
1396 CONT;
1397 ALU_END_TO_LE:
1398 switch (IMM) {
1399 case 16:
1400 DST = (__force u16) cpu_to_le16(DST);
1401 break;
1402 case 32:
1403 DST = (__force u32) cpu_to_le32(DST);
1404 break;
1405 case 64:
1406 DST = (__force u64) cpu_to_le64(DST);
1407 break;
1408 }
1409 CONT;
1410
1411 /* CALL */
1412 JMP_CALL:
1413 /* Function call scratches BPF_R1-BPF_R5 registers,
1414 * preserves BPF_R6-BPF_R9, and stores return value
1415 * into BPF_R0.
1416 */
1417 BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
1418 BPF_R4, BPF_R5);
1419 CONT;
1420
1421 JMP_CALL_ARGS:
1422 BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2,
1423 BPF_R3, BPF_R4,
1424 BPF_R5,
1425 insn + insn->off + 1);
1426 CONT;
1427
1428 JMP_TAIL_CALL: {
1429 void *ctx = (void *) (unsigned long) BPF_R1;
1430 struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
1431 struct bpf_array *array = container_of(map, struct bpf_array, map);
1432 struct bpf_prog *prog;
1433 u32 index = BPF_R3;
1434 u32 type = BPF_R4;
1435
1436 if (unlikely(index >= array->map.max_entries))
1437 goto out;
1438 if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
1439 goto out;
1440
1441 tail_call_cnt++;
1442
1443 prog = READ_ONCE(array->ptrs[index]);
1444 if (!prog)
1445 goto out;
1446 if (prog->aux->ops->cast_context) {
1447 ctx = prog->aux->ops->cast_context(type, prog->type,
1448 ctx);
1449 if (!ctx)
1450 goto out;
1451
> 1452 BPF_R1 = (u64) ctx;
1453 }
1454
1455 /* ARG1 at this point is guaranteed to point to CTX from
1456 * the verifier side due to the fact that the tail call is
1457 * handeled like a helper, that is, bpf_tail_call_proto,
1458 * where arg1_type is ARG_PTR_TO_CTX.
1459 */
1460 insn = prog->insnsi;
1461 goto select_insn;
1462 out:
1463 CONT;
1464 }
1465 JMP_JA:
1466 insn += insn->off;
1467 CONT;
1468 JMP_EXIT:
1469 return BPF_R0;
1470 /* JMP */
1471 #define COND_JMP(SIGN, OPCODE, CMP_OP) \
1472 JMP_##OPCODE##_X: \
1473 if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) { \
1474 insn += insn->off; \
1475 CONT_JMP; \
1476 } \
1477 CONT; \
1478 JMP32_##OPCODE##_X: \
1479 if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) { \
1480 insn += insn->off; \
1481 CONT_JMP; \
1482 } \
1483 CONT; \
1484 JMP_##OPCODE##_K: \
1485 if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) { \
1486 insn += insn->off; \
1487 CONT_JMP; \
1488 } \
1489 CONT; \
1490 JMP32_##OPCODE##_K: \
1491 if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) { \
1492 insn += insn->off; \
1493 CONT_JMP; \
1494 } \
1495 CONT;
1496 COND_JMP(u, JEQ, ==)
1497 COND_JMP(u, JNE, !=)
1498 COND_JMP(u, JGT, >)
1499 COND_JMP(u, JLT, <)
1500 COND_JMP(u, JGE, >=)
1501 COND_JMP(u, JLE, <=)
1502 COND_JMP(u, JSET, &)
1503 COND_JMP(s, JSGT, >)
1504 COND_JMP(s, JSLT, <)
1505 COND_JMP(s, JSGE, >=)
1506 COND_JMP(s, JSLE, <=)
1507 #undef COND_JMP
1508 /* STX and ST and LDX*/
1509 #define LDST(SIZEOP, SIZE) \
1510 STX_MEM_##SIZEOP: \
1511 *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
1512 CONT; \
1513 ST_MEM_##SIZEOP: \
1514 *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
1515 CONT; \
1516 LDX_MEM_##SIZEOP: \
1517 DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
1518 CONT;
1519
1520 LDST(B, u8)
1521 LDST(H, u16)
1522 LDST(W, u32)
1523 LDST(DW, u64)
1524 #undef LDST
1525 STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
1526 atomic_add((u32) SRC, (atomic_t *)(unsigned long)
1527 (DST + insn->off));
1528 CONT;
1529 STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
1530 atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
1531 (DST + insn->off));
1532 CONT;
1533
1534 default_label:
1535 /* If we ever reach this, we have a bug somewhere. Die hard here
1536 * instead of just returning 0; we could be somewhere in a subprog,
1537 * so execution could continue otherwise which we do /not/ want.
1538 *
1539 * Note, verifier whitelists all opcodes in bpf_opcode_in_insntable().
1540 */
1541 pr_warn("BPF interpreter: unknown opcode %02x\n", insn->code);
1542 BUG_ON(1);
1543 return 0;
1544 }
1545 STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
1546
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 56355 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH 1/2] bpf: context casting for tail call
2019-02-25 15:54 [PATCH 1/2] bpf: context casting for tail call Kris Van Hees
2019-02-25 20:10 ` kbuild test robot
@ 2019-02-25 20:10 ` kbuild test robot
1 sibling, 0 replies; 3+ messages in thread
From: kbuild test robot @ 2019-02-25 20:10 UTC (permalink / raw)
To: Kris Van Hees; +Cc: kbuild-all, netdev
[-- Attachment #1: Type: text/plain, Size: 1718 bytes --]
Hi Kris,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on bpf-next/master]
[also build test ERROR on v5.0-rc8 next-20190225]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Kris-Van-Hees/bpf-context-casting-for-tail-call-and-gtrace-prog-type/20190226-034827
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-randconfig-x016-201908 (attached as .config)
compiler: gcc-8 (Debian 8.2.0-20) 8.2.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All errors (new ones prefixed by >>):
kernel/bpf/core.c: In function 'bpf_prog_array_can_cast':
>> kernel/bpf/core.c:1657:8: error: 'bpf_prog_types' undeclared (first use in this function); did you mean 'bpf_prog_type'?
ops = bpf_prog_types[array->owner_prog_type];
^~~~~~~~~~~~~~
bpf_prog_type
kernel/bpf/core.c:1657:8: note: each undeclared identifier is reported only once for each function it appears in
vim +1657 kernel/bpf/core.c
1649
1650 bool bpf_prog_array_can_cast(struct bpf_array *array, const struct bpf_prog *fp)
1651 {
1652 const struct bpf_prog_ops *ops;
1653
1654 if (array->owner_jited != fp->jited)
1655 return false;
1656
> 1657 ops = bpf_prog_types[array->owner_prog_type];
1658 if (ops->can_cast)
1659 return ops->can_cast(fp->type, array->owner_prog_type);
1660
1661 return false;
1662 }
1663
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28326 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2019-02-25 20:11 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-25 15:54 [PATCH 1/2] bpf: context casting for tail call Kris Van Hees
2019-02-25 20:10 ` kbuild test robot
2019-02-25 20:10 ` kbuild test robot
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).