Zhigang Gong writes: > The patchset LGTM and works well with beignet. The 80%+ performance regression issue in darktable also has been fixed > after this patchset applied and enable the atomic in L3 at beignet side. So, > > Reviewed-by: Zhigang Gong > > Thanks, > Zhigang Gong. > Thanks! >> -----Original Message----- >> From: Francisco Jerez [mailto:currojerez@riseup.net] >> Sent: Friday, May 29, 2015 9:44 PM >> To: intel-gfx@lists.freedesktop.org >> Cc: Ville Syrjälä; Zhigang Gong; Brad Volkin >> Subject: [PATCH 1/3] drm/i915: Fix command parser to validate multiple >> register access with the same command. >> >> Until now the software command checker assumed that commands could read >> or write at most a single register per packet. This is not necessarily the case, >> MI_LOAD_REGISTER_IMM expects a variable-length list of offset/value pairs >> and writes them in sequence. The previous code would only check whether >> the first entry was valid, effectively allowing userspace to write unrestricted >> registers of the MMIO space by sending a multi-register write with a legal first >> register, with potential security implications on Gen6 and 7 hardware. >> >> Fix it by extending the drm_i915_cmd_descriptor table to represent >> multi-register access and making validate_cmd() iterate for all register offsets >> present in the command packet. >> >> Signed-off-by: Francisco Jerez >> --- >> drivers/gpu/drm/i915/i915_cmd_parser.c | 74 >> ++++++++++++++++++++-------------- >> drivers/gpu/drm/i915/i915_drv.h | 5 +++ >> 2 files changed, 48 insertions(+), 31 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c >> b/drivers/gpu/drm/i915/i915_cmd_parser.c >> index 61ae8ff..c4a5f73 100644 >> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c >> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c >> @@ -123,7 +123,7 @@ static const struct drm_i915_cmd_descriptor >> common_cmds[] = { >> CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, >> R ), >> CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, >> R ), >> CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, >> W, >> - .reg = { .offset = 1, .mask = 0x007FFFFC } ), >> + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), >> CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, >> W | B, >> .reg = { .offset = 1, .mask = 0x007FFFFC }, >> .bits = {{ >> @@ -939,7 +939,7 @@ bool i915_needs_cmd_parser(struct intel_engine_cs >> *ring) >> >> static bool check_cmd(const struct intel_engine_cs *ring, >> const struct drm_i915_cmd_descriptor *desc, >> - const u32 *cmd, >> + const u32 *cmd, u32 length, >> const bool is_master, >> bool *oacontrol_set) >> { >> @@ -955,38 +955,49 @@ static bool check_cmd(const struct intel_engine_cs >> *ring, >> } >> >> if (desc->flags & CMD_DESC_REGISTER) { >> - u32 reg_addr = cmd[desc->reg.offset] & desc->reg.mask; >> - >> /* >> - * OACONTROL requires some special handling for writes. We >> - * want to make sure that any batch which enables OA also >> - * disables it before the end of the batch. The goal is to >> - * prevent one process from snooping on the perf data from >> - * another process. To do that, we need to check the value >> - * that will be written to the register. Hence, limit >> - * OACONTROL writes to only MI_LOAD_REGISTER_IMM >> commands. >> + * Get the distance between individual register offset >> + * fields if the command can perform more than one >> + * access at a time. >> */ >> - if (reg_addr == OACONTROL) { >> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { >> - DRM_DEBUG_DRIVER("CMD: Rejected LRM to >> OACONTROL\n"); >> - return false; >> + const u32 step = desc->reg.step ? desc->reg.step : length; >> + u32 offset; >> + >> + for (offset = desc->reg.offset; offset < length; >> + offset += step) { >> + const u32 reg_addr = cmd[offset] & desc->reg.mask; >> + >> + /* >> + * OACONTROL requires some special handling for >> + * writes. We want to make sure that any batch which >> + * enables OA also disables it before the end of the >> + * batch. The goal is to prevent one process from >> + * snooping on the perf data from another process. To do >> + * that, we need to check the value that will be written >> + * to the register. Hence, limit OACONTROL writes to >> + * only MI_LOAD_REGISTER_IMM commands. >> + */ >> + if (reg_addr == OACONTROL) { >> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { >> + DRM_DEBUG_DRIVER("CMD: Rejected LRM to >> OACONTROL\n"); >> + return false; >> + } >> + >> + if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1)) >> + *oacontrol_set = (cmd[offset + 1] != 0); >> } >> >> - if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1)) >> - *oacontrol_set = (cmd[2] != 0); >> - } >> - >> - if (!valid_reg(ring->reg_table, >> - ring->reg_count, reg_addr)) { >> - if (!is_master || >> - !valid_reg(ring->master_reg_table, >> - ring->master_reg_count, >> - reg_addr)) { >> - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in >> command: 0x%08X (ring=%d)\n", >> - reg_addr, >> - *cmd, >> - ring->id); >> - return false; >> + if (!valid_reg(ring->reg_table, >> + ring->reg_count, reg_addr)) { >> + if (!is_master || >> + !valid_reg(ring->master_reg_table, >> + ring->master_reg_count, >> + reg_addr)) { >> + DRM_DEBUG_DRIVER("CMD: Rejected register >> 0x%08X in command: 0x%08X (ring=%d)\n", >> + reg_addr, *cmd, >> + ring->id); >> + return false; >> + } >> } >> } >> } >> @@ -1110,7 +1121,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring, >> break; >> } >> >> - if (!check_cmd(ring, desc, cmd, is_master, &oacontrol_set)) { >> + if (!check_cmd(ring, desc, cmd, length, is_master, >> + &oacontrol_set)) { >> ret = -EINVAL; >> break; >> } >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 8ae6f7f..3850288 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -2228,10 +2228,15 @@ struct drm_i915_cmd_descriptor { >> * Describes where to find a register address in the command to check >> * against the ring's register whitelist. Only valid if flags has the >> * CMD_DESC_REGISTER bit set. >> + * >> + * A non-zero step value implies that the command may access multiple >> + * registers in sequence (e.g. LRI), in that case step gives the >> + * distance in dwords between individual offset fields. >> */ >> struct { >> u32 offset; >> u32 mask; >> + u32 step; >> } reg; >> >> #define MAX_CMD_DESC_BITMASKS 3 >> -- >> 2.3.5