From: Thierry Reding <thierry.reding@gmail.com> To: Cai Huoqing <cai.huoqing@linux.dev> Cc: "Thomas Zimmermann" <tzimmermann@suse.de>, "David Airlie" <airlied@linux.ie>, linux-kernel@vger.kernel.org, "Christian König" <christian.koenig@amd.com>, linaro-mm-sig@lists.linaro.org, dri-devel@lists.freedesktop.org, "Sumit Semwal" <sumit.semwal@linaro.org>, linux-media@vger.kernel.org Subject: Re: [PATCH v2 2/4] drm/nvdla: Add driver support for NVDLA Date: Thu, 28 Apr 2022 17:18:13 +0200 [thread overview] Message-ID: <YmqwNVoTZZFaIM9S@orome> (raw) In-Reply-To: <20220426060808.78225-3-cai.huoqing@linux.dev> [-- Attachment #1: Type: text/plain, Size: 12733 bytes --] On Tue, Apr 26, 2022 at 02:07:59PM +0800, Cai Huoqing wrote: [...] > diff --git a/drivers/gpu/drm/nvdla/nvdla_drv.c b/drivers/gpu/drm/nvdla/nvdla_drv.c I'll look at this from an architectural level and leave it to other experts to review the more technical things. [...] > +static struct nvdla_config nvdla_config_os_initial = { > + .atom_size = 32, > + .bdma_enable = true, > + .rubik_enable = true, > + .weight_compress_support = true, > +}; > + > +static struct nvdla_config nvdla_config_small = { > + //.atom_size = 8, > + .atom_size = 32, // nv_large config > + .bdma_enable = false, > + .rubik_enable = false, > + .weight_compress_support = false, > +}; > + [...] 
> +static union nvdla_operation_container operation_desc[NVDLA_OP_NUM][NVDLA_NUM_GROUPS]; > +static union nvdla_surface_container surface_desc[NVDLA_OP_NUM][NVDLA_NUM_GROUPS]; > + > +static struct nvdla_task_desc global_task; > + > +static struct nvdla_engine engine = { > + .processors[NVDLA_OP_BDMA] = { > + .name = "BDMA", > + .op_type = NVDLA_OP_BDMA, > + .program = nvdla_bdma_program, > + .enable = nvdla_bdma_enable, > + .set_producer = nvdla_bdma_set_producer, > + .is_ready = nvdla_bdma_is_ready, > + .dump_config = nvdla_bdma_dump_config, > + .rdma_check = nvdla_bdma_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_BDMA][0], > + .surface_desc = &surface_desc[NVDLA_OP_BDMA][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_BDMA][1], > + .surface_desc = &surface_desc[NVDLA_OP_BDMA][1], > + }, > + }, > + .processors[NVDLA_OP_CONV] = { > + .name = "Convolution", > + .op_type = NVDLA_OP_CONV, > + .program = nvdla_conv_program, > + .enable = nvdla_conv_enable, > + .set_producer = nvdla_conv_set_producer, > + .is_ready = nvdla_conv_is_ready, > + .dump_config = nvdla_conv_dump_config, > + .rdma_check = nvdla_conv_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_CONV][0], > + .surface_desc = &surface_desc[NVDLA_OP_CONV][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + 
.active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_CONV][1], > + .surface_desc = &surface_desc[NVDLA_OP_CONV][1], > + }, > + }, > + .processors[NVDLA_OP_SDP] = { > + .name = "SDP", > + .op_type = NVDLA_OP_SDP, > + .program = nvdla_sdp_program, > + .enable = nvdla_sdp_enable, > + .set_producer = nvdla_sdp_set_producer, > + .is_ready = nvdla_sdp_is_ready, > + .dump_config = nvdla_sdp_dump_config, > + .rdma_check = nvdla_sdp_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_SDP][0], > + .surface_desc = &surface_desc[NVDLA_OP_SDP][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_SDP][1], > + .surface_desc = &surface_desc[NVDLA_OP_SDP][1], > + }, > + }, > + .processors[NVDLA_OP_PDP] = { > + .name = "PDP", > + .op_type = NVDLA_OP_PDP, > + .program = nvdla_pdp_program, > + .enable = nvdla_pdp_enable, > + .set_producer = nvdla_pdp_set_producer, > + .is_ready = nvdla_pdp_is_ready, > + .dump_config = nvdla_pdp_dump_config, > + .rdma_check = nvdla_pdp_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_PDP][0], > + .surface_desc = &surface_desc[NVDLA_OP_PDP][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + 
.lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_PDP][1], > + .surface_desc = &surface_desc[NVDLA_OP_PDP][1], > + }, > + }, > + .processors[NVDLA_OP_CDP] = { > + .name = "CDP", > + .op_type = NVDLA_OP_CDP, > + .program = nvdla_cdp_program, > + .enable = nvdla_cdp_enable, > + .set_producer = nvdla_cdp_set_producer, > + .is_ready = nvdla_cdp_is_ready, > + .dump_config = nvdla_cdp_dump_config, > + .rdma_check = nvdla_cdp_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_CDP][0], > + .surface_desc = &surface_desc[NVDLA_OP_CDP][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_CDP][1], > + .surface_desc = &surface_desc[NVDLA_OP_CDP][1], > + }, > + }, > + > + .processors[NVDLA_OP_RUBIK] = { > + .name = "RUBIK", > + .op_type = NVDLA_OP_RUBIK, > + .program = nvdla_rubik_program, > + .enable = nvdla_rubik_enable, > + .set_producer = nvdla_rubik_set_producer, > + .is_ready = nvdla_rubik_is_ready, > + .dump_config = nvdla_rubik_dump_config, > + .rdma_check = nvdla_rubik_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_RUBIK][0], > + .surface_desc = &surface_desc[NVDLA_OP_RUBIK][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = 
&operation_desc[NVDLA_OP_RUBIK][1], > + .surface_desc = &surface_desc[NVDLA_OP_RUBIK][1], > + }, > + }, > + > +}; These global variables aren't going to work because Tegra234 (Tegra194's successor) has two instances of NVDLA. So this data needs to be allocated dynamically, per instance. It also seems to me like some of this data could be separated into read-only (i.e. const) and variable data. For instance none of the function pointers ever seem to change, so those could be refactored into an "ops" structure and then passed by pointer into the engines. That might also be useful if we need to make some of these implementations different on subsequent generations of the IP. > +/* driver probe and init */ > +static const struct of_device_id nvdla_of_match[] = { > + { > + .compatible = "nvidia,nvdla_os_initial", > + .data = &nvdla_config_os_initial, > + }, > + { > + .compatible = "nvidia,nvdla_2", > + .data = &nvdla_config_small, > + }, > + { }, > +}; Looking at the definition of these configuration structures these seem to be software configuration variants rather than actually different hardware blocks. In fact, none of the above seem to be specific to any particular hardware version. I would expect these compatible strings to reflect the exact chip that they can be found in. I.e. for Tegra194 I would expect this to be just: "nvidia,tegra194-dla" And then on Tegra234 it would become: "nvidia,tegra234-dla" at which point the parameterization can happen based on that compatible string, if we need to. Also, and I think Peter Robinson already mentioned that, we'll need the device tree bindings for this as well as device tree changes that create this device on one of the platforms supported upstream so that we can test this. I think in your first version of these patches you had also made a reference to the userspace driver component that you've used to test this on. 
Can you include links to those in the cover letter of subsequent versions so that people know where to look? > + > +static int32_t nvdla_probe(struct platform_device *pdev) We generally don't use the sized types unless absolutely necessary. So this (and potentially other occurrences) should just switch to simple "int". > +{ > + int32_t err = 0; > + struct resource *res; > + struct nvdla_device *nvdla_dev; > + struct device *dev = &pdev->dev; > + const struct of_device_id *match; > + > + if (!pdev->dev.of_node) > + return -EINVAL; > + > + match = of_match_device(nvdla_of_match, &pdev->dev); > + if (!match) { > + pr_err("Missing DT entry!\n"); > + return -EINVAL; > + } There's usually no need for this check. If there was no device tree entry then the nvdla_probe() function wouldn't have been called. > + > + nvdla_dev = devm_kzalloc(dev, sizeof(*nvdla_dev), GFP_KERNEL); > + if (!nvdla_dev) > + return -ENOMEM; > + > + platform_set_drvdata(pdev, nvdla_dev); > + nvdla_dev->pdev = pdev; Do we really need the platform device later on? Typically access to the struct device (i.e. &pdev->dev) is enough. > + nvdla_dev->config_data = (struct nvdla_config *)match->data; You can get this in a single call to of_device_get_match_data(), so you don't need the extra match local variable. > + > + init_completion(&nvdla_dev->event_notifier); > + > + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + nvdla_dev->base = devm_ioremap_resource(&pdev->dev, res); I think people prefer devm_platform_get_and_ioremap_resource() these days... > + if (IS_ERR(nvdla_dev->base)) > + return PTR_ERR(nvdla_dev->base); > + > + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); > + if (!res) { > + dev_err(&pdev->dev, "no irq resource\n"); > + return -EINVAL; > + } > + nvdla_dev->irq = res->start; You can use platform_get_irq() here, which makes this a bit simpler. 
> + > + err = devm_request_irq(&pdev->dev, nvdla_dev->irq, > + nvdla_engine_isr, 0, > + dev_name(&pdev->dev), nvdla_dev); > + if (err) > + return err; > + > + nvdla_dev->engine_context = &engine; > + engine.task = &global_task; > + engine.driver_context = (void *)nvdla_dev; Why the generic void *? Can't we just store a struct nvdla_device * as the driver context directly? Do we expect this type to vary? What I'm missing in this ->probe() function is references to the various clocks, resets and power gates that need to be controlled in order for this IP to work. The power gates are typically implemented using generic power domains, so you can safely ignore those, provided you at least have them in the device tree, but I think you'd need to at the very least grab the clocks and resets that this needs. This might work without them for a while because Tegra194 and later do some hidden magic with clocks and resets, but for any finer-grained control we'll need them (for instance, we will want to switch into a low-power state when no tasks are being processed). 
Thierry > + engine.task->task_data = NULL; > + > + nvdla_init_op_cache(&engine); > + nvdla_clear_task(nvdla_dev->engine_context); > + > + err = nvdla_drm_probe(nvdla_dev); > + if (err) > + dev_err(&pdev->dev, "failed to register drm device\n"); > + > + return err; > +} > + > +static int32_t __exit nvdla_remove(struct platform_device *pdev) > +{ > + struct nvdla_device *nvdla_dev = dev_get_drvdata(&pdev->dev); > + > + nvdla_drm_remove(nvdla_dev); > + > + return 0; > +} > + > +static struct platform_driver nvdla_driver = { > + .probe = nvdla_probe, > + .remove = __exit_p(nvdla_remove), > + .driver = { > + .owner = THIS_MODULE, > + .name = "NVDLA", > + .of_match_table = of_match_ptr(nvdla_of_match), > + }, > +}; > +module_platform_driver(nvdla_driver); > + > +MODULE_LICENSE("Dual BSD/GPL"); > +MODULE_DESCRIPTION("Nvidia Deep Learning Accelerator driver"); > +MODULE_IMPORT_NS(DMA_BUF); [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
WARNING: multiple messages have this Message-ID (diff)
From: Thierry Reding <thierry.reding@gmail.com> To: Cai Huoqing <cai.huoqing@linux.dev> Cc: "David Airlie" <airlied@linux.ie>, linux-kernel@vger.kernel.org, dri-devel@lists.freedesktop.org, "Sumit Semwal" <sumit.semwal@linaro.org>, linaro-mm-sig@lists.linaro.org, "Thomas Zimmermann" <tzimmermann@suse.de>, "Christian König" <christian.koenig@amd.com>, linux-media@vger.kernel.org Subject: Re: [PATCH v2 2/4] drm/nvdla: Add driver support for NVDLA Date: Thu, 28 Apr 2022 17:18:13 +0200 [thread overview] Message-ID: <YmqwNVoTZZFaIM9S@orome> (raw) In-Reply-To: <20220426060808.78225-3-cai.huoqing@linux.dev> [-- Attachment #1: Type: text/plain, Size: 12733 bytes --] On Tue, Apr 26, 2022 at 02:07:59PM +0800, Cai Huoqing wrote: [...] > diff --git a/drivers/gpu/drm/nvdla/nvdla_drv.c b/drivers/gpu/drm/nvdla/nvdla_drv.c I'll look at this from an architectural level and leave it to other experts to review the more technical things. [...] > +static struct nvdla_config nvdla_config_os_initial = { > + .atom_size = 32, > + .bdma_enable = true, > + .rubik_enable = true, > + .weight_compress_support = true, > +}; > + > +static struct nvdla_config nvdla_config_small = { > + //.atom_size = 8, > + .atom_size = 32, // nv_large config > + .bdma_enable = false, > + .rubik_enable = false, > + .weight_compress_support = false, > +}; > + [...] 
> +static union nvdla_operation_container operation_desc[NVDLA_OP_NUM][NVDLA_NUM_GROUPS]; > +static union nvdla_surface_container surface_desc[NVDLA_OP_NUM][NVDLA_NUM_GROUPS]; > + > +static struct nvdla_task_desc global_task; > + > +static struct nvdla_engine engine = { > + .processors[NVDLA_OP_BDMA] = { > + .name = "BDMA", > + .op_type = NVDLA_OP_BDMA, > + .program = nvdla_bdma_program, > + .enable = nvdla_bdma_enable, > + .set_producer = nvdla_bdma_set_producer, > + .is_ready = nvdla_bdma_is_ready, > + .dump_config = nvdla_bdma_dump_config, > + .rdma_check = nvdla_bdma_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_BDMA][0], > + .surface_desc = &surface_desc[NVDLA_OP_BDMA][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_BDMA][1], > + .surface_desc = &surface_desc[NVDLA_OP_BDMA][1], > + }, > + }, > + .processors[NVDLA_OP_CONV] = { > + .name = "Convolution", > + .op_type = NVDLA_OP_CONV, > + .program = nvdla_conv_program, > + .enable = nvdla_conv_enable, > + .set_producer = nvdla_conv_set_producer, > + .is_ready = nvdla_conv_is_ready, > + .dump_config = nvdla_conv_dump_config, > + .rdma_check = nvdla_conv_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_CONV][0], > + .surface_desc = &surface_desc[NVDLA_OP_CONV][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + 
.active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_CONV][1], > + .surface_desc = &surface_desc[NVDLA_OP_CONV][1], > + }, > + }, > + .processors[NVDLA_OP_SDP] = { > + .name = "SDP", > + .op_type = NVDLA_OP_SDP, > + .program = nvdla_sdp_program, > + .enable = nvdla_sdp_enable, > + .set_producer = nvdla_sdp_set_producer, > + .is_ready = nvdla_sdp_is_ready, > + .dump_config = nvdla_sdp_dump_config, > + .rdma_check = nvdla_sdp_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_SDP][0], > + .surface_desc = &surface_desc[NVDLA_OP_SDP][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_SDP][1], > + .surface_desc = &surface_desc[NVDLA_OP_SDP][1], > + }, > + }, > + .processors[NVDLA_OP_PDP] = { > + .name = "PDP", > + .op_type = NVDLA_OP_PDP, > + .program = nvdla_pdp_program, > + .enable = nvdla_pdp_enable, > + .set_producer = nvdla_pdp_set_producer, > + .is_ready = nvdla_pdp_is_ready, > + .dump_config = nvdla_pdp_dump_config, > + .rdma_check = nvdla_pdp_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_PDP][0], > + .surface_desc = &surface_desc[NVDLA_OP_PDP][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + 
.lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_PDP][1], > + .surface_desc = &surface_desc[NVDLA_OP_PDP][1], > + }, > + }, > + .processors[NVDLA_OP_CDP] = { > + .name = "CDP", > + .op_type = NVDLA_OP_CDP, > + .program = nvdla_cdp_program, > + .enable = nvdla_cdp_enable, > + .set_producer = nvdla_cdp_set_producer, > + .is_ready = nvdla_cdp_is_ready, > + .dump_config = nvdla_cdp_dump_config, > + .rdma_check = nvdla_cdp_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_CDP][0], > + .surface_desc = &surface_desc[NVDLA_OP_CDP][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_CDP][1], > + .surface_desc = &surface_desc[NVDLA_OP_CDP][1], > + }, > + }, > + > + .processors[NVDLA_OP_RUBIK] = { > + .name = "RUBIK", > + .op_type = NVDLA_OP_RUBIK, > + .program = nvdla_rubik_program, > + .enable = nvdla_rubik_enable, > + .set_producer = nvdla_rubik_set_producer, > + .is_ready = nvdla_rubik_is_ready, > + .dump_config = nvdla_rubik_dump_config, > + .rdma_check = nvdla_rubik_rdma_check, > + .consumer_ptr = 0, > + .roi_index = 0, > + .group_status = 0, > + .rdma_status = 0, > + .last_group = 1, > + .groups[0] = { > + .id = 0, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = &operation_desc[NVDLA_OP_RUBIK][0], > + .surface_desc = &surface_desc[NVDLA_OP_RUBIK][0], > + }, > + .groups[1] = { > + .id = 1, > + .rdma_id = 0, > + .active = 0, > + .events = 0, > + .roi_index = 0, > + .is_rdma_needed = 0, > + .lut_index = -1, > + .operation_desc = 
&operation_desc[NVDLA_OP_RUBIK][1], > + .surface_desc = &surface_desc[NVDLA_OP_RUBIK][1], > + }, > + }, > + > +}; These global variables aren't going to work because Tegra234 (Tegra194's successor) has two instances of NVDLA. So this data needs to be allocated dynamically, per instance. It also seems to me like some of this data could be separated into read-only (i.e. const) and variable data. For instance none of the function pointers ever seem to change, so those could be refactored into an "ops" structure and then passed by pointer into the engines. That might also be useful if we need to make some of these implementations different on subsequent generations of the IP. > +/* driver probe and init */ > +static const struct of_device_id nvdla_of_match[] = { > + { > + .compatible = "nvidia,nvdla_os_initial", > + .data = &nvdla_config_os_initial, > + }, > + { > + .compatible = "nvidia,nvdla_2", > + .data = &nvdla_config_small, > + }, > + { }, > +}; Looking at the definition of these configuration structures these seem to be software configuration variants rather than actually different hardware blocks. In fact, none of the above seem to be specific to any particular hardware version. I would expect these compatible strings to reflect the exact chip that they can be found in. I.e. for Tegra194 I would expect this to be just: "nvidia,tegra194-dla" And then on Tegra234 it would become: "nvidia,tegra234-dla" at which point the parameterization can happen based on that compatible string, if we need to. Also, and I think Peter Robinson already mentioned that, we'll need the device tree bindings for this as well as device tree changes that create this device on one of the platforms supported upstream so that we can test this. I think in your first version of these patches you had also made a reference to the userspace driver component that you've used to test this on. 
Can you include links to those in the cover letter of subsequent versions so that people know where to look? > + > +static int32_t nvdla_probe(struct platform_device *pdev) We generally don't use the sized types unless absolutely necessary. So this (and potentially other occurrences) should just switch to simple "int". > +{ > + int32_t err = 0; > + struct resource *res; > + struct nvdla_device *nvdla_dev; > + struct device *dev = &pdev->dev; > + const struct of_device_id *match; > + > + if (!pdev->dev.of_node) > + return -EINVAL; > + > + match = of_match_device(nvdla_of_match, &pdev->dev); > + if (!match) { > + pr_err("Missing DT entry!\n"); > + return -EINVAL; > + } There's usually no need for this check. If there was no device tree entry then the nvdla_probe() function wouldn't have been called. > + > + nvdla_dev = devm_kzalloc(dev, sizeof(*nvdla_dev), GFP_KERNEL); > + if (!nvdla_dev) > + return -ENOMEM; > + > + platform_set_drvdata(pdev, nvdla_dev); > + nvdla_dev->pdev = pdev; Do we really need the platform device later on? Typically access to the struct device (i.e. &pdev->dev) is enough. > + nvdla_dev->config_data = (struct nvdla_config *)match->data; You can get this in a single call to of_device_get_match_data(), so you don't need the extra match local variable. > + > + init_completion(&nvdla_dev->event_notifier); > + > + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + nvdla_dev->base = devm_ioremap_resource(&pdev->dev, res); I think people prefer devm_platform_get_and_ioremap_resource() these days... > + if (IS_ERR(nvdla_dev->base)) > + return PTR_ERR(nvdla_dev->base); > + > + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); > + if (!res) { > + dev_err(&pdev->dev, "no irq resource\n"); > + return -EINVAL; > + } > + nvdla_dev->irq = res->start; You can use platform_get_irq() here, which makes this a bit simpler. 
> + > + err = devm_request_irq(&pdev->dev, nvdla_dev->irq, > + nvdla_engine_isr, 0, > + dev_name(&pdev->dev), nvdla_dev); > + if (err) > + return err; > + > + nvdla_dev->engine_context = &engine; > + engine.task = &global_task; > + engine.driver_context = (void *)nvdla_dev; Why the generic void *? Can't we just store a struct nvdla_device * as the driver context directly? Do we expect this type to vary? What I'm missing in this ->probe() function is references to the various clocks, resets and power gates that need to be controlled in order for this IP to work. The power gates are typically implemented using generic power domains, so you can safely ignore those, provided you at least have them in the device tree, but I think you'd need to at the very least grab the clocks and resets that this needs. This might work without them for a while because Tegra194 and later do some hidden magic with clocks and resets, but for any finer-grained control we'll need them (for instance, we will want to switch into a low-power state when no tasks are being processed). 
Thierry > + engine.task->task_data = NULL; > + > + nvdla_init_op_cache(&engine); > + nvdla_clear_task(nvdla_dev->engine_context); > + > + err = nvdla_drm_probe(nvdla_dev); > + if (err) > + dev_err(&pdev->dev, "failed to register drm device\n"); > + > + return err; > +} > + > +static int32_t __exit nvdla_remove(struct platform_device *pdev) > +{ > + struct nvdla_device *nvdla_dev = dev_get_drvdata(&pdev->dev); > + > + nvdla_drm_remove(nvdla_dev); > + > + return 0; > +} > + > +static struct platform_driver nvdla_driver = { > + .probe = nvdla_probe, > + .remove = __exit_p(nvdla_remove), > + .driver = { > + .owner = THIS_MODULE, > + .name = "NVDLA", > + .of_match_table = of_match_ptr(nvdla_of_match), > + }, > +}; > +module_platform_driver(nvdla_driver); > + > +MODULE_LICENSE("Dual BSD/GPL"); > +MODULE_DESCRIPTION("Nvidia Deep Learning Accelerator driver"); > +MODULE_IMPORT_NS(DMA_BUF); [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2022-04-28 15:18 UTC|newest] Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-04-26 6:07 [PATCH v2 0/4] drm/nvdla: Add driver support for NVDLA Cai Huoqing 2022-04-26 6:07 ` Cai Huoqing 2022-04-26 6:07 ` [PATCH v2 1/4] MAINTAINERS: Add the driver info of the NVDLA Cai Huoqing 2022-04-26 6:07 ` Cai Huoqing 2022-04-26 6:07 ` [PATCH v2 2/4] drm/nvdla: Add driver support for NVDLA Cai Huoqing 2022-04-26 6:07 ` Cai Huoqing 2022-04-28 15:18 ` Thierry Reding [this message] 2022-04-28 15:18 ` Thierry Reding 2022-04-28 15:19 ` Thierry Reding 2022-04-28 15:19 ` Thierry Reding 2022-04-26 6:08 ` [PATCH v2 3/4] drm/nvdla: Add register head file of NVDLA Cai Huoqing 2022-04-26 6:08 ` Cai Huoqing 2022-04-28 14:31 ` Thierry Reding 2022-04-28 14:31 ` Thierry Reding 2022-04-26 6:08 ` [PATCH v2 4/4] drm/nvdla/uapi: Add UAPI of NVDLA driver Cai Huoqing 2022-04-26 6:08 ` Cai Huoqing 2022-04-26 6:31 ` Christian König 2022-04-26 6:31 ` Christian König 2022-04-26 8:23 ` Cai Huoqing 2022-04-26 8:23 ` Cai Huoqing 2022-04-26 8:29 ` Christian König 2022-04-26 8:29 ` Christian König 2022-04-28 14:45 ` Thierry Reding 2022-04-28 14:45 ` Thierry Reding 2022-04-29 3:58 ` Cai Huoqing 2022-04-29 3:58 ` Cai Huoqing 2022-04-26 10:50 ` [Linaro-mm-sig] " Arnd Bergmann 2022-04-26 10:50 ` Arnd Bergmann 2022-04-26 12:24 ` Cai Huoqing 2022-04-26 12:24 ` Cai Huoqing 2022-04-26 12:38 ` Arnd Bergmann 2022-04-26 12:38 ` Arnd Bergmann 2022-04-26 10:12 ` kernel test robot 2022-04-26 10:12 ` kernel test robot 2022-04-26 11:23 ` kernel test robot 2022-04-26 11:23 ` kernel test robot 2022-04-28 14:40 ` Thierry Reding 2022-04-28 14:40 ` Thierry Reding 2022-04-28 14:10 ` [PATCH v2 0/4] drm/nvdla: Add driver support for NVDLA Thierry Reding 2022-04-28 14:10 ` Thierry Reding 2022-04-28 15:56 ` Mikko Perttunen 2022-04-28 15:56 ` Mikko Perttunen 2022-04-28 16:35 ` Jon Hunter 2022-04-28 16:35 ` Jon Hunter 2022-04-29 3:37 ` Cai Huoqing 2022-04-29 3:37 ` 
Cai Huoqing 2022-04-29 3:28 ` Cai Huoqing 2022-04-29 3:28 ` Cai Huoqing 2022-05-02 17:04 ` Thierry Reding 2022-05-02 17:04 ` Thierry Reding 2022-05-07 9:05 ` Cai Huoqing 2022-05-07 9:05 ` Cai Huoqing
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=YmqwNVoTZZFaIM9S@orome \ --to=thierry.reding@gmail.com \ --cc=airlied@linux.ie \ --cc=cai.huoqing@linux.dev \ --cc=christian.koenig@amd.com \ --cc=dri-devel@lists.freedesktop.org \ --cc=linaro-mm-sig@lists.linaro.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-media@vger.kernel.org \ --cc=sumit.semwal@linaro.org \ --cc=tzimmermann@suse.de \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.