* [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions
2019-07-08 9:14 [PATCH v5 rdma-next 0/6] RDMA/qedr: Use the doorbell overflow recovery mechanism for RDMA Michal Kalderon
@ 2019-07-08 9:14 ` Michal Kalderon
2019-07-08 13:47 ` Gal Pressman
2019-07-09 7:02 ` Leon Romanovsky
2019-07-08 9:14 ` [PATCH v5 rdma-next 2/6] RDMA/efa: Use the common mmap_xa helpers Michal Kalderon
` (4 subsequent siblings)
5 siblings, 2 replies; 14+ messages in thread
From: Michal Kalderon @ 2019-07-08 9:14 UTC (permalink / raw)
To: michal.kalderon, ariel.elior, jgg, dledford, galpress
Cc: linux-rdma, davem, netdev
Create some common APIs for adding entries to an mmap xarray,
searching for an entry, and freeing one.

The code was copied from the efa driver almost as-is, just renaming
the functions to be generic rather than efa-specific.
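For illustration, a driver is expected to use the helpers roughly as
follows (sketch only; the driver object, mmap flag and response field
names here are made up):

	/* at object creation time: */
	key = rdma_user_mmap_entry_insert(ucontext, drv_obj, bar_addr,
					  PAGE_SIZE, DRV_MMAP_IO_NC);
	if (key == RDMA_USER_MMAP_INVALID)
		return -ENOMEM;
	resp.db_mmap_key = key;	/* returned to userspace as mmap offset */

	/* later, in the driver's ->mmap() callback: */
	entry = rdma_user_mmap_entry_get(ucontext, key, length);
	if (!entry)
		return -EINVAL;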
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
---
drivers/infiniband/core/device.c | 1 +
drivers/infiniband/core/rdma_core.c | 1 +
drivers/infiniband/core/uverbs_cmd.c | 1 +
drivers/infiniband/core/uverbs_main.c | 105 ++++++++++++++++++++++++++++++++++
include/rdma/ib_verbs.h | 32 +++++++++++
5 files changed, 140 insertions(+)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 8a6ccb936dfe..a830c2c5d691 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
SET_DEVICE_OP(dev_ops, map_phys_fmr);
SET_DEVICE_OP(dev_ops, mmap);
+ SET_DEVICE_OP(dev_ops, mmap_free);
SET_DEVICE_OP(dev_ops, modify_ah);
SET_DEVICE_OP(dev_ops, modify_cq);
SET_DEVICE_OP(dev_ops, modify_device);
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index ccf4d069c25c..7166741834c8 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
rdma_restrack_del(&ucontext->res);
ib_dev->ops.dealloc_ucontext(ucontext);
+ rdma_user_mmap_entries_remove_free(ucontext);
kfree(ucontext);
ufile->ucontext = NULL;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 7ddd0e5bc6b3..44c0600245e4 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -254,6 +254,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
mutex_init(&ucontext->per_mm_list_lock);
INIT_LIST_HEAD(&ucontext->per_mm_list);
+ xa_init(&ucontext->mmap_xa);
ret = get_unused_fd_flags(O_CLOEXEC);
if (ret < 0)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 11c13c1381cf..37507cc27e8c 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -965,6 +965,111 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
}
EXPORT_SYMBOL(rdma_user_mmap_io);
+static inline u64
+rdma_user_mmap_get_key(const struct rdma_user_mmap_entry *entry)
+{
+ return (u64)entry->mmap_page << PAGE_SHIFT;
+}
+
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
+{
+ struct rdma_user_mmap_entry *entry;
+ u64 mmap_page;
+
+ mmap_page = key >> PAGE_SHIFT;
+ if (mmap_page > U32_MAX)
+ return NULL;
+
+ entry = xa_load(&ucontext->mmap_xa, mmap_page);
+ if (!entry || rdma_user_mmap_get_key(entry) != key ||
+ entry->length != len)
+ return NULL;
+
+ ibdev_dbg(ucontext->device,
+ "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
+ entry->obj, key, entry->address, entry->length);
+
+ return entry;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get);
+
+/*
+ * Note this locking scheme cannot support removal of entries, except during
+ * ucontext destruction when the core code guarantees no concurrency.
+ */
+u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void *obj,
+ u64 address, u64 length, u8 mmap_flag)
+{
+ struct rdma_user_mmap_entry *entry;
+ u32 next_mmap_page;
+ int err;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return RDMA_USER_MMAP_INVALID;
+
+ entry->obj = obj;
+ entry->address = address;
+ entry->length = length;
+ entry->mmap_flag = mmap_flag;
+
+ xa_lock(&ucontext->mmap_xa);
+ if (check_add_overflow(ucontext->mmap_xa_page,
+ (u32)(length >> PAGE_SHIFT),
+ &next_mmap_page))
+ goto err_unlock;
+
+ entry->mmap_page = ucontext->mmap_xa_page;
+ ucontext->mmap_xa_page = next_mmap_page;
+ err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
+ GFP_KERNEL);
+ if (err)
+ goto err_unlock;
+
+ xa_unlock(&ucontext->mmap_xa);
+
+ ibdev_dbg(ucontext->device,
+ "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
+ entry->obj, entry->address, entry->length,
+ rdma_user_mmap_get_key(entry));
+
+ return rdma_user_mmap_get_key(entry);
+
+err_unlock:
+ xa_unlock(&ucontext->mmap_xa);
+ kfree(entry);
+ return RDMA_USER_MMAP_INVALID;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
+
+/*
+ * This is only called when the ucontext is destroyed and there can be no
+ * concurrent query via mmap or allocate on the xarray, thus we can be sure no
+ * other thread is using the entry pointer. We also know that all the BAR
+ * pages have either been zapped or munmapped at this point. Normal pages are
+ * refcounted and will be freed at the proper time.
+ */
+void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext)
+{
+ struct rdma_user_mmap_entry *entry;
+ unsigned long mmap_page;
+
+ xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
+ xa_erase(&ucontext->mmap_xa, mmap_page);
+
+ ibdev_dbg(ucontext->device,
+ "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
+ entry->obj, rdma_user_mmap_get_key(entry),
+ entry->address, entry->length);
+ if (ucontext->device->ops.mmap_free)
+ ucontext->device->ops.mmap_free(entry->address,
+ entry->length,
+ entry->mmap_flag);
+ kfree(entry);
+ }
+}
+
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
struct rdma_umap_priv *priv, *next_priv;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 26e9c2594913..54ce3fdae180 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1425,6 +1425,8 @@ struct ib_ucontext {
* Implementation details of the RDMA core, don't use in drivers:
*/
struct rdma_restrack_entry res;
+ struct xarray mmap_xa;
+ u32 mmap_xa_page;
};
struct ib_uobject {
@@ -2311,6 +2313,7 @@ struct ib_device_ops {
struct ib_udata *udata);
void (*dealloc_ucontext)(struct ib_ucontext *context);
int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
+ void (*mmap_free)(u64 address, u64 length, u8 mmap_flag);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
@@ -2706,9 +2709,23 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void ib_set_device_ops(struct ib_device *device,
const struct ib_device_ops *ops);
+#define RDMA_USER_MMAP_INVALID U64_MAX
+struct rdma_user_mmap_entry {
+ void *obj;
+ u64 address;
+ u64 length;
+ u32 mmap_page;
+ u8 mmap_flag;
+};
+
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
unsigned long pfn, unsigned long size, pgprot_t prot);
+u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void *obj,
+ u64 address, u64 length, u8 mmap_flag);
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len);
+void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext);
#else
static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
struct vm_area_struct *vma,
@@ -2717,6 +2734,21 @@ static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
{
return -EINVAL;
}
+
+static inline u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+				void *obj, u64 address, u64 length, u8 mmap_flag)
+{
+ return RDMA_USER_MMAP_INVALID;
+}
+
+static inline struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
+{
+ return NULL;
+}
+
+static inline void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext) {}
+
#endif
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
--
2.14.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions
2019-07-08 9:14 ` [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions Michal Kalderon
@ 2019-07-08 13:47 ` Gal Pressman
2019-07-09 10:26 ` Michal Kalderon
2019-07-09 7:02 ` Leon Romanovsky
1 sibling, 1 reply; 14+ messages in thread
From: Gal Pressman @ 2019-07-08 13:47 UTC (permalink / raw)
To: Michal Kalderon, ariel.elior, jgg, dledford; +Cc: linux-rdma, davem, netdev
On 08/07/2019 12:14, Michal Kalderon wrote:
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 8a6ccb936dfe..a830c2c5d691 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
> SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
> SET_DEVICE_OP(dev_ops, map_phys_fmr);
> SET_DEVICE_OP(dev_ops, mmap);
> + SET_DEVICE_OP(dev_ops, mmap_free);
> SET_DEVICE_OP(dev_ops, modify_ah);
> SET_DEVICE_OP(dev_ops, modify_cq);
> SET_DEVICE_OP(dev_ops, modify_device);
> diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
> index ccf4d069c25c..7166741834c8 100644
> --- a/drivers/infiniband/core/rdma_core.c
> +++ b/drivers/infiniband/core/rdma_core.c
> @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
> rdma_restrack_del(&ucontext->res);
>
> ib_dev->ops.dealloc_ucontext(ucontext);
> + rdma_user_mmap_entries_remove_free(ucontext);
This should happen before dealloc_ucontext.
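I.e. roughly (sketch of the suggested order):

	rdma_user_mmap_entries_remove_free(ucontext);
	ib_dev->ops.dealloc_ucontext(ucontext);
	kfree(ucontext);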
> +struct rdma_user_mmap_entry *
> +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
> +{
> + struct rdma_user_mmap_entry *entry;
> + u64 mmap_page;
> +
> + mmap_page = key >> PAGE_SHIFT;
> + if (mmap_page > U32_MAX)
> + return NULL;
> +
> + entry = xa_load(&ucontext->mmap_xa, mmap_page);
> + if (!entry || rdma_user_mmap_get_key(entry) != key ||
I wonder if the 'rdma_user_mmap_get_key(entry) != key' check is still needed.
> +/*
> + * This is only called when the ucontext is destroyed and there can be no
> + * concurrent query via mmap or allocate on the xarray, thus we can be sure no
> + * other thread is using the entry pointer. We also know that all the BAR
> + * pages have either been zapped or munmapped at this point. Normal pages are
> + * refcounted and will be freed at the proper time.
> + */
> +void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext)
> +{
> + struct rdma_user_mmap_entry *entry;
> + unsigned long mmap_page;
> +
> + xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
> + xa_erase(&ucontext->mmap_xa, mmap_page);
> +
> + ibdev_dbg(ucontext->device,
> + "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
> + entry->obj, rdma_user_mmap_get_key(entry),
> + entry->address, entry->length);
> + if (ucontext->device->ops.mmap_free)
> + ucontext->device->ops.mmap_free(entry->address,
> + entry->length,
> + entry->mmap_flag);
Pass entry instead?
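I.e. something like (sketch):

	void (*mmap_free)(struct rdma_user_mmap_entry *entry);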
> + kfree(entry);
> + }
> +}
> +
> void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
> {
> struct rdma_umap_priv *priv, *next_priv;
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 26e9c2594913..54ce3fdae180 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -1425,6 +1425,8 @@ struct ib_ucontext {
> * Implementation details of the RDMA core, don't use in drivers:
> */
> struct rdma_restrack_entry res;
> + struct xarray mmap_xa;
> + u32 mmap_xa_page;
> };
>
> struct ib_uobject {
> @@ -2311,6 +2313,7 @@ struct ib_device_ops {
> struct ib_udata *udata);
> void (*dealloc_ucontext)(struct ib_ucontext *context);
> int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
> + void (*mmap_free)(u64 address, u64 length, u8 mmap_flag);
I feel like this callback needs some documentation.
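Even a short comment above it would do, e.g. (rough sketch, wording is
mine):

	/*
	 * mmap_free - release the resources behind an mmap entry; called
	 * on ucontext teardown, after the entry has been erased from the
	 * xarray and can no longer be looked up via mmap.
	 */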
> void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
> int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
> void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
> @@ -2706,9 +2709,23 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
> void ib_set_device_ops(struct ib_device *device,
> const struct ib_device_ops *ops);
>
> +#define RDMA_USER_MMAP_INVALID U64_MAX
> +struct rdma_user_mmap_entry {
> + void *obj;
I know EFA is the culprit here, but please remove the extra space :).
> + u64 address;
> + u64 length;
> + u32 mmap_page;
> + u8 mmap_flag;
> +};
> +
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions
2019-07-08 13:47 ` Gal Pressman
@ 2019-07-09 10:26 ` Michal Kalderon
0 siblings, 0 replies; 14+ messages in thread
From: Michal Kalderon @ 2019-07-09 10:26 UTC (permalink / raw)
To: Gal Pressman, Ariel Elior, jgg, dledford; +Cc: linux-rdma, davem, netdev
> From: linux-rdma-owner@vger.kernel.org <linux-rdma-
> owner@vger.kernel.org> On Behalf Of Gal Pressman
>
> On 08/07/2019 12:14, Michal Kalderon wrote:
> > diff --git a/drivers/infiniband/core/device.c
> > b/drivers/infiniband/core/device.c
> > index 8a6ccb936dfe..a830c2c5d691 100644
> > --- a/drivers/infiniband/core/device.c
> > +++ b/drivers/infiniband/core/device.c
> > @@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev,
> const struct ib_device_ops *ops)
> > SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
> > SET_DEVICE_OP(dev_ops, map_phys_fmr);
> > SET_DEVICE_OP(dev_ops, mmap);
> > + SET_DEVICE_OP(dev_ops, mmap_free);
> > SET_DEVICE_OP(dev_ops, modify_ah);
> > SET_DEVICE_OP(dev_ops, modify_cq);
> > SET_DEVICE_OP(dev_ops, modify_device); diff --git
> > a/drivers/infiniband/core/rdma_core.c
> > b/drivers/infiniband/core/rdma_core.c
> > index ccf4d069c25c..7166741834c8 100644
> > --- a/drivers/infiniband/core/rdma_core.c
> > +++ b/drivers/infiniband/core/rdma_core.c
> > @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct
> ib_uverbs_file *ufile,
> > rdma_restrack_del(&ucontext->res);
> >
> > ib_dev->ops.dealloc_ucontext(ucontext);
> > + rdma_user_mmap_entries_remove_free(ucontext);
>
> This should happen before dealloc_ucontext.
Right, will fix.
>
> > +struct rdma_user_mmap_entry *
> > +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64
> > +len) {
> > + struct rdma_user_mmap_entry *entry;
> > + u64 mmap_page;
> > +
> > + mmap_page = key >> PAGE_SHIFT;
> > + if (mmap_page > U32_MAX)
> > + return NULL;
> > +
> > + entry = xa_load(&ucontext->mmap_xa, mmap_page);
> > + if (!entry || rdma_user_mmap_get_key(entry) != key ||
>
> I wonder if the 'rdma_user_mmap_get_key(entry) != key' check is still
> needed.
I guess it's not, since the key is used to get the entry. I'll remove the check.
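The lookup would then reduce to (sketch):

	entry = xa_load(&ucontext->mmap_xa, mmap_page);
	if (!entry || entry->length != len)
		return NULL;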
>
> > +/*
> > + * This is only called when the ucontext is destroyed and there can
> > +be no
> > + * concurrent query via mmap or allocate on the xarray, thus we can
> > +be sure no
> > + * other thread is using the entry pointer. We also know that all the
> > +BAR
> > + * pages have either been zap'd or munmaped at this point. Normal
> > +pages are
> > + * refcounted and will be freed at the proper time.
> > + */
> > +void rdma_user_mmap_entries_remove_free(struct ib_ucontext
> *ucontext)
> > +{
> > + struct rdma_user_mmap_entry *entry;
> > + unsigned long mmap_page;
> > +
> > + xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
> > + xa_erase(&ucontext->mmap_xa, mmap_page);
> > +
> > + ibdev_dbg(ucontext->device,
> > + "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx]
> removed\n",
> > + entry->obj, rdma_user_mmap_get_key(entry),
> > + entry->address, entry->length);
> > + if (ucontext->device->ops.mmap_free)
> > + ucontext->device->ops.mmap_free(entry->address,
> > + entry->length,
> > + entry->mmap_flag);
>
> Pass entry instead?
>
> > + kfree(entry);
> > + }
> > +}
> > +
> > void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) {
> > struct rdma_umap_priv *priv, *next_priv; diff --git
> > a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index
> > 26e9c2594913..54ce3fdae180 100644
> > --- a/include/rdma/ib_verbs.h
> > +++ b/include/rdma/ib_verbs.h
> > @@ -1425,6 +1425,8 @@ struct ib_ucontext {
> > * Implementation details of the RDMA core, don't use in drivers:
> > */
> > struct rdma_restrack_entry res;
> > + struct xarray mmap_xa;
> > + u32 mmap_xa_page;
> > };
> >
> > struct ib_uobject {
> > @@ -2311,6 +2313,7 @@ struct ib_device_ops {
> > struct ib_udata *udata);
> > void (*dealloc_ucontext)(struct ib_ucontext *context);
> > int (*mmap)(struct ib_ucontext *context, struct vm_area_struct
> > *vma);
> > + void (*mmap_free)(u64 address, u64 length, u8 mmap_flag);
>
> I feel like this callback needs some documentation.
>
> > void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
> > int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
> > void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); @@
> > -2706,9 +2709,23 @@ void ib_set_client_data(struct ib_device *device,
> > struct ib_client *client, void ib_set_device_ops(struct ib_device *device,
> > const struct ib_device_ops *ops);
> >
> > +#define RDMA_USER_MMAP_INVALID U64_MAX struct
> rdma_user_mmap_entry {
> > + void *obj;
>
> I know EFA is the culprit here, but please remove the extra space :).
>
> > + u64 address;
> > + u64 length;
> > + u32 mmap_page;
> > + u8 mmap_flag;
> > +};
> > +
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions
2019-07-08 9:14 ` [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions Michal Kalderon
2019-07-08 13:47 ` Gal Pressman
@ 2019-07-09 7:02 ` Leon Romanovsky
2019-07-09 10:29 ` Michal Kalderon
1 sibling, 1 reply; 14+ messages in thread
From: Leon Romanovsky @ 2019-07-09 7:02 UTC (permalink / raw)
To: Michal Kalderon
Cc: ariel.elior, jgg, dledford, galpress, linux-rdma, davem, netdev
On Mon, Jul 08, 2019 at 12:14:58PM +0300, Michal Kalderon wrote:
> Create some common APIs for adding entries to an mmap xarray,
> searching for an entry, and freeing one.
>
> The code was copied from the efa driver almost as-is, just renaming
> the functions to be generic rather than efa-specific.
>
> Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
> ---
> drivers/infiniband/core/device.c | 1 +
> drivers/infiniband/core/rdma_core.c | 1 +
> drivers/infiniband/core/uverbs_cmd.c | 1 +
> drivers/infiniband/core/uverbs_main.c | 105 ++++++++++++++++++++++++++++++++++
> include/rdma/ib_verbs.h | 32 +++++++++++
> 5 files changed, 140 insertions(+)
>
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 8a6ccb936dfe..a830c2c5d691 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
> SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
> SET_DEVICE_OP(dev_ops, map_phys_fmr);
> SET_DEVICE_OP(dev_ops, mmap);
> + SET_DEVICE_OP(dev_ops, mmap_free);
> SET_DEVICE_OP(dev_ops, modify_ah);
> SET_DEVICE_OP(dev_ops, modify_cq);
> SET_DEVICE_OP(dev_ops, modify_device);
> diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
> index ccf4d069c25c..7166741834c8 100644
> --- a/drivers/infiniband/core/rdma_core.c
> +++ b/drivers/infiniband/core/rdma_core.c
> @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
> rdma_restrack_del(&ucontext->res);
>
> ib_dev->ops.dealloc_ucontext(ucontext);
> + rdma_user_mmap_entries_remove_free(ucontext);
> kfree(ucontext);
>
> ufile->ucontext = NULL;
> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
> index 7ddd0e5bc6b3..44c0600245e4 100644
> --- a/drivers/infiniband/core/uverbs_cmd.c
> +++ b/drivers/infiniband/core/uverbs_cmd.c
> @@ -254,6 +254,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
>
> mutex_init(&ucontext->per_mm_list_lock);
> INIT_LIST_HEAD(&ucontext->per_mm_list);
> + xa_init(&ucontext->mmap_xa);
>
> ret = get_unused_fd_flags(O_CLOEXEC);
> if (ret < 0)
> diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
> index 11c13c1381cf..37507cc27e8c 100644
> --- a/drivers/infiniband/core/uverbs_main.c
> +++ b/drivers/infiniband/core/uverbs_main.c
> @@ -965,6 +965,111 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
> }
> EXPORT_SYMBOL(rdma_user_mmap_io);
>
> +static inline u64
> +rdma_user_mmap_get_key(const struct rdma_user_mmap_entry *entry)
> +{
> + return (u64)entry->mmap_page << PAGE_SHIFT;
> +}
> +
> +struct rdma_user_mmap_entry *
> +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
> +{
> + struct rdma_user_mmap_entry *entry;
> + u64 mmap_page;
> +
> + mmap_page = key >> PAGE_SHIFT;
> + if (mmap_page > U32_MAX)
> + return NULL;
> +
> + entry = xa_load(&ucontext->mmap_xa, mmap_page);
> + if (!entry || rdma_user_mmap_get_key(entry) != key ||
> + entry->length != len)
> + return NULL;
> +
> + ibdev_dbg(ucontext->device,
> + "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
> + entry->obj, key, entry->address, entry->length);
> +
> + return entry;
> +}
> +EXPORT_SYMBOL(rdma_user_mmap_entry_get);
Please add a function description in kernel-doc format for all newly
EXPORT_SYMBOL()'ed functions you introduced in RDMA/core.
> +
> +/*
> + * Note this locking scheme cannot support removal of entries, except during
> > + * ucontext destruction when the core code guarantees no concurrency.
> + */
> +u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void *obj,
> + u64 address, u64 length, u8 mmap_flag)
> +{
> + struct rdma_user_mmap_entry *entry;
> + u32 next_mmap_page;
> + int err;
> +
> + entry = kmalloc(sizeof(*entry), GFP_KERNEL);
It is worth using kzalloc instead of kmalloc.
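That way any field the code does not set explicitly starts out zeroed:

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);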
Thanks
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions
2019-07-09 7:02 ` Leon Romanovsky
@ 2019-07-09 10:29 ` Michal Kalderon
2019-07-09 10:52 ` Leon Romanovsky
0 siblings, 1 reply; 14+ messages in thread
From: Michal Kalderon @ 2019-07-09 10:29 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Ariel Elior, jgg, dledford, galpress, linux-rdma, davem, netdev
> From: linux-rdma-owner@vger.kernel.org <linux-rdma-
> owner@vger.kernel.org> On Behalf Of Leon Romanovsky
>
> On Mon, Jul 08, 2019 at 12:14:58PM +0300, Michal Kalderon wrote:
> > Create some common APIs for adding entries to an mmap xarray,
> > searching for an entry, and freeing one.
> >
> > The code was copied from the efa driver almost as-is, just renaming
> > the functions to be generic rather than efa-specific.
> >
> > Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
> > ---
> > drivers/infiniband/core/device.c | 1 +
> > drivers/infiniband/core/rdma_core.c | 1 +
> > drivers/infiniband/core/uverbs_cmd.c | 1 +
> > drivers/infiniband/core/uverbs_main.c | 105
> ++++++++++++++++++++++++++++++++++
> > include/rdma/ib_verbs.h | 32 +++++++++++
> > 5 files changed, 140 insertions(+)
> >
> > diff --git a/drivers/infiniband/core/device.c
> > b/drivers/infiniband/core/device.c
> > index 8a6ccb936dfe..a830c2c5d691 100644
> > --- a/drivers/infiniband/core/device.c
> > +++ b/drivers/infiniband/core/device.c
> > @@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev,
> const struct ib_device_ops *ops)
> > SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
> > SET_DEVICE_OP(dev_ops, map_phys_fmr);
> > SET_DEVICE_OP(dev_ops, mmap);
> > + SET_DEVICE_OP(dev_ops, mmap_free);
> > SET_DEVICE_OP(dev_ops, modify_ah);
> > SET_DEVICE_OP(dev_ops, modify_cq);
> > SET_DEVICE_OP(dev_ops, modify_device); diff --git
> > a/drivers/infiniband/core/rdma_core.c
> > b/drivers/infiniband/core/rdma_core.c
> > index ccf4d069c25c..7166741834c8 100644
> > --- a/drivers/infiniband/core/rdma_core.c
> > +++ b/drivers/infiniband/core/rdma_core.c
> > @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct
> ib_uverbs_file *ufile,
> > rdma_restrack_del(&ucontext->res);
> >
> > ib_dev->ops.dealloc_ucontext(ucontext);
> > + rdma_user_mmap_entries_remove_free(ucontext);
> > kfree(ucontext);
> >
> > ufile->ucontext = NULL;
> > diff --git a/drivers/infiniband/core/uverbs_cmd.c
> > b/drivers/infiniband/core/uverbs_cmd.c
> > index 7ddd0e5bc6b3..44c0600245e4 100644
> > --- a/drivers/infiniband/core/uverbs_cmd.c
> > +++ b/drivers/infiniband/core/uverbs_cmd.c
> > @@ -254,6 +254,7 @@ static int ib_uverbs_get_context(struct
> > uverbs_attr_bundle *attrs)
> >
> > mutex_init(&ucontext->per_mm_list_lock);
> > INIT_LIST_HEAD(&ucontext->per_mm_list);
> > + xa_init(&ucontext->mmap_xa);
> >
> > ret = get_unused_fd_flags(O_CLOEXEC);
> > if (ret < 0)
> > diff --git a/drivers/infiniband/core/uverbs_main.c
> > b/drivers/infiniband/core/uverbs_main.c
> > index 11c13c1381cf..37507cc27e8c 100644
> > --- a/drivers/infiniband/core/uverbs_main.c
> > +++ b/drivers/infiniband/core/uverbs_main.c
> > @@ -965,6 +965,111 @@ int rdma_user_mmap_io(struct ib_ucontext
> > *ucontext, struct vm_area_struct *vma, }
> > EXPORT_SYMBOL(rdma_user_mmap_io);
> >
> > +static inline u64
> > +rdma_user_mmap_get_key(const struct rdma_user_mmap_entry
> *entry) {
> > + return (u64)entry->mmap_page << PAGE_SHIFT; }
> > +
> > +struct rdma_user_mmap_entry *
> > +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64
> > +len) {
> > + struct rdma_user_mmap_entry *entry;
> > + u64 mmap_page;
> > +
> > + mmap_page = key >> PAGE_SHIFT;
> > + if (mmap_page > U32_MAX)
> > + return NULL;
> > +
> > + entry = xa_load(&ucontext->mmap_xa, mmap_page);
> > + if (!entry || rdma_user_mmap_get_key(entry) != key ||
> > + entry->length != len)
> > + return NULL;
> > +
> > + ibdev_dbg(ucontext->device,
> > + "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx]
> removed\n",
> > + entry->obj, key, entry->address, entry->length);
> > +
> > + return entry;
> > +}
> > +EXPORT_SYMBOL(rdma_user_mmap_entry_get);
>
> Please add a function description in kernel-doc format for all newly
> EXPORT_SYMBOL()'ed functions you introduced in RDMA/core.
Ok. Could you give me a reference to an example? Where should the
documentation be added?
>
> > +
> > +/*
> > + * Note this locking scheme cannot support removal of entries, except
> > +during
> > + * ucontext destruction when the core code guarentees no concurrency.
> > + */
> > +u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void
> *obj,
> > + u64 address, u64 length, u8 mmap_flag) {
> > + struct rdma_user_mmap_entry *entry;
> > + u32 next_mmap_page;
> > + int err;
> > +
> > + entry = kmalloc(sizeof(*entry), GFP_KERNEL);
>
> It is worth to use kzalloc and not kmalloc.
>
> Thanks
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions
2019-07-09 10:29 ` Michal Kalderon
@ 2019-07-09 10:52 ` Leon Romanovsky
0 siblings, 0 replies; 14+ messages in thread
From: Leon Romanovsky @ 2019-07-09 10:52 UTC (permalink / raw)
To: Michal Kalderon
Cc: Ariel Elior, jgg, dledford, galpress, linux-rdma, davem, netdev
On Tue, Jul 09, 2019 at 10:29:36AM +0000, Michal Kalderon wrote:
> > From: linux-rdma-owner@vger.kernel.org <linux-rdma-
> > owner@vger.kernel.org> On Behalf Of Leon Romanovsky
> >
> > On Mon, Jul 08, 2019 at 12:14:58PM +0300, Michal Kalderon wrote:
> > > Create some common APIs for adding entries to an mmap xarray,
> > > searching for an entry, and freeing one.
> > >
> > > The code was copied from the efa driver almost as-is, just renaming
> > > the functions to be generic rather than efa-specific.
> > >
> > > Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
> > > ---
> > > drivers/infiniband/core/device.c | 1 +
> > > drivers/infiniband/core/rdma_core.c | 1 +
> > > drivers/infiniband/core/uverbs_cmd.c | 1 +
> > > drivers/infiniband/core/uverbs_main.c | 105
> > ++++++++++++++++++++++++++++++++++
> > > include/rdma/ib_verbs.h | 32 +++++++++++
> > > 5 files changed, 140 insertions(+)
> > >
> > > diff --git a/drivers/infiniband/core/device.c
> > > b/drivers/infiniband/core/device.c
> > > index 8a6ccb936dfe..a830c2c5d691 100644
> > > --- a/drivers/infiniband/core/device.c
> > > +++ b/drivers/infiniband/core/device.c
> > > @@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev,
> > const struct ib_device_ops *ops)
> > > SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
> > > SET_DEVICE_OP(dev_ops, map_phys_fmr);
> > > SET_DEVICE_OP(dev_ops, mmap);
> > > + SET_DEVICE_OP(dev_ops, mmap_free);
> > > SET_DEVICE_OP(dev_ops, modify_ah);
> > > SET_DEVICE_OP(dev_ops, modify_cq);
> > > SET_DEVICE_OP(dev_ops, modify_device); diff --git
> > > a/drivers/infiniband/core/rdma_core.c
> > > b/drivers/infiniband/core/rdma_core.c
> > > index ccf4d069c25c..7166741834c8 100644
> > > --- a/drivers/infiniband/core/rdma_core.c
> > > +++ b/drivers/infiniband/core/rdma_core.c
> > > @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct
> > ib_uverbs_file *ufile,
> > > rdma_restrack_del(&ucontext->res);
> > >
> > > ib_dev->ops.dealloc_ucontext(ucontext);
> > > + rdma_user_mmap_entries_remove_free(ucontext);
> > > kfree(ucontext);
> > >
> > > ufile->ucontext = NULL;
> > > diff --git a/drivers/infiniband/core/uverbs_cmd.c
> > > b/drivers/infiniband/core/uverbs_cmd.c
> > > index 7ddd0e5bc6b3..44c0600245e4 100644
> > > --- a/drivers/infiniband/core/uverbs_cmd.c
> > > +++ b/drivers/infiniband/core/uverbs_cmd.c
> > > @@ -254,6 +254,7 @@ static int ib_uverbs_get_context(struct
> > > uverbs_attr_bundle *attrs)
> > >
> > > mutex_init(&ucontext->per_mm_list_lock);
> > > INIT_LIST_HEAD(&ucontext->per_mm_list);
> > > + xa_init(&ucontext->mmap_xa);
> > >
> > > ret = get_unused_fd_flags(O_CLOEXEC);
> > > if (ret < 0)
> > > diff --git a/drivers/infiniband/core/uverbs_main.c
> > > b/drivers/infiniband/core/uverbs_main.c
> > > index 11c13c1381cf..37507cc27e8c 100644
> > > --- a/drivers/infiniband/core/uverbs_main.c
> > > +++ b/drivers/infiniband/core/uverbs_main.c
> > > @@ -965,6 +965,111 @@ int rdma_user_mmap_io(struct ib_ucontext
> > > *ucontext, struct vm_area_struct *vma, }
> > > EXPORT_SYMBOL(rdma_user_mmap_io);
> > >
> > > +static inline u64
> > > +rdma_user_mmap_get_key(const struct rdma_user_mmap_entry
> > *entry) {
> > > + return (u64)entry->mmap_page << PAGE_SHIFT; }
> > > +
> > > +struct rdma_user_mmap_entry *
> > > +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64
> > > +len) {
> > > + struct rdma_user_mmap_entry *entry;
> > > + u64 mmap_page;
> > > +
> > > + mmap_page = key >> PAGE_SHIFT;
> > > + if (mmap_page > U32_MAX)
> > > + return NULL;
> > > +
> > > + entry = xa_load(&ucontext->mmap_xa, mmap_page);
> > > + if (!entry || rdma_user_mmap_get_key(entry) != key ||
> > > + entry->length != len)
> > > + return NULL;
> > > +
> > > + ibdev_dbg(ucontext->device,
> > > + "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx]
> > removed\n",
> > > + entry->obj, key, entry->address, entry->length);
> > > +
> > > + return entry;
> > > +}
> > > +EXPORT_SYMBOL(rdma_user_mmap_entry_get);
> >
> > Please add a function description in kernel-doc format for all newly
> > EXPORT_SYMBOL()'ed functions you introduced in RDMA/core.
> Ok. Could you give me a reference to an example? Where should the
> Documentation be added to?
Above the function, in the *.c file.
For example, see function rdma_set_ack_timeout():
https://patchwork.kernel.org/patch/10778827/
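For these helpers it could look roughly like this (wording is
illustrative):

	/**
	 * rdma_user_mmap_entry_get - Find an entry in the mmap_xa.
	 * @ucontext: associated user context.
	 * @key: the key returned by rdma_user_mmap_entry_insert and
	 *       passed in by userspace as the mmap offset.
	 * @len: the length the user wants to map.
	 *
	 * Return: the entry on success, NULL if no matching entry exists.
	 */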
Thanks
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v5 rdma-next 2/6] RDMA/efa: Use the common mmap_xa helpers
2019-07-08 9:14 [PATCH v5 rdma-next 0/6] RDMA/qedr: Use the doorbell overflow recovery mechanism for RDMA Michal Kalderon
2019-07-08 9:14 ` [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions Michal Kalderon
@ 2019-07-08 9:14 ` Michal Kalderon
2019-07-09 9:02 ` Gal Pressman
2019-07-08 9:15 ` [PATCH v5 rdma-next 3/6] RDMA/qedr: Use the common mmap API Michal Kalderon
` (3 subsequent siblings)
5 siblings, 1 reply; 14+ messages in thread
From: Michal Kalderon @ 2019-07-08 9:14 UTC (permalink / raw)
To: michal.kalderon, ariel.elior, jgg, dledford, galpress
Cc: linux-rdma, davem, netdev
Remove the functions related to managing the mmap_xa database.
This code was copied into ib_core; use the common APIs instead.
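The conversion is mechanical, e.g. (illustrative):

	mmap_entry_insert(dev, ucontext, obj, address, length, flag)
	  ->	rdma_user_mmap_entry_insert(&ucontext->ibucontext, obj,
					    address, length, flag)
	mmap_entry_get(dev, ucontext, key, len)
	  ->	rdma_user_mmap_entry_get(&ucontext->ibucontext, key, len)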
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
---
drivers/infiniband/hw/efa/efa.h | 3 +-
drivers/infiniband/hw/efa/efa_main.c | 1 +
drivers/infiniband/hw/efa/efa_verbs.c | 183 ++++++++--------------------------
3 files changed, 42 insertions(+), 145 deletions(-)
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 119f8efec564..4afcf3679cdb 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -71,8 +71,6 @@ struct efa_dev {
struct efa_ucontext {
struct ib_ucontext ibucontext;
- struct xarray mmap_xa;
- u32 mmap_xa_page;
u16 uarn;
};
@@ -147,6 +145,7 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata);
void efa_dealloc_ucontext(struct ib_ucontext *ibucontext);
int efa_mmap(struct ib_ucontext *ibucontext,
struct vm_area_struct *vma);
+void efa_mmap_free(u64 address, u64 length, u8 mmap_flag);
int efa_create_ah(struct ib_ah *ibah,
struct rdma_ah_attr *ah_attr,
u32 flags,
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index dd1c6d49466f..65508c73accd 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -215,6 +215,7 @@ static const struct ib_device_ops efa_dev_ops = {
.get_link_layer = efa_port_link_layer,
.get_port_immutable = efa_get_port_immutable,
.mmap = efa_mmap,
+ .mmap_free = efa_mmap_free,
.modify_qp = efa_modify_qp,
.query_device = efa_query_device,
.query_gid = efa_query_gid,
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index df77bc312a25..5dff892da161 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -13,34 +13,15 @@
#include "efa.h"
-#define EFA_MMAP_FLAG_SHIFT 56
-#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
-#define EFA_MMAP_INVALID U64_MAX
-
enum {
EFA_MMAP_DMA_PAGE = 0,
EFA_MMAP_IO_WC,
EFA_MMAP_IO_NC,
};
-
#define EFA_AENQ_ENABLED_GROUPS \
(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
-struct efa_mmap_entry {
- void *obj;
- u64 address;
- u64 length;
- u32 mmap_page;
- u8 mmap_flag;
-};
-
-static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
-{
- return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
- ((u64)efa->mmap_page << PAGE_SHIFT);
-}
-
#define EFA_CHUNK_PAYLOAD_SHIFT 12
#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
#define EFA_CHUNK_PAYLOAD_PTR_SIZE 8
@@ -145,105 +126,7 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
return addr;
}
-/*
- * This is only called when the ucontext is destroyed and there can be no
- * concurrent query via mmap or allocate on the xarray, thus we can be sure no
- * other thread is using the entry pointer. We also know that all the BAR
- * pages have either been zap'd or munmaped at this point. Normal pages are
- * refcounted and will be freed at the proper time.
- */
-static void mmap_entries_remove_free(struct efa_dev *dev,
- struct efa_ucontext *ucontext)
-{
- struct efa_mmap_entry *entry;
- unsigned long mmap_page;
- xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
- xa_erase(&ucontext->mmap_xa, mmap_page);
-
- ibdev_dbg(
- &dev->ibdev,
- "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
- entry->obj, get_mmap_key(entry), entry->address,
- entry->length);
- if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
- /* DMA mapping is already gone, now free the pages */
- free_pages_exact(phys_to_virt(entry->address),
- entry->length);
- kfree(entry);
- }
-}
-
-static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
- struct efa_ucontext *ucontext,
- u64 key, u64 len)
-{
- struct efa_mmap_entry *entry;
- u64 mmap_page;
-
- mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
- if (mmap_page > U32_MAX)
- return NULL;
-
- entry = xa_load(&ucontext->mmap_xa, mmap_page);
- if (!entry || get_mmap_key(entry) != key || entry->length != len)
- return NULL;
-
- ibdev_dbg(&dev->ibdev,
- "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
- entry->obj, key, entry->address, entry->length);
-
- return entry;
-}
-
-/*
- * Note this locking scheme cannot support removal of entries, except during
- * ucontext destruction when the core code guarentees no concurrency.
- */
-static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
- void *obj, u64 address, u64 length, u8 mmap_flag)
-{
- struct efa_mmap_entry *entry;
- u32 next_mmap_page;
- int err;
-
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return EFA_MMAP_INVALID;
-
- entry->obj = obj;
- entry->address = address;
- entry->length = length;
- entry->mmap_flag = mmap_flag;
-
- xa_lock(&ucontext->mmap_xa);
- if (check_add_overflow(ucontext->mmap_xa_page,
- (u32)(length >> PAGE_SHIFT),
- &next_mmap_page))
- goto err_unlock;
-
- entry->mmap_page = ucontext->mmap_xa_page;
- ucontext->mmap_xa_page = next_mmap_page;
- err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
- GFP_KERNEL);
- if (err)
- goto err_unlock;
-
- xa_unlock(&ucontext->mmap_xa);
-
- ibdev_dbg(
- &dev->ibdev,
- "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
- entry->obj, entry->address, entry->length, get_mmap_key(entry));
-
- return get_mmap_key(entry);
-
-err_unlock:
- xa_unlock(&ucontext->mmap_xa);
- kfree(entry);
- return EFA_MMAP_INVALID;
-
-}
int efa_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
@@ -488,45 +371,52 @@ static int qp_mmap_entries_setup(struct efa_qp *qp,
struct efa_com_create_qp_params *params,
struct efa_ibv_create_qp_resp *resp)
{
+ u64 address;
+ u64 length;
/*
* Once an entry is inserted it might be mmapped, hence cannot be
* cleaned up until dealloc_ucontext.
*/
resp->sq_db_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->db_bar_addr + resp->sq_db_offset,
- PAGE_SIZE, EFA_MMAP_IO_NC);
- if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
+ rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
+ dev->db_bar_addr +
+ resp->sq_db_offset,
+ PAGE_SIZE, EFA_MMAP_IO_NC);
+ if (resp->sq_db_mmap_key == RDMA_USER_MMAP_INVALID)
return -ENOMEM;
resp->sq_db_offset &= ~PAGE_MASK;
+ address = dev->mem_bar_addr + resp->llq_desc_offset;
+ length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
+ (resp->llq_desc_offset & ~PAGE_MASK));
resp->llq_desc_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->mem_bar_addr + resp->llq_desc_offset,
- PAGE_ALIGN(params->sq_ring_size_in_bytes +
- (resp->llq_desc_offset & ~PAGE_MASK)),
- EFA_MMAP_IO_WC);
- if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
+ rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
+ address,
+ length,
+ EFA_MMAP_IO_WC);
+ if (resp->llq_desc_mmap_key == RDMA_USER_MMAP_INVALID)
return -ENOMEM;
resp->llq_desc_offset &= ~PAGE_MASK;
if (qp->rq_size) {
+ address = dev->db_bar_addr + resp->rq_db_offset;
resp->rq_db_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->db_bar_addr + resp->rq_db_offset,
- PAGE_SIZE, EFA_MMAP_IO_NC);
- if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
+ rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
+ address, PAGE_SIZE,
+ EFA_MMAP_IO_NC);
+ if (resp->rq_db_mmap_key == RDMA_USER_MMAP_INVALID)
return -ENOMEM;
resp->rq_db_offset &= ~PAGE_MASK;
+ address = virt_to_phys(qp->rq_cpu_addr);
resp->rq_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- virt_to_phys(qp->rq_cpu_addr),
- qp->rq_size, EFA_MMAP_DMA_PAGE);
- if (resp->rq_mmap_key == EFA_MMAP_INVALID)
+ rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
+ address, qp->rq_size,
+ EFA_MMAP_DMA_PAGE);
+ if (resp->rq_mmap_key == RDMA_USER_MMAP_INVALID)
return -ENOMEM;
resp->rq_mmap_size = qp->rq_size;
@@ -875,11 +765,13 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
struct efa_ibv_create_cq_resp *resp)
{
+ struct efa_ucontext *ucontext = cq->ucontext;
resp->q_mmap_size = cq->size;
- resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
- virt_to_phys(cq->cpu_addr),
- cq->size, EFA_MMAP_DMA_PAGE);
- if (resp->q_mmap_key == EFA_MMAP_INVALID)
+ resp->q_mmap_key =
+ rdma_user_mmap_entry_insert(&ucontext->ibucontext, cq,
+ virt_to_phys(cq->cpu_addr),
+ cq->size, EFA_MMAP_DMA_PAGE);
+ if (resp->q_mmap_key == RDMA_USER_MMAP_INVALID)
return -ENOMEM;
return 0;
@@ -1531,7 +1423,6 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
goto err_out;
ucontext->uarn = result.uarn;
- xa_init(&ucontext->mmap_xa);
resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
@@ -1560,19 +1451,25 @@ void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
struct efa_ucontext *ucontext = to_eucontext(ibucontext);
struct efa_dev *dev = to_edev(ibucontext->device);
- mmap_entries_remove_free(dev, ucontext);
efa_dealloc_uar(dev, ucontext->uarn);
}
+void efa_mmap_free(u64 address, u64 length, u8 mmap_flag)
+{
+ /* DMA mapping is already gone, now free the pages */
+ if (mmap_flag == EFA_MMAP_DMA_PAGE)
+ free_pages_exact(phys_to_virt(address), length);
+}
+
static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
struct vm_area_struct *vma, u64 key, u64 length)
{
- struct efa_mmap_entry *entry;
+ struct rdma_user_mmap_entry *entry;
unsigned long va;
u64 pfn;
int err;
- entry = mmap_entry_get(dev, ucontext, key, length);
+ entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, key, length);
if (!entry) {
ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
key);
--
2.14.5
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v5 rdma-next 2/6] RDMA/efa: Use the common mmap_xa helpers
2019-07-08 9:14 ` [PATCH v5 rdma-next 2/6] RDMA/efa: Use the common mmap_xa helpers Michal Kalderon
@ 2019-07-09 9:02 ` Gal Pressman
2019-07-09 10:30 ` Michal Kalderon
0 siblings, 1 reply; 14+ messages in thread
From: Gal Pressman @ 2019-07-09 9:02 UTC (permalink / raw)
To: Michal Kalderon, ariel.elior, jgg, dledford, galpress
Cc: linux-rdma, davem, netdev
On 08/07/2019 12:14, Michal Kalderon wrote:
Hi, a few nits:
> Remove the functions related to managing the mmap_xa database.
> This code was copied into ib_core; use the common APIs instead.
>
> Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
> ---
> drivers/infiniband/hw/efa/efa.h | 3 +-
> drivers/infiniband/hw/efa/efa_main.c | 1 +
> drivers/infiniband/hw/efa/efa_verbs.c | 183 ++++++++--------------------------
> 3 files changed, 42 insertions(+), 145 deletions(-)
> diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
> index df77bc312a25..5dff892da161 100644
> --- a/drivers/infiniband/hw/efa/efa_verbs.c
> +++ b/drivers/infiniband/hw/efa/efa_verbs.c
> @@ -13,34 +13,15 @@
>
> #include "efa.h"
>
> -#define EFA_MMAP_FLAG_SHIFT 56
> -#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
> -#define EFA_MMAP_INVALID U64_MAX
> -
Don't delete the blank line please.
> enum {
> EFA_MMAP_DMA_PAGE = 0,
> EFA_MMAP_IO_WC,
> EFA_MMAP_IO_NC,
> };
> -
> #define EFA_AENQ_ENABLED_GROUPS \
> (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
> BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
>
> -struct efa_mmap_entry {
> - void *obj;
> - u64 address;
> - u64 length;
> - u32 mmap_page;
> - u8 mmap_flag;
> -};
> -
> -static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
> -{
> - return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
> - ((u64)efa->mmap_page << PAGE_SHIFT);
> -}
> -
> #define EFA_CHUNK_PAYLOAD_SHIFT 12
> #define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
> #define EFA_CHUNK_PAYLOAD_PTR_SIZE 8
> @@ -145,105 +126,7 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
> return addr;
> }
>
> -/*
> - * This is only called when the ucontext is destroyed and there can be no
> - * concurrent query via mmap or allocate on the xarray, thus we can be sure no
> - * other thread is using the entry pointer. We also know that all the BAR
> - * pages have either been zap'd or munmaped at this point. Normal pages are
> - * refcounted and will be freed at the proper time.
> - */
> -static void mmap_entries_remove_free(struct efa_dev *dev,
> - struct efa_ucontext *ucontext)
> -{
> - struct efa_mmap_entry *entry;
> - unsigned long mmap_page;
>
> - xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
> - xa_erase(&ucontext->mmap_xa, mmap_page);
> -
> - ibdev_dbg(
> - &dev->ibdev,
> - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
> - entry->obj, get_mmap_key(entry), entry->address,
> - entry->length);
> - if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
> - /* DMA mapping is already gone, now free the pages */
> - free_pages_exact(phys_to_virt(entry->address),
> - entry->length);
> - kfree(entry);
> - }
> -}
> -
> -static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
> - struct efa_ucontext *ucontext,
> - u64 key, u64 len)
> -{
> - struct efa_mmap_entry *entry;
> - u64 mmap_page;
> -
> - mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
> - if (mmap_page > U32_MAX)
> - return NULL;
> -
> - entry = xa_load(&ucontext->mmap_xa, mmap_page);
> - if (!entry || get_mmap_key(entry) != key || entry->length != len)
> - return NULL;
> -
> - ibdev_dbg(&dev->ibdev,
> - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
> - entry->obj, key, entry->address, entry->length);
> -
> - return entry;
> -}
> -
> -/*
> - * Note this locking scheme cannot support removal of entries, except during
> - * ucontext destruction when the core code guarentees no concurrency.
> - */
> -static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
> - void *obj, u64 address, u64 length, u8 mmap_flag)
> -{
> - struct efa_mmap_entry *entry;
> - u32 next_mmap_page;
> - int err;
> -
> - entry = kmalloc(sizeof(*entry), GFP_KERNEL);
> - if (!entry)
> - return EFA_MMAP_INVALID;
> -
> - entry->obj = obj;
> - entry->address = address;
> - entry->length = length;
> - entry->mmap_flag = mmap_flag;
> -
> - xa_lock(&ucontext->mmap_xa);
> - if (check_add_overflow(ucontext->mmap_xa_page,
> - (u32)(length >> PAGE_SHIFT),
> - &next_mmap_page))
> - goto err_unlock;
> -
> - entry->mmap_page = ucontext->mmap_xa_page;
> - ucontext->mmap_xa_page = next_mmap_page;
> - err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
> - GFP_KERNEL);
> - if (err)
> - goto err_unlock;
> -
> - xa_unlock(&ucontext->mmap_xa);
> -
> - ibdev_dbg(
> - &dev->ibdev,
> - "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
> - entry->obj, entry->address, entry->length, get_mmap_key(entry));
> -
> - return get_mmap_key(entry);
> -
> -err_unlock:
> - xa_unlock(&ucontext->mmap_xa);
> - kfree(entry);
> - return EFA_MMAP_INVALID;
> -
> -}
>
You left two extra blank lines between efa_zalloc_mapped and efa_query_device.
> int efa_query_device(struct ib_device *ibdev,
> struct ib_device_attr *props,
> @@ -488,45 +371,52 @@ static int qp_mmap_entries_setup(struct efa_qp *qp,
> struct efa_com_create_qp_params *params,
> struct efa_ibv_create_qp_resp *resp)
> {
> + u64 address;
> + u64 length;
Line break.
> /*
> * Once an entry is inserted it might be mmapped, hence cannot be
> * cleaned up until dealloc_ucontext.
> */
> resp->sq_db_mmap_key =
> - mmap_entry_insert(dev, ucontext, qp,
> - dev->db_bar_addr + resp->sq_db_offset,
> - PAGE_SIZE, EFA_MMAP_IO_NC);
> - if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
> + rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
> + dev->db_bar_addr +
> + resp->sq_db_offset,
> + PAGE_SIZE, EFA_MMAP_IO_NC);
> + if (resp->sq_db_mmap_key == RDMA_USER_MMAP_INVALID)
> return -ENOMEM;
>
> resp->sq_db_offset &= ~PAGE_MASK;
>
> + address = dev->mem_bar_addr + resp->llq_desc_offset;
> + length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
> + (resp->llq_desc_offset & ~PAGE_MASK));
> resp->llq_desc_mmap_key =
> - mmap_entry_insert(dev, ucontext, qp,
> - dev->mem_bar_addr + resp->llq_desc_offset,
> - PAGE_ALIGN(params->sq_ring_size_in_bytes +
> - (resp->llq_desc_offset & ~PAGE_MASK)),
> - EFA_MMAP_IO_WC);
> - if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
> + rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
> + address,
> + length,
> + EFA_MMAP_IO_WC);
> + if (resp->llq_desc_mmap_key == RDMA_USER_MMAP_INVALID)
> return -ENOMEM;
>
> resp->llq_desc_offset &= ~PAGE_MASK;
>
> if (qp->rq_size) {
> + address = dev->db_bar_addr + resp->rq_db_offset;
> resp->rq_db_mmap_key =
> - mmap_entry_insert(dev, ucontext, qp,
> - dev->db_bar_addr + resp->rq_db_offset,
> - PAGE_SIZE, EFA_MMAP_IO_NC);
> - if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
> + rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
> + address, PAGE_SIZE,
> + EFA_MMAP_IO_NC);
> + if (resp->rq_db_mmap_key == RDMA_USER_MMAP_INVALID)
> return -ENOMEM;
>
> resp->rq_db_offset &= ~PAGE_MASK;
>
> + address = virt_to_phys(qp->rq_cpu_addr);
> resp->rq_mmap_key =
> - mmap_entry_insert(dev, ucontext, qp,
> - virt_to_phys(qp->rq_cpu_addr),
> - qp->rq_size, EFA_MMAP_DMA_PAGE);
> - if (resp->rq_mmap_key == EFA_MMAP_INVALID)
> + rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
> + address, qp->rq_size,
> + EFA_MMAP_DMA_PAGE);
> + if (resp->rq_mmap_key == RDMA_USER_MMAP_INVALID)
> return -ENOMEM;
>
> resp->rq_mmap_size = qp->rq_size;
> @@ -875,11 +765,13 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
> static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
> struct efa_ibv_create_cq_resp *resp)
> {
> + struct efa_ucontext *ucontext = cq->ucontext;
Line break.
> resp->q_mmap_size = cq->size;
> - resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
> - virt_to_phys(cq->cpu_addr),
> - cq->size, EFA_MMAP_DMA_PAGE);
> - if (resp->q_mmap_key == EFA_MMAP_INVALID)
> + resp->q_mmap_key =
> + rdma_user_mmap_entry_insert(&ucontext->ibucontext, cq,
> + virt_to_phys(cq->cpu_addr),
> + cq->size, EFA_MMAP_DMA_PAGE);
> + if (resp->q_mmap_key == RDMA_USER_MMAP_INVALID)
> return -ENOMEM;
>
> return 0;
> @@ -1531,7 +1423,6 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
> goto err_out;
>
> ucontext->uarn = result.uarn;
> - xa_init(&ucontext->mmap_xa);
>
> resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
> resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
> @@ -1560,19 +1451,25 @@ void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
> struct efa_ucontext *ucontext = to_eucontext(ibucontext);
> struct efa_dev *dev = to_edev(ibucontext->device);
>
> - mmap_entries_remove_free(dev, ucontext);
> efa_dealloc_uar(dev, ucontext->uarn);
> }
>
> +void efa_mmap_free(u64 address, u64 length, u8 mmap_flag)
> +{
> + /* DMA mapping is already gone, now free the pages */
> + if (mmap_flag == EFA_MMAP_DMA_PAGE)
> + free_pages_exact(phys_to_virt(address), length);
> +}
> +
> static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
> struct vm_area_struct *vma, u64 key, u64 length)
> {
> - struct efa_mmap_entry *entry;
> + struct rdma_user_mmap_entry *entry;
> unsigned long va;
> u64 pfn;
> int err;
>
> - entry = mmap_entry_get(dev, ucontext, key, length);
> + entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, key, length);
> if (!entry) {
> ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
> key);
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH v5 rdma-next 2/6] RDMA/efa: Use the common mmap_xa helpers
2019-07-09 9:02 ` Gal Pressman
@ 2019-07-09 10:30 ` Michal Kalderon
0 siblings, 0 replies; 14+ messages in thread
From: Michal Kalderon @ 2019-07-09 10:30 UTC (permalink / raw)
To: Gal Pressman, Ariel Elior, jgg, dledford; +Cc: linux-rdma, davem, netdev
> From: Gal Pressman <galpress@amazon.com>
> Sent: Tuesday, July 9, 2019 12:03 PM
>
> On 08/07/2019 12:14, Michal Kalderon wrote:
>
> Hi, a few nits:
Thanks for the review, will fix them.
>
> > Remove the functions related to managing the mmap_xa database.
> > This code was copied into ib_core; use the common APIs instead.
> >
> > Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
> > ---
> > drivers/infiniband/hw/efa/efa.h | 3 +-
> > drivers/infiniband/hw/efa/efa_main.c | 1 +
> > drivers/infiniband/hw/efa/efa_verbs.c | 183
> > ++++++++--------------------------
> > 3 files changed, 42 insertions(+), 145 deletions(-) diff --git
> > a/drivers/infiniband/hw/efa/efa_verbs.c
> > b/drivers/infiniband/hw/efa/efa_verbs.c
> > index df77bc312a25..5dff892da161 100644
> > --- a/drivers/infiniband/hw/efa/efa_verbs.c
> > +++ b/drivers/infiniband/hw/efa/efa_verbs.c
> > @@ -13,34 +13,15 @@
> >
> > #include "efa.h"
> >
> > -#define EFA_MMAP_FLAG_SHIFT 56
> > -#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT -
> 1, 0)
> > -#define EFA_MMAP_INVALID U64_MAX
> > -
>
> Don't delete the blank line please.
>
> > enum {
> > EFA_MMAP_DMA_PAGE = 0,
> > EFA_MMAP_IO_WC,
> > EFA_MMAP_IO_NC,
> > };
> > -
> > #define EFA_AENQ_ENABLED_GROUPS \
> > (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
> > BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
> >
> > -struct efa_mmap_entry {
> > - void *obj;
> > - u64 address;
> > - u64 length;
> > - u32 mmap_page;
> > - u8 mmap_flag;
> > -};
> > -
> > -static inline u64 get_mmap_key(const struct efa_mmap_entry *efa) -{
> > - return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
> > - ((u64)efa->mmap_page << PAGE_SHIFT);
> > -}
> > -
> > #define EFA_CHUNK_PAYLOAD_SHIFT 12
> > #define EFA_CHUNK_PAYLOAD_SIZE
> BIT(EFA_CHUNK_PAYLOAD_SHIFT)
> > #define EFA_CHUNK_PAYLOAD_PTR_SIZE 8
> > @@ -145,105 +126,7 @@ static void *efa_zalloc_mapped(struct efa_dev
> *dev, dma_addr_t *dma_addr,
> > return addr;
> > }
> >
> > -/*
> > - * This is only called when the ucontext is destroyed and there can
> > be no
> > - * concurrent query via mmap or allocate on the xarray, thus we can
> > be sure no
> > - * other thread is using the entry pointer. We also know that all the
> > BAR
> > - * pages have either been zap'd or munmaped at this point. Normal
> > pages are
> > - * refcounted and will be freed at the proper time.
> > - */
> > -static void mmap_entries_remove_free(struct efa_dev *dev,
> > - struct efa_ucontext *ucontext)
> > -{
> > - struct efa_mmap_entry *entry;
> > - unsigned long mmap_page;
> >
> > - xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
> > - xa_erase(&ucontext->mmap_xa, mmap_page);
> > -
> > - ibdev_dbg(
> > - &dev->ibdev,
> > - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx]
> removed\n",
> > - entry->obj, get_mmap_key(entry), entry->address,
> > - entry->length);
> > - if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
> > - /* DMA mapping is already gone, now free the pages
> */
> > - free_pages_exact(phys_to_virt(entry->address),
> > - entry->length);
> > - kfree(entry);
> > - }
> > -}
> > -
> > -static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
> > - struct efa_ucontext *ucontext,
> > - u64 key, u64 len)
> > -{
> > - struct efa_mmap_entry *entry;
> > - u64 mmap_page;
> > -
> > - mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
> > - if (mmap_page > U32_MAX)
> > - return NULL;
> > -
> > - entry = xa_load(&ucontext->mmap_xa, mmap_page);
> > - if (!entry || get_mmap_key(entry) != key || entry->length != len)
> > - return NULL;
> > -
> > - ibdev_dbg(&dev->ibdev,
> > - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx]
> removed\n",
> > - entry->obj, key, entry->address, entry->length);
> > -
> > - return entry;
> > -}
> > -
> > -/*
> > - * Note this locking scheme cannot support removal of entries, except
> > during
> > - * ucontext destruction when the core code guarentees no concurrency.
> > - */
> > -static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext
> *ucontext,
> > - void *obj, u64 address, u64 length, u8 mmap_flag)
> > -{
> > - struct efa_mmap_entry *entry;
> > - u32 next_mmap_page;
> > - int err;
> > -
> > - entry = kmalloc(sizeof(*entry), GFP_KERNEL);
> > - if (!entry)
> > - return EFA_MMAP_INVALID;
> > -
> > - entry->obj = obj;
> > - entry->address = address;
> > - entry->length = length;
> > - entry->mmap_flag = mmap_flag;
> > -
> > - xa_lock(&ucontext->mmap_xa);
> > - if (check_add_overflow(ucontext->mmap_xa_page,
> > - (u32)(length >> PAGE_SHIFT),
> > - &next_mmap_page))
> > - goto err_unlock;
> > -
> > - entry->mmap_page = ucontext->mmap_xa_page;
> > - ucontext->mmap_xa_page = next_mmap_page;
> > - err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
> > - GFP_KERNEL);
> > - if (err)
> > - goto err_unlock;
> > -
> > - xa_unlock(&ucontext->mmap_xa);
> > -
> > - ibdev_dbg(
> > - &dev->ibdev,
> > - "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx]
> inserted\n",
> > - entry->obj, entry->address, entry->length,
> get_mmap_key(entry));
> > -
> > - return get_mmap_key(entry);
> > -
> > -err_unlock:
> > - xa_unlock(&ucontext->mmap_xa);
> > - kfree(entry);
> > - return EFA_MMAP_INVALID;
> > -
> > -}
> >
>
> You left two extra blank lines between efa_zalloc_mapped and
> efa_query_device.
>
> > int efa_query_device(struct ib_device *ibdev,
> > struct ib_device_attr *props,
> > @@ -488,45 +371,52 @@ static int qp_mmap_entries_setup(struct efa_qp *qp,
> > struct efa_com_create_qp_params *params,
> > struct efa_ibv_create_qp_resp *resp)
> > {
> > + u64 address;
> > + u64 length;
>
> Line break.
>
> > /*
> > * Once an entry is inserted it might be mmapped, hence cannot be
> > * cleaned up until dealloc_ucontext.
> > */
> > resp->sq_db_mmap_key =
> > - mmap_entry_insert(dev, ucontext, qp,
> > - dev->db_bar_addr + resp->sq_db_offset,
> > - PAGE_SIZE, EFA_MMAP_IO_NC);
> > - if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
> > + rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
> > + dev->db_bar_addr +
> > + resp->sq_db_offset,
> > + PAGE_SIZE, EFA_MMAP_IO_NC);
> > + if (resp->sq_db_mmap_key == RDMA_USER_MMAP_INVALID)
> > return -ENOMEM;
> >
> > resp->sq_db_offset &= ~PAGE_MASK;
> >
> > + address = dev->mem_bar_addr + resp->llq_desc_offset;
> > + length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
> > + (resp->llq_desc_offset & ~PAGE_MASK));
> > resp->llq_desc_mmap_key =
> > - mmap_entry_insert(dev, ucontext, qp,
> > - dev->mem_bar_addr + resp->llq_desc_offset,
> > - PAGE_ALIGN(params->sq_ring_size_in_bytes +
> > - (resp->llq_desc_offset & ~PAGE_MASK)),
> > - EFA_MMAP_IO_WC);
> > - if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
> > + rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
> > + address,
> > + length,
> > + EFA_MMAP_IO_WC);
> > + if (resp->llq_desc_mmap_key == RDMA_USER_MMAP_INVALID)
> > return -ENOMEM;
> >
> > resp->llq_desc_offset &= ~PAGE_MASK;
> >
> > if (qp->rq_size) {
> > + address = dev->db_bar_addr + resp->rq_db_offset;
> > resp->rq_db_mmap_key =
> > - mmap_entry_insert(dev, ucontext, qp,
> > - dev->db_bar_addr + resp->rq_db_offset,
> > - PAGE_SIZE, EFA_MMAP_IO_NC);
> > - if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
> > + rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
> > + address, PAGE_SIZE,
> > + EFA_MMAP_IO_NC);
> > + if (resp->rq_db_mmap_key == RDMA_USER_MMAP_INVALID)
> > return -ENOMEM;
> >
> > resp->rq_db_offset &= ~PAGE_MASK;
> >
> > + address = virt_to_phys(qp->rq_cpu_addr);
> > resp->rq_mmap_key =
> > - mmap_entry_insert(dev, ucontext, qp,
> > - virt_to_phys(qp->rq_cpu_addr),
> > - qp->rq_size, EFA_MMAP_DMA_PAGE);
> > - if (resp->rq_mmap_key == EFA_MMAP_INVALID)
> > + rdma_user_mmap_entry_insert(&ucontext->ibucontext, qp,
> > + address, qp->rq_size,
> > + EFA_MMAP_DMA_PAGE);
> > + if (resp->rq_mmap_key == RDMA_USER_MMAP_INVALID)
> > return -ENOMEM;
> >
> > resp->rq_mmap_size = qp->rq_size;
> > @@ -875,11 +765,13 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
> > static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
> > struct efa_ibv_create_cq_resp *resp)
> > {
> > + struct efa_ucontext *ucontext = cq->ucontext;
>
> Line break.
>
> > resp->q_mmap_size = cq->size;
> > - resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
> > - virt_to_phys(cq->cpu_addr),
> > - cq->size, EFA_MMAP_DMA_PAGE);
> > - if (resp->q_mmap_key == EFA_MMAP_INVALID)
> > + resp->q_mmap_key =
> > + rdma_user_mmap_entry_insert(&ucontext->ibucontext, cq,
> > + virt_to_phys(cq->cpu_addr),
> > + cq->size, EFA_MMAP_DMA_PAGE);
> > + if (resp->q_mmap_key == RDMA_USER_MMAP_INVALID)
> > return -ENOMEM;
> >
> > return 0;
> > @@ -1531,7 +1423,6 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
> > goto err_out;
> >
> > ucontext->uarn = result.uarn;
> > - xa_init(&ucontext->mmap_xa);
> >
> > resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
> > resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
> > @@ -1560,19 +1451,25 @@ void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
> > struct efa_ucontext *ucontext = to_eucontext(ibucontext);
> > struct efa_dev *dev = to_edev(ibucontext->device);
> >
> > - mmap_entries_remove_free(dev, ucontext);
> > efa_dealloc_uar(dev, ucontext->uarn);
> > }
> >
> > +void efa_mmap_free(u64 address, u64 length, u8 mmap_flag)
> > +{
> > + /* DMA mapping is already gone, now free the pages */
> > + if (mmap_flag == EFA_MMAP_DMA_PAGE)
> > + free_pages_exact(phys_to_virt(address), length);
> > +}
> > +
> > static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
> > struct vm_area_struct *vma, u64 key, u64 length)
> > {
> > - struct efa_mmap_entry *entry;
> > + struct rdma_user_mmap_entry *entry;
> > unsigned long va;
> > u64 pfn;
> > int err;
> >
> > - entry = mmap_entry_get(dev, ucontext, key, length);
> > + entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, key, length);
> > if (!entry) {
> > ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
> > key);
> >
* [PATCH v5 rdma-next 3/6] RDMA/qedr: Use the common mmap API
2019-07-08 9:14 [PATCH v5 rdma-next 0/6] RDMA/qedr: Use the doorbell overflow recovery mechanism for RDMA Michal Kalderon
2019-07-08 9:14 ` [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions Michal Kalderon
2019-07-08 9:14 ` [PATCH v5 rdma-next 2/6] RDMA/efa: Use the common mmap_xa helpers Michal Kalderon
@ 2019-07-08 9:15 ` Michal Kalderon
2019-07-08 9:15 ` [PATCH v5 rdma-next 4/6] qed*: Change dpi_addr to be denoted with __iomem Michal Kalderon
` (2 subsequent siblings)
5 siblings, 0 replies; 14+ messages in thread
From: Michal Kalderon @ 2019-07-08 9:15 UTC (permalink / raw)
To: michal.kalderon, ariel.elior, jgg, dledford, galpress
Cc: linux-rdma, davem, netdev
Remove all functions related to mmap from qedr and use the common API.
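For reference, the usage pattern the common API gives a driver looks roughly
like this (a minimal sketch, not taken from the patch; obj, phys_addr and len
stand in for the driver's own object and region):

	/* at resource creation: reserve a key covering the region */
	key = rdma_user_mmap_entry_insert(&uctx->ibucontext, obj,
					  phys_addr, PAGE_ALIGN(len),
					  QEDR_USER_MMAP_IO_WC);
	if (key == RDMA_USER_MMAP_INVALID)
		return -ENOMEM;
	/* key goes to userspace and comes back as the mmap offset */

	/* in the .mmap handler: validate offset and length together */
	entry = rdma_user_mmap_entry_get(&uctx->ibucontext, key, length);
	if (!entry)
		return -EINVAL;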
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
---
drivers/infiniband/hw/qedr/qedr.h | 13 ----
drivers/infiniband/hw/qedr/verbs.c | 153 +++++++++++++------------------------
drivers/infiniband/hw/qedr/verbs.h | 2 +-
3 files changed, 52 insertions(+), 116 deletions(-)
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 6175d1e98717..97c90d1e525d 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -231,11 +231,6 @@ struct qedr_ucontext {
u64 dpi_phys_addr;
u32 dpi_size;
u16 dpi;
-
- struct list_head mm_head;
-
- /* Lock to protect mm list */
- struct mutex mm_list_lock;
};
union db_prod64 {
@@ -298,14 +293,6 @@ struct qedr_pd {
struct qedr_ucontext *uctx;
};
-struct qedr_mm {
- struct {
- u64 phy_addr;
- unsigned long len;
- } key;
- struct list_head entry;
-};
-
union db_prod32 {
struct rdma_pwm_val16_data data;
u32 raw;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 27d90a84ea01..f33f0f1e7d76 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -58,6 +58,10 @@
#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
+enum {
+ QEDR_USER_MMAP_IO_WC = 0,
+};
+
static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
size_t len)
{
@@ -256,60 +260,6 @@ int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
return 0;
}
-static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
- unsigned long len)
-{
- struct qedr_mm *mm;
-
- mm = kzalloc(sizeof(*mm), GFP_KERNEL);
- if (!mm)
- return -ENOMEM;
-
- mm->key.phy_addr = phy_addr;
- /* This function might be called with a length which is not a multiple
- * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
- * forces this granularity by increasing the requested size if needed.
- * When qedr_mmap is called, it will search the list with the updated
- * length as a key. To prevent search failures, the length is rounded up
- * in advance to PAGE_SIZE.
- */
- mm->key.len = roundup(len, PAGE_SIZE);
- INIT_LIST_HEAD(&mm->entry);
-
- mutex_lock(&uctx->mm_list_lock);
- list_add(&mm->entry, &uctx->mm_head);
- mutex_unlock(&uctx->mm_list_lock);
-
- DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
- "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
- (unsigned long long)mm->key.phy_addr,
- (unsigned long)mm->key.len, uctx);
-
- return 0;
-}
-
-static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
- unsigned long len)
-{
- bool found = false;
- struct qedr_mm *mm;
-
- mutex_lock(&uctx->mm_list_lock);
- list_for_each_entry(mm, &uctx->mm_head, entry) {
- if (len != mm->key.len || phy_addr != mm->key.phy_addr)
- continue;
-
- found = true;
- break;
- }
- mutex_unlock(&uctx->mm_list_lock);
- DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
- "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
- mm->key.phy_addr, mm->key.len, uctx, found);
-
- return found;
-}
-
int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
struct ib_device *ibdev = uctx->device;
@@ -318,6 +268,7 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
struct qedr_alloc_ucontext_resp uresp = {};
struct qedr_dev *dev = get_qedr_dev(ibdev);
struct qed_rdma_add_user_out_params oparams;
+ u64 key;
if (!udata)
return -EFAULT;
@@ -334,13 +285,17 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
ctx->dpi_addr = oparams.dpi_addr;
ctx->dpi_phys_addr = oparams.dpi_phys_addr;
ctx->dpi_size = oparams.dpi_size;
- INIT_LIST_HEAD(&ctx->mm_head);
- mutex_init(&ctx->mm_list_lock);
+
+ key = rdma_user_mmap_entry_insert(uctx, ctx,
+ ctx->dpi_phys_addr, ctx->dpi_size,
+ QEDR_USER_MMAP_IO_WC);
+ if (key == RDMA_USER_MMAP_INVALID)
+ return -ENOMEM;
uresp.dpm_enabled = dev->user_dpm_enabled;
uresp.wids_enabled = 1;
uresp.wid_count = oparams.wid_count;
- uresp.db_pa = ctx->dpi_phys_addr;
+ uresp.db_pa = key;
uresp.db_size = ctx->dpi_size;
uresp.max_send_wr = dev->attr.max_sqe;
uresp.max_recv_wr = dev->attr.max_rqe;
@@ -356,10 +311,6 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
ctx->dev = dev;
- rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
- if (rc)
- return rc;
-
DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
&ctx->ibucontext);
return 0;
@@ -368,66 +319,64 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
- struct qedr_mm *mm, *tmp;
DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
uctx);
uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
-
- list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
- DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
- "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
- mm->key.phy_addr, mm->key.len, uctx);
- list_del(&mm->entry);
- kfree(mm);
- }
}
-int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
- struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
- struct qedr_dev *dev = get_qedr_dev(context->device);
- unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
- unsigned long len = (vma->vm_end - vma->vm_start);
- unsigned long dpi_start;
+ struct ib_device *dev = ucontext->device;
+ u64 length = vma->vm_end - vma->vm_start;
+ u64 key = vma->vm_pgoff << PAGE_SHIFT;
+ struct rdma_user_mmap_entry *entry;
+ u64 pfn;
+ int err;
- dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
+ ibdev_dbg(dev,
+ "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
+ vma->vm_start, vma->vm_end, length, key);
- DP_DEBUG(dev, QEDR_MSG_INIT,
- "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
- (void *)vma->vm_start, (void *)vma->vm_end,
- (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
-
- if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
- DP_ERR(dev,
- "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
- (void *)vma->vm_start, (void *)vma->vm_end);
+ if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
+ ibdev_dbg(dev,
+ "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
+ length, PAGE_SIZE, vma->vm_flags);
return -EINVAL;
}
- if (!qedr_search_mmap(ucontext, phys_addr, len)) {
- DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
- vma->vm_pgoff);
- return -EINVAL;
+ if (vma->vm_flags & VM_EXEC) {
+ ibdev_dbg(dev, "Mapping executable pages is not permitted\n");
+ return -EPERM;
}
+ vma->vm_flags &= ~VM_MAYEXEC;
- if (phys_addr < dpi_start ||
- ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
- DP_ERR(dev,
- "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
- (void *)phys_addr, (void *)dpi_start,
- ucontext->dpi_size);
+ entry = rdma_user_mmap_entry_get(ucontext, key, length);
+ if (!entry) {
+ ibdev_dbg(dev, "key[%#llx] does not have valid entry\n",
+ key);
return -EINVAL;
}
- if (vma->vm_flags & VM_READ) {
- DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
- return -EINVAL;
+ ibdev_dbg(dev,
+ "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
+ entry->address, length, entry->mmap_flag);
+
+ pfn = entry->address >> PAGE_SHIFT;
+ switch (entry->mmap_flag) {
+ case QEDR_USER_MMAP_IO_WC:
+ err = rdma_user_mmap_io(ucontext, vma, pfn, length,
+ pgprot_writecombine(vma->vm_page_prot));
+ break;
+ default:
+ err = -EINVAL;
}
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
- vma->vm_page_prot);
+ ibdev_dbg(dev,
+ "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
+ entry->address, length, entry->mmap_flag, err);
+
+ return err;
}
int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 9aaa90283d6e..724d0983e972 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -46,7 +46,7 @@ int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
void qedr_dealloc_ucontext(struct ib_ucontext *uctx);
-int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
+int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma);
int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
--
2.14.5
* [PATCH v5 rdma-next 4/6] qed*: Change dpi_addr to be denoted with __iomem
2019-07-08 9:14 [PATCH v5 rdma-next 0/6] RDMA/qedr: Use the doorbell overflow recovery mechanism for RDMA Michal Kalderon
` (2 preceding siblings ...)
2019-07-08 9:15 ` [PATCH v5 rdma-next 3/6] RDMA/qedr: Use the common mmap API Michal Kalderon
@ 2019-07-08 9:15 ` Michal Kalderon
2019-07-08 9:15 ` [PATCH v5 rdma-next 5/6] RDMA/qedr: Add doorbell overflow recovery support Michal Kalderon
2019-07-08 9:15 ` [PATCH v5 rdma-next 6/6] RDMA/qedr: Add iWARP doorbell " Michal Kalderon
5 siblings, 0 replies; 14+ messages in thread
From: Michal Kalderon @ 2019-07-08 9:15 UTC (permalink / raw)
To: michal.kalderon, ariel.elior, jgg, dledford, galpress
Cc: linux-rdma, davem, netdev
Several casts were required around the dpi_addr parameter in qed_rdma_if.h.
This is an address on the doorbell BAR and should therefore be marked
with __iomem.
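To illustrate (not part of the patch), the annotation lets sparse flag any
access that bypasses the MMIO accessors:

	void __iomem *db = out_params.dpi_addr;

	writel(val, db);   /* correct: goes through the MMIO accessor */
	*(u32 *)db = val;  /* sparse warns: cast removes __iomem address space */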
Reported-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
---
drivers/infiniband/hw/qedr/main.c | 2 +-
drivers/infiniband/hw/qedr/qedr.h | 2 +-
drivers/net/ethernet/qlogic/qed/qed_rdma.c | 5 ++---
include/linux/qed/qed_rdma_if.h | 2 +-
4 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index a0a7ba0a5af4..3db4b6ba5ad6 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -815,7 +815,7 @@ static int qedr_init_hw(struct qedr_dev *dev)
if (rc)
goto out;
- dev->db_addr = (void __iomem *)(uintptr_t)out_params.dpi_addr;
+ dev->db_addr = out_params.dpi_addr;
dev->db_phys_addr = out_params.dpi_phys_addr;
dev->db_size = out_params.dpi_size;
dev->dpi = out_params.dpi;
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 97c90d1e525d..7e80ce521d8d 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -227,7 +227,7 @@ struct qedr_ucontext {
struct ib_ucontext ibucontext;
struct qedr_dev *dev;
struct qedr_pd *pd;
- u64 dpi_addr;
+ void __iomem *dpi_addr;
u64 dpi_phys_addr;
u32 dpi_size;
u16 dpi;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 7873d6dfd91f..fb3fe60a1a68 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -799,9 +799,8 @@ static int qed_rdma_add_user(void *rdma_cxt,
/* Calculate the corresponding DPI address */
dpi_start_offset = p_hwfn->dpi_start_offset;
- out_params->dpi_addr = (u64)((u8 __iomem *)p_hwfn->doorbells +
- dpi_start_offset +
- ((out_params->dpi) * p_hwfn->dpi_size));
+ out_params->dpi_addr = p_hwfn->doorbells + dpi_start_offset +
+ out_params->dpi * p_hwfn->dpi_size;
out_params->dpi_phys_addr = p_hwfn->cdev->db_phys_addr +
dpi_start_offset +
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index d15f8e4815e3..834166809a6c 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -225,7 +225,7 @@ struct qed_rdma_start_in_params {
struct qed_rdma_add_user_out_params {
u16 dpi;
- u64 dpi_addr;
+ void __iomem *dpi_addr;
u64 dpi_phys_addr;
u32 dpi_size;
u16 wid_count;
--
2.14.5
* [PATCH v5 rdma-next 5/6] RDMA/qedr: Add doorbell overflow recovery support
2019-07-08 9:14 [PATCH v5 rdma-next 0/6] RDMA/qedr: Use the doorbell overflow recovery mechanism for RDMA Michal Kalderon
` (3 preceding siblings ...)
2019-07-08 9:15 ` [PATCH v5 rdma-next 4/6] qed*: Change dpi_addr to be denoted with __iomem Michal Kalderon
@ 2019-07-08 9:15 ` Michal Kalderon
2019-07-08 9:15 ` [PATCH v5 rdma-next 6/6] RDMA/qedr: Add iWARP doorbell " Michal Kalderon
5 siblings, 0 replies; 14+ messages in thread
From: Michal Kalderon @ 2019-07-08 9:15 UTC (permalink / raw)
To: michal.kalderon, ariel.elior, jgg, dledford, galpress
Cc: linux-rdma, davem, netdev
Use the doorbell recovery mechanism to register RDMA-related doorbells
that will be restored in case of a doorbell overflow attention.
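For background, the expected userspace side is roughly the following
(a hypothetical sketch; cmd_fd names the uverbs command fd, db_rec_addr the
key returned in the uresp structures below, and struct qedr_user_db_rec comes
from the updated qedr-abi.h): the library maps the recovery entry and mirrors
every doorbell value into it before ringing the real doorbell, so the kernel
can replay the last value after an overflow attention.

	#include <stdint.h>
	#include <sys/mman.h>
	#include <unistd.h>

	static struct qedr_user_db_rec *map_db_rec(int cmd_fd,
						   uint64_t db_rec_addr)
	{
		void *p = mmap(NULL, sysconf(_SC_PAGESIZE),
			       PROT_READ | PROT_WRITE, MAP_SHARED,
			       cmd_fd, db_rec_addr);
		return p == MAP_FAILED ? NULL : p;
	}

	static void ring_db(struct qedr_user_db_rec *rec,
			    volatile uint64_t *db_addr, uint64_t val)
	{
		rec->db_data = val;  /* shadow copy the kernel replays */
		/* a store barrier belongs here in real code */
		*db_addr = val;      /* the actual doorbell write */
	}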
Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
---
drivers/infiniband/hw/qedr/main.c | 1 +
drivers/infiniband/hw/qedr/qedr.h | 7 +
drivers/infiniband/hw/qedr/verbs.c | 273 ++++++++++++++++++++++++++++++++-----
drivers/infiniband/hw/qedr/verbs.h | 2 +
include/uapi/rdma/qedr-abi.h | 25 ++++
5 files changed, 273 insertions(+), 35 deletions(-)
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 3db4b6ba5ad6..34225c88f03d 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -206,6 +206,7 @@ static const struct ib_device_ops qedr_dev_ops = {
.get_link_layer = qedr_link_layer,
.map_mr_sg = qedr_map_mr_sg,
.mmap = qedr_mmap,
+ .mmap_free = qedr_mmap_free,
.modify_port = qedr_modify_port,
.modify_qp = qedr_modify_qp,
.modify_srq = qedr_modify_srq,
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 7e80ce521d8d..8aed24b32de6 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -231,6 +231,7 @@ struct qedr_ucontext {
u64 dpi_phys_addr;
u32 dpi_size;
u16 dpi;
+ bool db_rec;
};
union db_prod64 {
@@ -258,6 +259,12 @@ struct qedr_userq {
struct qedr_pbl *pbl_tbl;
u64 buf_addr;
size_t buf_len;
+
+ /* doorbell recovery */
+ void __iomem *db_addr;
+ struct qedr_user_db_rec *db_rec_data;
+ u64 db_rec_phys;
+ u64 db_rec_key;
};
struct qedr_cq {
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index f33f0f1e7d76..15221d9c7773 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -60,6 +60,7 @@
enum {
QEDR_USER_MMAP_IO_WC = 0,
+ QEDR_USER_MMAP_PHYS_PAGE,
};
static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
@@ -266,6 +267,7 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
int rc;
struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
struct qedr_alloc_ucontext_resp uresp = {};
+ struct qedr_alloc_ucontext_req ureq = {};
struct qedr_dev *dev = get_qedr_dev(ibdev);
struct qed_rdma_add_user_out_params oparams;
u64 key;
@@ -273,6 +275,17 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
if (!udata)
return -EFAULT;
+ if (udata->inlen) {
+ rc = ib_copy_from_udata(&ureq, udata,
+ min(sizeof(ureq), udata->inlen));
+ if (rc) {
+ DP_ERR(dev, "Problem copying data from user space\n");
+ return -EFAULT;
+ }
+
+ ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
+ }
+
rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
if (rc) {
DP_ERR(dev,
@@ -325,6 +338,13 @@ void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
}
+void qedr_mmap_free(u64 address, u64 length, u8 mmap_flag)
+{
+ /* DMA mapping is already gone, now free the pages */
+ if (mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
+ free_page((unsigned long)phys_to_virt(address));
+}
+
int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
struct ib_device *dev = ucontext->device;
@@ -368,6 +388,11 @@ int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
err = rdma_user_mmap_io(ucontext, vma, pfn, length,
pgprot_writecombine(vma->vm_page_prot));
break;
+ case QEDR_USER_MMAP_PHYS_PAGE:
+ err = vm_insert_page(vma, vma->vm_start, pfn_to_page(pfn));
+ break;
default:
err = -EINVAL;
}
@@ -606,16 +631,48 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
}
}
+static int qedr_db_recovery_add(struct qedr_dev *dev,
+ void __iomem *db_addr,
+ void *db_data,
+ enum qed_db_rec_width db_width,
+ enum qed_db_rec_space db_space)
+{
+ if (!db_data) {
+ DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
+ return 0;
+ }
+
+ return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
+ db_width, db_space);
+}
+
+static void qedr_db_recovery_del(struct qedr_dev *dev,
+ void __iomem *db_addr,
+ void *db_data)
+{
+ if (!db_data) {
+ DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
+ return;
+ }
+
+ /* Ignore return code as there is not much we can do about it. Error
+ * log will be printed inside.
+ */
+ dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
+}
+
static int qedr_copy_cq_uresp(struct qedr_dev *dev,
- struct qedr_cq *cq, struct ib_udata *udata)
+ struct qedr_cq *cq, struct ib_udata *udata,
+ u32 db_offset)
{
struct qedr_create_cq_uresp uresp;
int rc;
memset(&uresp, 0, sizeof(uresp));
- uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+ uresp.db_offset = db_offset;
uresp.icid = cq->icid;
+ uresp.db_rec_addr = cq->q.db_rec_key;
rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (rc)
@@ -643,10 +700,42 @@ static inline int qedr_align_cq_entries(int entries)
return aligned_size / QEDR_CQE_SIZE;
}
+static int qedr_init_user_db_rec(struct ib_udata *udata,
+ struct qedr_dev *dev, struct qedr_userq *q,
+ bool requires_db_rec)
+{
+ struct qedr_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct qedr_ucontext,
+ ibucontext);
+
+ /* Aborting for non doorbell userqueue (SRQ) or non-supporting lib */
+ if (requires_db_rec == 0 || !uctx->db_rec)
+ return 0;
+
+ /* Allocate a page for doorbell recovery and add it to the mmap entries */
+ q->db_rec_data = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!q->db_rec_data) {
+ DP_ERR(dev,
+ "get_free_page failed\n");
+ return -ENOMEM;
+ }
+
+ q->db_rec_phys = virt_to_phys(q->db_rec_data);
+ q->db_rec_key = rdma_user_mmap_entry_insert(&uctx->ibucontext, q,
+ q->db_rec_phys,
+ PAGE_SIZE,
+ QEDR_USER_MMAP_PHYS_PAGE);
+ if (q->db_rec_key == RDMA_USER_MMAP_INVALID)
+ return -ENOMEM;
+
+ return 0;
+}
+
static inline int qedr_init_user_queue(struct ib_udata *udata,
struct qedr_dev *dev,
struct qedr_userq *q, u64 buf_addr,
- size_t buf_len, int access, int dmasync,
+ size_t buf_len, bool requires_db_rec,
+ int access, int dmasync,
int alloc_and_init)
{
u32 fw_pages;
@@ -684,7 +773,8 @@ static inline int qedr_init_user_queue(struct ib_udata *udata,
}
}
- return 0;
+ /* mmap the user address used to store doorbell data for recovery */
+ return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
err0:
ib_umem_release(q->umem);
@@ -770,6 +860,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
int entries = attr->cqe;
struct qedr_cq *cq = get_qedr_cq(ibcq);
int chain_entries;
+ u32 db_offset;
int page_cnt;
u64 pbl_ptr;
u16 icid;
@@ -789,8 +880,12 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
chain_entries = qedr_align_cq_entries(entries);
chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
+ /* calc db offset. user will add DPI base, kernel will add db addr */
+ db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+
if (udata) {
- if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+ if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+ udata->inlen))) {
DP_ERR(dev,
"create cq: problem copying data from user space\n");
goto err0;
@@ -805,8 +900,9 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->cq_type = QEDR_CQ_TYPE_USER;
rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
- ureq.len, IB_ACCESS_LOCAL_WRITE, 1,
- 1);
+ ureq.len, true,
+ IB_ACCESS_LOCAL_WRITE,
+ 1, 1);
if (rc)
goto err0;
@@ -814,6 +910,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
page_cnt = cq->q.pbl_info.num_pbes;
cq->ibcq.cqe = chain_entries;
+ cq->q.db_addr = ctx->dpi_addr + db_offset;
} else {
cq->cq_type = QEDR_CQ_TYPE_KERNEL;
@@ -844,14 +941,21 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
spin_lock_init(&cq->cq_lock);
if (udata) {
- rc = qedr_copy_cq_uresp(dev, cq, udata);
+ rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
+ if (rc)
+ goto err3;
+
+ rc = qedr_db_recovery_add(dev, cq->q.db_addr,
+ &cq->q.db_rec_data->db_data,
+ DB_REC_WIDTH_64B,
+ DB_REC_USER);
if (rc)
goto err3;
+
} else {
/* Generate doorbell address. */
- cq->db_addr = dev->db_addr +
- DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
cq->db.data.icid = cq->icid;
+ cq->db_addr = dev->db_addr + db_offset;
cq->db.data.params = DB_AGG_CMD_SET <<
RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
@@ -861,6 +965,11 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->latest_cqe = NULL;
consume_cqe(cq);
cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
+
+ rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
+ DB_REC_WIDTH_64B, DB_REC_KERNEL);
+ if (rc)
+ goto err3;
}
DP_DEBUG(dev, QEDR_MSG_CQ,
@@ -879,8 +988,18 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
else
dev->ops->common->chain_free(dev->cdev, &cq->pbl);
err1:
- if (udata)
+ if (udata) {
ib_umem_release(cq->q.umem);
+ if (cq->q.db_rec_data) {
+ qedr_db_recovery_del(dev, cq->q.db_addr,
+ &cq->q.db_rec_data->db_data);
+ if (cq->q.db_rec_key == RDMA_USER_MMAP_INVALID)
+ free_page((unsigned long)cq->q.db_rec_data);
+ /* o/w will be freed by ib_uverbs on context free */
+ }
+ } else {
+ qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
+ }
err0:
return -EINVAL;
}
@@ -911,8 +1030,10 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
cq->destroyed = 1;
/* GSIs CQs are handled by driver, so they don't exist in the FW */
- if (cq->cq_type == QEDR_CQ_TYPE_GSI)
+ if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
+ qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
return;
+ }
iparams.icid = cq->icid;
dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
@@ -921,6 +1042,12 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
if (udata) {
qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
ib_umem_release(cq->q.umem);
+
+ if (cq->q.db_rec_data)
+ qedr_db_recovery_del(dev, cq->q.db_addr,
+ &cq->q.db_rec_data->db_data);
+ } else {
+ qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
}
/* We don't want the IRQ handler to handle a non-existing CQ so we
@@ -1085,8 +1212,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev,
}
static void qedr_copy_rq_uresp(struct qedr_dev *dev,
- struct qedr_create_qp_uresp *uresp,
- struct qedr_qp *qp)
+ struct qedr_create_qp_uresp *uresp,
+ struct qedr_qp *qp)
{
/* iWARP requires two doorbells per RQ. */
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
@@ -1099,6 +1226,7 @@ static void qedr_copy_rq_uresp(struct qedr_dev *dev,
}
uresp->rq_icid = qp->icid;
+ uresp->rq_db_rec_addr = qp->urq.db_rec_key;
}
static void qedr_copy_sq_uresp(struct qedr_dev *dev,
@@ -1112,22 +1240,24 @@ static void qedr_copy_sq_uresp(struct qedr_dev *dev,
uresp->sq_icid = qp->icid;
else
uresp->sq_icid = qp->icid + 1;
+
+ uresp->sq_db_rec_addr = qp->usq.db_rec_key;
}
static int qedr_copy_qp_uresp(struct qedr_dev *dev,
- struct qedr_qp *qp, struct ib_udata *udata)
+ struct qedr_qp *qp, struct ib_udata *udata,
+ struct qedr_create_qp_uresp *uresp)
{
- struct qedr_create_qp_uresp uresp;
int rc;
- memset(&uresp, 0, sizeof(uresp));
- qedr_copy_sq_uresp(dev, &uresp, qp);
- qedr_copy_rq_uresp(dev, &uresp, qp);
+ memset(uresp, 0, sizeof(*uresp));
+ qedr_copy_sq_uresp(dev, uresp, qp);
+ qedr_copy_rq_uresp(dev, uresp, qp);
- uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
- uresp.qp_id = qp->qp_id;
+ uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
+ uresp->qp_id = qp->qp_id;
- rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
if (rc)
DP_ERR(dev,
"create qp: failed a copy to user space with qp icid=0x%x.\n",
@@ -1171,16 +1301,35 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
qp->sq.max_sges, qp->sq_cq->icid);
}
-static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
+static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
+ int rc;
+
qp->sq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
qp->sq.db_data.data.icid = qp->icid + 1;
+ rc = qedr_db_recovery_add(dev, qp->sq.db,
+ &qp->sq.db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+ if (rc)
+ return rc;
+
if (!qp->srq) {
qp->rq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
qp->rq.db_data.data.icid = qp->icid;
+
+ rc = qedr_db_recovery_add(dev, qp->rq.db,
+ &qp->rq.db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+ if (rc)
+ qedr_db_recovery_del(dev, qp->sq.db,
+ &qp->sq.db_data);
}
+
+ return rc;
}
static int qedr_check_srq_params(struct qedr_dev *dev,
@@ -1234,7 +1383,7 @@ static int qedr_init_srq_user_params(struct ib_udata *udata,
int rc;
rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
- ureq->srq_len, access, dmasync, 1);
+ ureq->srq_len, false, access, dmasync, 1);
if (rc)
return rc;
@@ -1330,7 +1479,8 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
hw_srq->max_sges = init_attr->attr.max_sge;
if (udata) {
- if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+ if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+ udata->inlen))) {
DP_ERR(dev,
"create srq: problem copying data from user space\n");
goto err0;
@@ -1526,6 +1676,14 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
ib_umem_release(qp->urq.umem);
qp->urq.umem = NULL;
+
+ if (qp->usq.db_rec_data)
+ qedr_db_recovery_del(dev, qp->usq.db_addr,
+ &qp->usq.db_rec_data->db_data);
+
+ if (qp->urq.db_rec_data)
+ qedr_db_recovery_del(dev, qp->urq.db_addr,
+ &qp->urq.db_rec_data->db_data);
}
static int qedr_create_user_qp(struct qedr_dev *dev,
@@ -1537,12 +1695,14 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
struct qed_rdma_create_qp_in_params in_params;
struct qed_rdma_create_qp_out_params out_params;
struct qedr_pd *pd = get_qedr_pd(ibpd);
+ struct qedr_create_qp_uresp uresp;
+ struct qedr_ucontext *ctx = NULL;
struct qedr_create_qp_ureq ureq;
int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
int rc = -EINVAL;
memset(&ureq, 0, sizeof(ureq));
- rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
+ rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
if (rc) {
DP_ERR(dev, "Problem copying data from user space\n");
return rc;
@@ -1550,14 +1710,16 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
/* SQ - read access only (0), dma sync not required (0) */
rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
- ureq.sq_len, 0, 0, alloc_and_init);
+ ureq.sq_len, true, 0, 0,
+ alloc_and_init);
if (rc)
return rc;
if (!qp->srq) {
/* RQ - read access only (0), dma sync not required (0) */
rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
- ureq.rq_len, 0, 0, alloc_and_init);
+ ureq.rq_len, true,
+ 0, 0, alloc_and_init);
if (rc)
return rc;
}
@@ -1587,13 +1749,31 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
qp->qp_id = out_params.qp_id;
qp->icid = out_params.icid;
- rc = qedr_copy_qp_uresp(dev, qp, udata);
+ rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
if (rc)
goto err;
+ /* db offset was calculated in copy_qp_uresp, now set in the user q */
+ ctx = pd->uctx;
+ qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
+ qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
+
+ rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
+ &qp->usq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
+
+ rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
+ &qp->urq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
qedr_qp_user_print(dev, qp);
- return 0;
+ return rc;
err:
rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
if (rc)
@@ -1604,12 +1784,21 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
return rc;
}
-static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
+static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
+ int rc;
+
qp->sq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
qp->sq.db_data.data.icid = qp->icid;
+ rc = qedr_db_recovery_add(dev, qp->sq.db,
+ &qp->sq.db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+ if (rc)
+ return rc;
+
qp->rq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
qp->rq.db_data.data.icid = qp->icid;
@@ -1617,6 +1806,13 @@ static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
qp->rq.iwarp_db2_data.data.icid = qp->icid;
qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
+
+ rc = qedr_db_recovery_add(dev, qp->rq.db,
+ &qp->rq.db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+
+ return rc;
}
static int
@@ -1664,8 +1860,7 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev,
qp->qp_id = out_params.qp_id;
qp->icid = out_params.icid;
- qedr_set_roce_db_info(dev, qp);
- return rc;
+ return qedr_set_roce_db_info(dev, qp);
}
static int
@@ -1723,8 +1918,7 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
qp->qp_id = out_params.qp_id;
qp->icid = out_params.icid;
- qedr_set_iwarp_db_info(dev, qp);
- return rc;
+ return qedr_set_iwarp_db_info(dev, qp);
err:
dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
@@ -1739,6 +1933,15 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
kfree(qp->rqe_wr_id);
+
+ /* GSI qp is not registered to db mechanism so no need to delete */
+ if (qp->qp_type == IB_QPT_GSI)
+ return;
+
+ qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
+
+ if (!qp->srq)
+ qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
}
static int qedr_create_kernel_qp(struct qedr_dev *dev,
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 724d0983e972..21f0ee356bef 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -47,6 +47,8 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
void qedr_dealloc_ucontext(struct ib_ucontext *uctx);
int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma);
+void qedr_mmap_free(u64 address, u64 length, u8 mmap_flag);
+
int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index 7a10b3a325fa..c022ee26089b 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -38,6 +38,15 @@
#define QEDR_ABI_VERSION (8)
/* user kernel communication data structures. */
+enum qedr_alloc_ucontext_flags {
+ QEDR_ALLOC_UCTX_RESERVED = 1 << 0,
+ QEDR_ALLOC_UCTX_DB_REC = 1 << 1
+};
+
+struct qedr_alloc_ucontext_req {
+ __u32 context_flags;
+ __u32 reserved;
+};
struct qedr_alloc_ucontext_resp {
__aligned_u64 db_pa;
@@ -74,6 +83,7 @@ struct qedr_create_cq_uresp {
__u32 db_offset;
__u16 icid;
__u16 reserved;
+ __aligned_u64 db_rec_addr;
};
struct qedr_create_qp_ureq {
@@ -109,6 +119,13 @@ struct qedr_create_qp_uresp {
__u32 rq_db2_offset;
__u32 reserved;
+
+ /* address of SQ doorbell recovery user entry */
+ __aligned_u64 sq_db_rec_addr;
+
+ /* address of RQ doorbell recovery user entry */
+ __aligned_u64 rq_db_rec_addr;
+
};
struct qedr_create_srq_ureq {
@@ -128,4 +145,12 @@ struct qedr_create_srq_uresp {
__u32 reserved1;
};
+/* doorbell recovery entry allocated and populated by userspace doorbelling
+ * entities and mapped to kernel. Kernel uses this to register doorbell
+ * information with doorbell drop recovery mechanism.
+ */
+struct qedr_user_db_rec {
+ __aligned_u64 db_data; /* doorbell data */
+};
+
#endif /* __QEDR_USER_H__ */
--
2.14.5
* [PATCH v5 rdma-next 6/6] RDMA/qedr: Add iWARP doorbell recovery support
2019-07-08 9:14 [PATCH v5 rdma-next 0/6] RDMA/qedr: Use the doorbell overflow recovery mechanism for RDMA Michal Kalderon
` (4 preceding siblings ...)
2019-07-08 9:15 ` [PATCH v5 rdma-next 5/6] RDMA/qedr: Add doorbell overflow recovery support Michal Kalderon
@ 2019-07-08 9:15 ` Michal Kalderon
5 siblings, 0 replies; 14+ messages in thread
From: Michal Kalderon @ 2019-07-08 9:15 UTC (permalink / raw)
To: michal.kalderon, ariel.elior, jgg, dledford, galpress
Cc: linux-rdma, davem, netdev
This patch adds the iWARP-specific doorbells to the doorbell
recovery mechanism.
Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
---
drivers/infiniband/hw/qedr/qedr.h | 12 +++++++-----
drivers/infiniband/hw/qedr/verbs.c | 37 ++++++++++++++++++++++++++++++++++++-
2 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 8aed24b32de6..dc9ebbf625d2 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -234,6 +234,11 @@ struct qedr_ucontext {
bool db_rec;
};
+union db_prod32 {
+ struct rdma_pwm_val16_data data;
+ u32 raw;
+};
+
union db_prod64 {
struct rdma_pwm_val32_data data;
u64 raw;
@@ -265,6 +270,8 @@ struct qedr_userq {
struct qedr_user_db_rec *db_rec_data;
u64 db_rec_phys;
u64 db_rec_key;
+ void __iomem *db_rec_db2_addr;
+ union db_prod32 db_rec_db2_data;
};
struct qedr_cq {
@@ -300,11 +307,6 @@ struct qedr_pd {
struct qedr_ucontext *uctx;
};
-union db_prod32 {
- struct rdma_pwm_val16_data data;
- u32 raw;
-};
-
struct qedr_qp_hwq_info {
/* WQE Elements */
struct qed_chain pbl;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 15221d9c7773..dfe5f6c42a2f 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1684,6 +1684,10 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
if (qp->urq.db_rec_data)
qedr_db_recovery_del(dev, qp->urq.db_addr,
&qp->urq.db_rec_data->db_data);
+
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
+ &qp->urq.db_rec_db2_data);
}
static int qedr_create_user_qp(struct qedr_dev *dev,
@@ -1758,6 +1762,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
+
+ /* calculate the db_rec_db2 data since it is constant so no
+ * need to reflect from user
+ */
+ qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
+ qp->urq.db_rec_db2_data.data.value =
+ cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
+ }
+
rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
&qp->usq.db_rec_data->db_data,
DB_REC_WIDTH_32B,
@@ -1771,6 +1786,15 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
DB_REC_USER);
if (rc)
goto err;
+
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
+ &qp->urq.db_rec_db2_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
+ }
qedr_qp_user_print(dev, qp);
return rc;
@@ -1811,7 +1835,13 @@ static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
&qp->rq.db_data,
DB_REC_WIDTH_32B,
DB_REC_KERNEL);
+ if (rc)
+ return rc;
+ rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
+ &qp->rq.iwarp_db2_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
return rc;
}
@@ -1940,8 +1970,13 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
- if (!qp->srq)
+ if (!qp->srq) {
qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
+
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
+ &qp->rq.iwarp_db2_data);
+ }
}
static int qedr_create_kernel_qp(struct qedr_dev *dev,
--
2.14.5