From mboxrd@z Thu Jan 1 00:00:00 1970 From: Anatoly Burakov Subject: [PATCH v4 62/70] eal: add support for callbacks on memory hotplug Date: Sun, 8 Apr 2018 21:18:35 +0100 Message-ID: <1ad1e5a365d1b6c87a4d84a9cb5c63f4cd492eb7.1523218215.git.anatoly.burakov@intel.com> References: Cc: keith.wiles@intel.com, jianfeng.tan@intel.com, andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com, benjamin.walker@intel.com, bruce.richardson@intel.com, thomas@monjalon.net, konstantin.ananyev@intel.com, kuralamudhan.ramakrishnan@intel.com, louise.m.daly@intel.com, nelio.laranjeiro@6wind.com, yskoh@mellanox.com, pepperjo@japf.ch, jerin.jacob@caviumnetworks.com, hemant.agrawal@nxp.com, olivier.matz@6wind.com, shreyansh.jain@nxp.com, gowrishankar.m@linux.vnet.ibm.com To: dev@dpdk.org Return-path: Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id 7E6CC1B69E for ; Sun, 8 Apr 2018 22:18:59 +0200 (CEST) In-Reply-To: In-Reply-To: References: List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Each process will have its own callbacks. Callbacks will indicate whether an allocation or a deallocation has happened, and will also provide the start virtual address and length of the affected block. Since memory hotplug isn't supported on FreeBSD or in legacy mem mode, it will not be possible to register callbacks in either case. Callbacks are called whenever something happens to the memory map of the current process; at those times the memory hotplug subsystem is write-locked, so calling functions that lock it (e.g. rte_memseg_walk()) from within a callback leads to a deadlock. Document the limitation. Signed-off-by: Anatoly Burakov --- Notes: v4: - Document limitation about potential deadlocks. Should we provide thread-unsafe versions of these functions as well? 
v3: - Made API experimental - Compile fixes lib/librte_eal/common/eal_common_memalloc.c | 133 ++++++++++++++++++++++++++++ lib/librte_eal/common/eal_common_memory.c | 28 ++++++ lib/librte_eal/common/eal_memalloc.h | 11 +++ lib/librte_eal/common/include/rte_memory.h | 71 +++++++++++++++ lib/librte_eal/rte_eal_version.map | 2 + 5 files changed, 245 insertions(+) diff --git a/lib/librte_eal/common/eal_common_memalloc.c b/lib/librte_eal/common/eal_common_memalloc.c index 607ec3f..2d2d46f 100644 --- a/lib/librte_eal/common/eal_common_memalloc.c +++ b/lib/librte_eal/common/eal_common_memalloc.c @@ -2,16 +2,46 @@ * Copyright(c) 2017-2018 Intel Corporation */ +#include + +#include #include #include #include #include #include +#include #include "eal_private.h" #include "eal_internal_cfg.h" #include "eal_memalloc.h" +struct mem_event_callback_entry { + TAILQ_ENTRY(mem_event_callback_entry) next; + char name[RTE_MEM_EVENT_CALLBACK_NAME_LEN]; + rte_mem_event_callback_t clb; +}; + +/** Doubly linked list of memory event callbacks. 
*/ +TAILQ_HEAD(mem_event_callback_entry_list, mem_event_callback_entry); + +static struct mem_event_callback_entry_list mem_event_callback_list = + TAILQ_HEAD_INITIALIZER(mem_event_callback_list); + +static rte_rwlock_t mem_event_rwlock = RTE_RWLOCK_INITIALIZER; + +static struct mem_event_callback_entry * +find_mem_event_callback(const char *name) +{ + struct mem_event_callback_entry *r; + + TAILQ_FOREACH(r, &mem_event_callback_list, next) { + if (!strcmp(r->name, name)) + break; + } + return r; +} + bool eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start, size_t len) @@ -88,3 +118,106 @@ eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start, } return true; } + +int +eal_memalloc_mem_event_callback_register(const char *name, + rte_mem_event_callback_t clb) +{ + struct mem_event_callback_entry *entry; + int ret, len; + if (name == NULL || clb == NULL) { + rte_errno = EINVAL; + return -1; + } + len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN); + if (len == 0) { + rte_errno = EINVAL; + return -1; + } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + rte_rwlock_write_lock(&mem_event_rwlock); + + entry = find_mem_event_callback(name); + if (entry != NULL) { + rte_errno = EEXIST; + ret = -1; + goto unlock; + } + + entry = malloc(sizeof(*entry)); + if (entry == NULL) { + rte_errno = ENOMEM; + ret = -1; + goto unlock; + } + + /* callback successfully created and is valid, add it to the list */ + entry->clb = clb; + snprintf(entry->name, RTE_MEM_EVENT_CALLBACK_NAME_LEN, "%s", name); + TAILQ_INSERT_TAIL(&mem_event_callback_list, entry, next); + + ret = 0; + + RTE_LOG(DEBUG, EAL, "Mem event callback '%s' registered\n", name); + +unlock: + rte_rwlock_write_unlock(&mem_event_rwlock); + return ret; +} + +int +eal_memalloc_mem_event_callback_unregister(const char *name) +{ + struct mem_event_callback_entry *entry; + int ret, len; + + if (name == NULL) { + rte_errno = EINVAL; + return -1; + } + 
len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN); + if (len == 0) { + rte_errno = EINVAL; + return -1; + } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) { + rte_errno = ENAMETOOLONG; + return -1; + } + rte_rwlock_write_lock(&mem_event_rwlock); + + entry = find_mem_event_callback(name); + if (entry == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + TAILQ_REMOVE(&mem_event_callback_list, entry, next); + free(entry); + + ret = 0; + + RTE_LOG(DEBUG, EAL, "Mem event callback '%s' unregistered\n", name); + +unlock: + rte_rwlock_write_unlock(&mem_event_rwlock); + return ret; +} + +void +eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start, + size_t len) +{ + struct mem_event_callback_entry *entry; + + rte_rwlock_read_lock(&mem_event_rwlock); + + TAILQ_FOREACH(entry, &mem_event_callback_list, next) { + RTE_LOG(DEBUG, EAL, "Calling mem event callback %s", + entry->name); + entry->clb(event, start, len); + } + + rte_rwlock_read_unlock(&mem_event_rwlock); +} diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 2db3d8b..b9e6c03 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -623,6 +623,34 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms, return 0; } +/* + * Defining here because declared in rte_memory.h, but the actual implementation + * is in eal_common_memalloc.c, like all other memalloc internals. 
+ */ +int __rte_experimental +rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb) +{ + /* FreeBSD boots with legacy mem enabled by default */ + if (internal_config.legacy_mem) { + RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n"); + rte_errno = ENOTSUP; + return -1; + } + return eal_memalloc_mem_event_callback_register(name, clb); +} + +int __rte_experimental +rte_mem_event_callback_unregister(const char *name) +{ + /* FreeBSD boots with legacy mem enabled by default */ + if (internal_config.legacy_mem) { + RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n"); + rte_errno = ENOTSUP; + return -1; + } + return eal_memalloc_mem_event_callback_unregister(name); +} + /* Dump the physical memory layout on console */ void rte_dump_physmem_layout(FILE *f) diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h index 4a7b45c..4d27403 100644 --- a/lib/librte_eal/common/eal_memalloc.h +++ b/lib/librte_eal/common/eal_memalloc.h @@ -56,4 +56,15 @@ eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start, int eal_memalloc_sync_with_primary(void); +int +eal_memalloc_mem_event_callback_register(const char *name, + rte_mem_event_callback_t clb); + +int +eal_memalloc_mem_event_callback_unregister(const char *name); + +void +eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start, + size_t len); + #endif // EAL_MEMALLOC_H diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index 55383c4..398ca55 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -136,6 +136,9 @@ rte_iova_t rte_mem_virt2iova(const void *virt); /** * Get virtual memory address corresponding to iova address. * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * * @param iova * The iova address. 
* @return @@ -203,6 +206,9 @@ typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl, /** * Walk list of all memsegs. * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * * @param func * Iterator function * @param arg @@ -218,6 +224,9 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg); /** * Walk each VA-contiguous area. * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * * @param func * Iterator function * @param arg @@ -233,6 +242,9 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg); /** * Walk each allocated memseg list. * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * * @param func * Iterator function * @param arg @@ -248,6 +260,9 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg); /** * Dump the physical memory layout to a file. * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * * @param f * A pointer to a file for output */ @@ -256,6 +271,9 @@ void rte_dump_physmem_layout(FILE *f); /** * Get the total amount of available physical memory. * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * * @return * The total amount of available physical memory in bytes. */ @@ -290,6 +308,59 @@ unsigned rte_memory_get_nrank(void); */ int rte_eal_using_phys_addrs(void); + +/** + * Enum indicating which kind of memory event has happened. Used by callbacks to + * distinguish between memory allocations and deallocations. + */ +enum rte_mem_event { + RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */ + RTE_MEM_EVENT_FREE, /**< Deallocation event. 
*/ +}; +#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64 +/**< maximum length of callback name */ + +/** + * Function typedef used to register callbacks for memory events. + */ +typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type, + const void *addr, size_t len); + +/** + * Function used to register callbacks for memory events. + * + * @note callbacks will happen while memory hotplug subsystem is write-locked, + * therefore some functions (e.g. `rte_memseg_walk()`) will cause a + * deadlock when called from within such callbacks. + * + * @param name + * Name associated with specified callback to be added to the list. + * + * @param clb + * Callback function pointer. + * + * @return + * 0 on successful callback register + * -1 on unsuccessful callback register, with rte_errno value indicating + * reason for failure. + */ +int __rte_experimental +rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb); + +/** + * Function used to unregister callbacks for memory events. + * + * @param name + * Name associated with specified callback to be removed from the list. + * + * @return + * 0 on successful callback unregister + * -1 on unsuccessful callback unregister, with rte_errno value indicating + * reason for failure. + */ +int __rte_experimental +rte_mem_event_callback_unregister(const char *name); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 23b339e..d1ac9ea 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -238,6 +238,8 @@ EXPERIMENTAL { rte_fbarray_set_used; rte_log_register_type_and_pick_level; rte_malloc_dump_heaps; + rte_mem_event_callback_register; + rte_mem_event_callback_unregister; rte_mem_iova2virt; rte_mem_virt2memseg; rte_mem_virt2memseg_list; -- 2.7.4