DPDK-dev Archive on lore.kernel.org
 help / color / Atom feed
* [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs
@ 2019-10-15  7:49 Joyce Kong
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (33 more replies)
  0 siblings, 34 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-15  7:49 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

There are a lot of bit operation functions scattered across PMDs.
Consolidate them into a common API family and apply it in the
different PMDs to reduce code duplication.

Joyce Kong (5):
  lib/eal: implement the family of rte bit operation APIs
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead

 drivers/net/axgbe/axgbe_common.h           |  29 +----
 drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
 drivers/net/axgbe/axgbe_mdio.c             |  14 +-
 drivers/net/bnx2x/bnx2x.c                  | 202 +++++++++++++----------------
 drivers/net/bnx2x/bnx2x.h                  |   5 +-
 drivers/net/bnx2x/ecore_sp.h               |   8 +-
 drivers/net/hinic/base/hinic_compat.h      |  35 +----
 drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +--
 drivers/net/qede/base/bcm_osal.c           |  20 ---
 drivers/net/qede/base/bcm_osal.h           |  10 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h |  56 ++++++++
 lib/librte_eal/common/meson.build          |   1 +
 13 files changed, 180 insertions(+), 231 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2019-10-15  7:49 ` Joyce Kong
  2019-10-15 16:53   ` Stephen Hemminger
                     ` (3 more replies)
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 2/5] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (32 subsequent siblings)
  33 siblings, 4 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-15  7:49 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

There are a lot of bit operation functions scattered and
duplicated across PMDs, so consolidating them into a common API
family is necessary. Furthermore, the bit operations are
mostly applied to IO devices, so use __ATOMIC_ACQ_REL
to ensure the ordering.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 lib/librte_eal/common/Makefile             |  1 +
 lib/librte_eal/common/include/rte_bitops.h | 56 ++++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |  1 +
 3 files changed, 58 insertions(+)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index a00d4fc..8586ca8 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_bitops.h b/lib/librte_eal/common/include/rte_bitops.h
new file mode 100644
index 0000000..4d7c5a3
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitops.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Corporation
+ */
+
+#ifndef _RTE_BITOPS_H_
+#define _RTE_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines a generic API for bit operations.
+ */
+
+#include <stdint.h>
+#include <rte_atomic.h>
+
+static inline void
+rte_set_bit(unsigned int nr, unsigned long *addr)
+{
+	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
+}
+
+static inline void
+rte_clear_bit(int nr, unsigned long *addr)
+{
+	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL);
+}
+
+static inline int
+rte_test_bit(int nr, unsigned long *addr)
+{
+	int res;
+	rte_mb();
+	res = ((*addr) & (1UL << nr)) != 0;
+	rte_mb();
+
+	return res;
+}
+
+static inline int
+rte_test_and_set_bit(int nr, unsigned long *addr)
+{
+	unsigned long mask = (1UL << nr);
+
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+static inline int
+rte_test_and_clear_bit(int nr, unsigned long *addr)
+{
+	unsigned long mask = (1UL << nr);
+
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 386577c..a277cdf 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -52,6 +52,7 @@ common_headers = files(
 	'include/rte_alarm.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
+	'include/rte_bitops.h',
 	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v1 2/5] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-10-15  7:49 ` Joyce Kong
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 3/5] net/bnx2x: " Joyce Kong
                   ` (31 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-15  7:49 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

Remove the driver's own bit operation APIs and use the common
ones; this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f1..9cabda8 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e..cd990f5 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_clear_bit(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_test_bit(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5c..6f3b3f2 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v1 3/5] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 2/5] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-10-15  7:49 ` " Joyce Kong
  2019-10-15  7:50 ` [dpdk-dev] [PATCH v1 4/5] net/hinic: " Joyce Kong
                   ` (30 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-15  7:49 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

Remove the driver's own bit operation APIs and use the common
ones; this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 202 +++++++++++++++++++------------------------
 drivers/net/bnx2x/bnx2x.h    |   5 +-
 drivers/net/bnx2x/ecore_sp.h |   8 +-
 3 files changed, 94 insertions(+), 121 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index e1dfe60..92c77d1 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -129,32 +129,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1427,11 +1401,11 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_set_bit(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1458,26 +1432,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1462,19 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1486,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1516,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1571,9 +1545,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit(RAMROD_RX, &ramrod_flags);
+	rte_set_bit(RAMROD_TX, &ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1672,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1696,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1757,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1771,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1783,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,8 +1840,8 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	if (rte_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
+		rte_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
 	} else {
 		bnx2x_set_storm_rx_mode(sc);
 	}
@@ -1960,12 +1934,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1948,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1958,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4288,7 +4262,7 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_set_bit(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,10 +4293,10 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
 		bnx2x_set_storm_rx_mode(sc);
 	}
 }
@@ -4693,7 +4667,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4988,8 +4962,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5777,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6353,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6417,7 +6391,7 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	unsigned long flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6399,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_set_bit(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,10 +6409,10 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
@@ -6448,15 +6422,15 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 	unsigned long flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_set_bit(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_set_bit(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_set_bit(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6551,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6619,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6647,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,11 +6704,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_test_bit(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6761,7 +6735,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6867,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_set_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_set_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6905,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6922,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6944,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 43c6040..aa2d251 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -15,6 +15,7 @@
 #define __BNX2X_H__
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_bus_pci.h>
 #include <rte_io.h>
@@ -1809,10 +1810,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db37..72697c2 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -73,10 +73,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_test_bit(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_set_bit(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_clear_bit(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_test_and_clear_bit(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v1 4/5] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (2 preceding siblings ...)
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 3/5] net/bnx2x: " Joyce Kong
@ 2019-10-15  7:50 ` " Joyce Kong
  2019-10-15  7:50 ` [dpdk-dev] [PATCH v1 5/5] net/qede: " Joyce Kong
                   ` (29 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-15  7:50 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 drivers/net/hinic/base/hinic_compat.h | 35 +----------------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++++--------
 2 files changed, 9 insertions(+), 42 deletions(-)

diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index f599947..ce1fdc6 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,40 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index c9a400e..dcdcfb9 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -227,7 +227,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -907,7 +907,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1030,7 +1030,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1073,7 +1073,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2197,9 +2197,9 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -2236,7 +2236,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -2316,7 +2316,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v1 5/5] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (3 preceding siblings ...)
  2019-10-15  7:50 ` [dpdk-dev] [PATCH v1 4/5] net/hinic: " Joyce Kong
@ 2019-10-15  7:50 ` " Joyce Kong
  2019-10-15 16:51 ` [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Stephen Hemminger
                   ` (28 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-15  7:50 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 drivers/net/qede/base/bcm_osal.c | 20 --------------------
 drivers/net/qede/base/bcm_osal.h | 10 ++++------
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 9915df4..665833c 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -45,26 +45,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 51edc41..9f2be0a 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,17 +312,14 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_set_bit(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_clear_bit(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
 #define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+	rte_test_bit(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (4 preceding siblings ...)
  2019-10-15  7:50 ` [dpdk-dev] [PATCH v1 5/5] net/qede: " Joyce Kong
@ 2019-10-15 16:51 ` Stephen Hemminger
  2019-10-18  9:01   ` Joyce Kong (Arm Technology China)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 0/6] " Joyce Kong
                   ` (27 subsequent siblings)
  33 siblings, 1 reply; 70+ messages in thread
From: Stephen Hemminger @ 2019-10-15 16:51 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dev, nd, thomas, jerinj, ravi1.kumar, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, rmody, shshaikh,
	honnappa.nagarahalli, gavin.hu

On Tue, 15 Oct 2019 15:49:56 +0800
Joyce Kong <joyce.kong@arm.com> wrote:

> There are a lot functions of bit operations scattered in
> PMDs, consolidate them into a common API family and applied
> in different PMDs to reduce code duplication.
> 
> Joyce Kong (5):
>   lib/eal: implement the family of rte bit operation APIs
>   net/axgbe: use common rte bit operation APIs instead
>   net/bnx2x: use common rte bit operation APIs instead
>   net/hinic: use common rte bit operation APIs instead
>   net/qede: use common rte bit operation APIs instead
> 
>  drivers/net/axgbe/axgbe_common.h           |  29 +----
>  drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
>  drivers/net/axgbe/axgbe_mdio.c             |  14 +-
>  drivers/net/bnx2x/bnx2x.c                  | 202 +++++++++++++----------------
>  drivers/net/bnx2x/bnx2x.h                  |   5 +-
>  drivers/net/bnx2x/ecore_sp.h               |   8 +-
>  drivers/net/hinic/base/hinic_compat.h      |  35 +----
>  drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +--
>  drivers/net/qede/base/bcm_osal.c           |  20 ---
>  drivers/net/qede/base/bcm_osal.h           |  10 +-
>  lib/librte_eal/common/Makefile             |   1 +
>  lib/librte_eal/common/include/rte_bitops.h |  56 ++++++++
>  lib/librte_eal/common/meson.build          |   1 +
>  13 files changed, 180 insertions(+), 231 deletions(-)
>  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> 


This is a really good idea, and should have been done long ago.
Could you add tests for these as well?

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-10-15 16:53   ` Stephen Hemminger
  2019-10-18  9:00     ` Joyce Kong (Arm Technology China)
  2019-10-16  7:54   ` Jerin Jacob
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 70+ messages in thread
From: Stephen Hemminger @ 2019-10-15 16:53 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dev, nd, thomas, jerinj, ravi1.kumar, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, rmody, shshaikh,
	honnappa.nagarahalli, gavin.hu

On Tue, 15 Oct 2019 15:49:57 +0800
Joyce Kong <joyce.kong@arm.com> wrote:

> +static inline void
> +rte_set_bit(unsigned int nr, unsigned long *addr)
> +{
> +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +static inline void
> +rte_clear_bit(int nr, unsigned long *addr)
> +{
> +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +static inline int
> +rte_test_bit(int nr, unsigned long *addr)
> +{
> +	int res;
> +	rte_mb();
> +	res = ((*addr) & (1UL << nr)) != 0;
> +	rte_mb();
> +
> +	return res;
> +}
> +
> +static inline int
> +rte_test_and_set_bit(int nr, unsigned long *addr)
> +{
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +static inline int
> +rte_test_and_clear_bit(int nr, unsigned long *addr)
> +{
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
> +}

These functions need to be part of API, and have doxygen comments?

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
  2019-10-15 16:53   ` Stephen Hemminger
@ 2019-10-16  7:54   ` Jerin Jacob
  2019-10-18  9:02     ` Joyce Kong (Arm Technology China)
  2019-10-16 19:05   ` Stephen Hemminger
  2019-10-17 13:32   ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
  3 siblings, 1 reply; 70+ messages in thread
From: Jerin Jacob @ 2019-10-16  7:54 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dpdk-dev, nd, Thomas Monjalon, Jerin Jacob, ravi1.kumar,
	Ziyang Xuan, Xiaoyun Wang, Guoyang Zhou, Rasesh Mody,
	Shahed Shaikh, Honnappa Nagarahalli, Gavin Hu

On Tue, Oct 15, 2019 at 1:20 PM Joyce Kong <joyce.kong@arm.com> wrote:
>
> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, the bit operation is
> mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.
>
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> ---
>  lib/librte_eal/common/Makefile             |  1 +
>  lib/librte_eal/common/include/rte_bitops.h | 56 ++++++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build          |  1 +
> +
> +static inline void
> +rte_set_bit(unsigned int nr, unsigned long *addr)
> +{
> +       __atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
> +}

If it is specific to IO then, IMO, it makes sense to name the API
rte_io_set_bit(), like rte_io_rmb(),
and to rename the header file to rte_io_bitops.h.

The barriers are only needed for IO operations. Not conveying that
explicitly in the API name
would invite using it for normal cases.

Another option could be to introduce generic and IO-specific bit
operations
separately.

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
  2019-10-15 16:53   ` Stephen Hemminger
  2019-10-16  7:54   ` Jerin Jacob
@ 2019-10-16 19:05   ` Stephen Hemminger
  2019-10-17 13:32   ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
  3 siblings, 0 replies; 70+ messages in thread
From: Stephen Hemminger @ 2019-10-16 19:05 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dev, nd, thomas, jerinj, ravi1.kumar, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, rmody, shshaikh,
	honnappa.nagarahalli, gavin.hu

On Tue, 15 Oct 2019 15:49:57 +0800
Joyce Kong <joyce.kong@arm.com> wrote:

> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, the bit operation is
> mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
	'include/rte_common.h',

Patchwork reports several build failures for this patch set.

/tmp/UB1604-64_K4.4.0_Clang3.8.0/x86_64-native-linuxapp-clang/62c86b2c1091439598f2f1688566632c/dpdk/x86_64-native-linuxapp-clang/lib/librte_pmd_bnx2x.a(bnx2x.o): In function `bnx2x_set_storm_rx_mode':
/tmp/UB1604-64_K4.4.0_Clang3.8.0/x86_64-native-linuxapp-clang/62c86b2c1091439598f2f1688566632c/dpdk/drivers/net/bnx2x/bnx2x.c:(.text+0x1602): undefined reference to `ret_set_bit'


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                     ` (2 preceding siblings ...)
  2019-10-16 19:05   ` Stephen Hemminger
@ 2019-10-17 13:32   ` Morten Brørup
  2019-10-18  8:58     ` Joyce Kong (Arm Technology China)
  3 siblings, 1 reply; 70+ messages in thread
From: Morten Brørup @ 2019-10-17 13:32 UTC (permalink / raw)
  To: Joyce Kong, dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, honnappa.nagarahalli, gavin.hu,
	Stephen Hemminger

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> Sent: Tuesday, October 15, 2019 9:50 AM
> 
> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, the bit operation is
> mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.

Good initiative.

> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> ---
>  lib/librte_eal/common/Makefile             |  1 +
>  lib/librte_eal/common/include/rte_bitops.h | 56
> ++++++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build          |  1 +
>  3 files changed, 58 insertions(+)
>  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> 
> diff --git a/lib/librte_eal/common/Makefile
> b/lib/librte_eal/common/Makefile
> index a00d4fc..8586ca8 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
>  INC += rte_service.h rte_service_component.h
>  INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
>  INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
> +INC += rte_bitops.h
> 
>  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> rte_prefetch.h
>  GENERIC_INC += rte_memcpy.h rte_cpuflags.h
> diff --git a/lib/librte_eal/common/include/rte_bitops.h
> b/lib/librte_eal/common/include/rte_bitops.h
> new file mode 100644
> index 0000000..4d7c5a3
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_bitops.h
> @@ -0,0 +1,56 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Corporation
> + */
> +
> +#ifndef _RTE_BITOPS_H_
> +#define _RTE_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a generic API for bit operations.
> + */
> +
> +#include <stdint.h>
> +#include <rte_atomic.h>
> +
> +static inline void
> +rte_set_bit(unsigned int nr, unsigned long *addr)
> +{
> +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +static inline void
> +rte_clear_bit(int nr, unsigned long *addr)
> +{
> +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +static inline int
> +rte_test_bit(int nr, unsigned long *addr)
> +{
> +	int res;
> +	rte_mb();
> +	res = ((*addr) & (1UL << nr)) != 0;
> +	rte_mb();
> +
> +	return res;
> +}

Why does rte_test_bit() not use any of the __atomic_xx functions instead? E.g.:

/*
 * Test bit 'nr' of the word at 'addr'.
 *
 * The word is read with a single atomic acquire load and then masked with
 * (1UL << nr), so only the requested bit decides the result.
 *
 * Returns 1 when the bit is set, 0 otherwise. 'nr' must be a valid bit
 * index for unsigned long; any other value makes the shift undefined.
 */
static inline int
rte_test_bit(int nr, unsigned long *addr)
{
	unsigned long mask = 1UL << nr;

	/* "!= 0" keeps high bits from being lost when narrowing to int. */
	return (__atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask) != 0;
}

> +
> +static inline int
> +rte_test_and_set_bit(int nr, unsigned long *addr)
> +{
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +static inline int
> +rte_test_and_clear_bit(int nr, unsigned long *addr)
> +{
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +#endif /* _RTE_BITOPS_H_ */
> diff --git a/lib/librte_eal/common/meson.build
> b/lib/librte_eal/common/meson.build
> index 386577c..a277cdf 100644
> --- a/lib/librte_eal/common/meson.build
> +++ b/lib/librte_eal/common/meson.build
> @@ -52,6 +52,7 @@ common_headers = files(
>  	'include/rte_alarm.h',
>  	'include/rte_branch_prediction.h',
>  	'include/rte_bus.h',
> +	'include/rte_bitops.h',
>  	'include/rte_bitmap.h',
>  	'include/rte_class.h',
>  	'include/rte_common.h',
> --
> 2.7.4
> 

These functions use unsigned long as the type of their value, like they do in the PMDs.

However, a generic bit operations library should preferably work with multiple types, like the __atomic_xx functions. Or use a well-defined uint_NN_t type. Or have individually named functions for each type size, e.g. rte_set_bit_32() and rte_set_bit_64().


Med venlig hilsen / kind regards
- Morten Brørup


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-17 13:32   ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
@ 2019-10-18  8:58     ` Joyce Kong (Arm Technology China)
  2019-10-23  3:07       ` Joyce Kong (Arm Technology China)
  0 siblings, 1 reply; 70+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-18  8:58 UTC (permalink / raw)
  To: Morten Brørup, dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Honnappa Nagarahalli,
	Gavin Hu (Arm Technology China),
	Stephen Hemminger

Hi Morten,

> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Thursday, October 17, 2019 9:32 PM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>;
> dev@dpdk.org
> Cc: nd <nd@arm.com>; thomas@monjalon.net; jerinj@marvell.com;
> ravi1.kumar@amd.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> rmody@marvell.com; shshaikh@marvell.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>; Stephen Hemminger
> <stephen@networkplumber.org>
> Subject: RE: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte
> bitoperation APIs
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > Sent: Tuesday, October 15, 2019 9:50 AM
> >
> > There are a lot functions of bit operations scattered and duplicated
> > in PMDs, consolidating them into a common API family is necessary.
> > Furthermore, the bit operation is mostly applied to the IO devices, so
> > use __ATOMIC_ACQ_REL to ensure the ordering.
> 
> Good initiative.
> 
> >
> > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > ---
> >  lib/librte_eal/common/Makefile             |  1 +
> >  lib/librte_eal/common/include/rte_bitops.h | 56
> > ++++++++++++++++++++++++++++++
> >  lib/librte_eal/common/meson.build          |  1 +
> >  3 files changed, 58 insertions(+)
> >  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> >
> > diff --git a/lib/librte_eal/common/Makefile
> > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8 100644
> > --- a/lib/librte_eal/common/Makefile
> > +++ b/lib/librte_eal/common/Makefile
> > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h  INC
> > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > rte_fbarray.h rte_uuid.h
> > +INC += rte_bitops.h
> >
> >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff --git
> > a/lib/librte_eal/common/include/rte_bitops.h
> > b/lib/librte_eal/common/include/rte_bitops.h
> > new file mode 100644
> > index 0000000..4d7c5a3
> > --- /dev/null
> > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > @@ -0,0 +1,56 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2019 Arm Corporation
> > + */
> > +
> > +#ifndef _RTE_BITOPS_H_
> > +#define _RTE_BITOPS_H_
> > +
> > +/**
> > + * @file
> > + * Bit Operations
> > + *
> > + * This file defines a generic API for bit operations.
> > + */
> > +
> > +#include <stdint.h>
> > +#include <rte_atomic.h>
> > +
> > +static inline void
> > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > +
> > +static inline void
> > +rte_clear_bit(int nr, unsigned long *addr) {
> > +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL); }
> > +
> > +static inline int
> > +rte_test_bit(int nr, unsigned long *addr) {
> > +	int res;
> > +	rte_mb();
> > +	res = ((*addr) & (1UL << nr)) != 0;
> > +	rte_mb();
> > +
> > +	return res;
> > +}
> 
> Why does rte_test_bit() not use any of the __atomic_xx functions instead?
> E.g.:
> 
> static inline int
> rte_test_bit(int nr, unsigned long *addr) {
> 	return __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> 
You're right, it's better to use __atomic_xx here to stay consistent with the other APIs.

> > +
> > +static inline int
> > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > +	unsigned long mask = (1UL << nr);
> > +
> > +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> mask; }
> > +
> > +static inline int
> > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > +	unsigned long mask = (1UL << nr);
> > +
> > +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> mask; }
> > +#endif /* _RTE_BITOPS_H_ */
> > diff --git a/lib/librte_eal/common/meson.build
> > b/lib/librte_eal/common/meson.build
> > index 386577c..a277cdf 100644
> > --- a/lib/librte_eal/common/meson.build
> > +++ b/lib/librte_eal/common/meson.build
> > @@ -52,6 +52,7 @@ common_headers = files(
> >  	'include/rte_alarm.h',
> >  	'include/rte_branch_prediction.h',
> >  	'include/rte_bus.h',
> > +	'include/rte_bitops.h',
> >  	'include/rte_bitmap.h',
> >  	'include/rte_class.h',
> >  	'include/rte_common.h',
> > --
> > 2.7.4
> >
> 
> These functions use unsigned long as the type of their value, like they do in
> the PMDs.
> 
> However, a generic bit operations library should preferably work with
> multiple types, like the __atomic_xx functions. Or use an well defined
> uint_NN_t type. Or have individually named functions for each type size, e.g.
> rte_set_bit_32() and rte_set_bit_64().
> 
Good suggestion! And will do this in next version.

> Med venlig hilsen / kind regards
> - Morten Brørup


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-15 16:53   ` Stephen Hemminger
@ 2019-10-18  9:00     ` Joyce Kong (Arm Technology China)
  0 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-18  9:00 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, nd, thomas, jerinj, ravi1.kumar, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, rmody, shshaikh,
	Honnappa Nagarahalli, Gavin Hu (Arm Technology China)

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Wednesday, October 16, 2019 12:54 AM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; thomas@monjalon.net;
> jerinj@marvell.com; ravi1.kumar@amd.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> rmody@marvell.com; shshaikh@marvell.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte
> bit operation APIs
> 
> On Tue, 15 Oct 2019 15:49:57 +0800
> Joyce Kong <joyce.kong@arm.com> wrote:
> 
> > +static inline void
> > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > +
> > +static inline void
> > +rte_clear_bit(int nr, unsigned long *addr) {
> > +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL); }
> > +
> > +static inline int
> > +rte_test_bit(int nr, unsigned long *addr) {
> > +	int res;
> > +	rte_mb();
> > +	res = ((*addr) & (1UL << nr)) != 0;
> > +	rte_mb();
> > +
> > +	return res;
> > +}
> > +
> > +static inline int
> > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > +	unsigned long mask = (1UL << nr);
> > +
> > +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> mask; }
> > +
> > +static inline int
> > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > +	unsigned long mask = (1UL << nr);
> > +
> > +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> mask; }
> 
> These functions need to be part of API, and have doxygen comments?

Will add doxygen comments in next version.

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs
  2019-10-15 16:51 ` [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Stephen Hemminger
@ 2019-10-18  9:01   ` Joyce Kong (Arm Technology China)
  0 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-18  9:01 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, nd, thomas, jerinj, ravi1.kumar, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, rmody, shshaikh,
	Honnappa Nagarahalli, Gavin Hu (Arm Technology China)

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Wednesday, October 16, 2019 12:51 AM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; thomas@monjalon.net;
> jerinj@marvell.com; ravi1.kumar@amd.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> rmody@marvell.com; shshaikh@marvell.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation
> APIs in PMDs
> 
> On Tue, 15 Oct 2019 15:49:56 +0800
> Joyce Kong <joyce.kong@arm.com> wrote:
> 
> > There are a lot functions of bit operations scattered in PMDs,
> > consolidate them into a common API family and applied in different
> > PMDs to reduce code duplication.
> >
> > Joyce Kong (5):
> >   lib/eal: implement the family of rte bit operation APIs
> >   net/axgbe: use common rte bit operation APIs instead
> >   net/bnx2x: use common rte bit operation APIs instead
> >   net/hinic: use common rte bit operation APIs instead
> >   net/qede: use common rte bit operation APIs instead
> >
> >  drivers/net/axgbe/axgbe_common.h           |  29 +----
> >  drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
> >  drivers/net/axgbe/axgbe_mdio.c             |  14 +-
> >  drivers/net/bnx2x/bnx2x.c                  | 202 +++++++++++++----------------
> >  drivers/net/bnx2x/bnx2x.h                  |   5 +-
> >  drivers/net/bnx2x/ecore_sp.h               |   8 +-
> >  drivers/net/hinic/base/hinic_compat.h      |  35 +----
> >  drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +--
> >  drivers/net/qede/base/bcm_osal.c           |  20 ---
> >  drivers/net/qede/base/bcm_osal.h           |  10 +-
> >  lib/librte_eal/common/Makefile             |   1 +
> >  lib/librte_eal/common/include/rte_bitops.h |  56 ++++++++
> >  lib/librte_eal/common/meson.build          |   1 +
> >  13 files changed, 180 insertions(+), 231 deletions(-)  create mode
> > 100644 lib/librte_eal/common/include/rte_bitops.h
> >
> 
> 
> This is a really good idea, and should have been done long ago.
> Could you add tests for these as well?

Yes. Will add some tests for these APIs in next version.

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-16  7:54   ` Jerin Jacob
@ 2019-10-18  9:02     ` Joyce Kong (Arm Technology China)
  2019-10-23  3:12       ` Joyce Kong (Arm Technology China)
  0 siblings, 1 reply; 70+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-18  9:02 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: dpdk-dev, nd, thomas, jerinj, ravi1.kumar, Ziyang Xuan,
	Xiaoyun Wang, Guoyang Zhou, Rasesh Mody, Shahed Shaikh,
	Honnappa Nagarahalli, Gavin Hu (Arm Technology China)

> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Wednesday, October 16, 2019 3:54 PM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>
> Cc: dpdk-dev <dev@dpdk.org>; nd <nd@arm.com>; thomas@monjalon.net;
> jerinj@marvell.com; ravi1.kumar@amd.com; Ziyang Xuan
> <xuanziyang2@huawei.com>; Xiaoyun Wang
> <cloud.wangxiaoyun@huawei.com>; Guoyang Zhou
> <zhouguoyang@huawei.com>; Rasesh Mody <rmody@marvell.com>;
> Shahed Shaikh <shshaikh@marvell.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte
> bit operation APIs
> 
> On Tue, Oct 15, 2019 at 1:20 PM Joyce Kong <joyce.kong@arm.com> wrote:
> >
> > There are a lot functions of bit operations scattered and duplicated
> > in PMDs, consolidating them into a common API family is necessary.
> > Furthermore, the bit operation is mostly applied to the IO devices, so
> > use __ATOMIC_ACQ_REL to ensure the ordering.
> >
> > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > ---
> >  lib/librte_eal/common/Makefile             |  1 +
> >  lib/librte_eal/common/include/rte_bitops.h | 56
> ++++++++++++++++++++++++++++++
> >  lib/librte_eal/common/meson.build          |  1 +
> > +
> > +static inline void
> > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > +       __atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> 
> If it is specific to IO then, IMO, it makes sense to name the API
> rte_io_set_bit(), like rte_io_rmb(),
> and change the header file to rte_io_bitops.h.
> 
> The barriers are only needed for IO operations. Not conveying that explicitly
> in the API name would invite its use in normal (non-IO) cases.
> 
> Another option could be to introduce generic and IO-specific bit
> operations separately.

Will make the related changes in the next version.

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (5 preceding siblings ...)
  2019-10-15 16:51 ` [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Stephen Hemminger
@ 2019-10-23  2:54 ` " Joyce Kong
  2019-10-25 13:14   ` David Marchand
  2019-10-29 16:42   ` Thomas Monjalon
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (26 subsequent siblings)
  33 siblings, 2 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

There are a lot of bit operation functions scattered across PMDs.
Consolidate them into a common API family and apply it in the different
PMDs to reduce code duplication.

v2:
  1. Add doxygen comments for the rte bit operation API (suggested by Stephen Hemminger).
  2. Add test cases for the common rte bit operation API (suggested by Stephen Hemminger).
  3. Change the header file to rte_io_bitops.h and the operations to rte_io_set_bit() etc.,
     as the API uses barriers inside and the barriers are only needed for IO operations
     (suggested by Jerin Jacob).
  4. Use a well-defined uintNN_t type (suggested by Morten Brørup).

Joyce Kong (6):
  lib/eal: implement the family of rte bit operation APIs
  test/iobitops: add io bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead

 app/test/Makefile                             |   1 +
 app/test/test_io_bitops.c                     |  86 +++++++++++
 drivers/net/axgbe/axgbe_common.h              |  29 +---
 drivers/net/axgbe/axgbe_ethdev.c              |  14 +-
 drivers/net/axgbe/axgbe_mdio.c                |  14 +-
 drivers/net/bnx2x/bnx2x.c                     | 209 ++++++++++++--------------
 drivers/net/bnx2x/bnx2x.h                     |   4 -
 drivers/net/bnx2x/ecore_sp.h                  |   9 +-
 drivers/net/hinic/base/hinic_compat.h         |  35 +----
 drivers/net/hinic/hinic_pmd_ethdev.c          |  16 +-
 drivers/net/qede/base/bcm_osal.c              |  20 ---
 drivers/net/qede/base/bcm_osal.h              |  10 +-
 lib/librte_eal/common/Makefile                |   1 +
 lib/librte_eal/common/include/rte_io_bitops.h | 112 ++++++++++++++
 lib/librte_eal/common/meson.build             |   1 +
 15 files changed, 327 insertions(+), 234 deletions(-)
 create mode 100644 app/test/test_io_bitops.c
 create mode 100644 lib/librte_eal/common/include/rte_io_bitops.h

-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (6 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 0/6] " Joyce Kong
@ 2019-10-23  2:54 ` Joyce Kong
  2019-10-23  3:09   ` Honnappa Nagarahalli
                     ` (2 more replies)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 2/6] test/iobitops: add io bit operation test case Joyce Kong
                   ` (25 subsequent siblings)
  33 siblings, 3 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

There are a lot of bit operation functions scattered and
duplicated across PMDs, so consolidating them into a common
API family is necessary. Furthermore, the bit operations are
mostly applied to IO devices, so use __ATOMIC_ACQ_REL
to ensure ordering.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 lib/librte_eal/common/Makefile                |   1 +
 lib/librte_eal/common/include/rte_io_bitops.h | 112 ++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build             |   1 +
 3 files changed, 114 insertions(+)
 create mode 100644 lib/librte_eal/common/include/rte_io_bitops.h

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index a00d4fc..3831313 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_io_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_io_bitops.h b/lib/librte_eal/common/include/rte_io_bitops.h
new file mode 100644
index 0000000..5f778b8
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_io_bitops.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_IO_BITOPS_H_
+#define _RTE_IO_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines a generic API for bit operations.
+ */
+
+#include <rte_lcore.h>
+
+/**
+ * Get a bit.
+ *
+ * @param nr
+ *   The bit to get.
+ * @param addr
+ *   The address to count from.
+ * @return
+ *   The value of the bit.
+ */
+static inline int32_t
+rte_io_get_bit(uint32_t nr, uint64_t *addr)
+{
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr);
+}
+
+/**
+ * Set a bit to 1.
+ *
+ * @param nr
+ *   The bit to set.
+ * @param addr
+ *   The address to count from.
+ */
+static inline void
+rte_io_set_bit(uint32_t nr, uint64_t *addr)
+{
+	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
+}
+
+/**
+ * Set a bit to 0.
+ *
+ * @param nr
+ *   The bit to clear.
+ * @param addr
+ *   The address to count from.
+ */
+static inline void
+rte_io_clear_bit(int32_t nr, uint64_t *addr)
+{
+	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL);
+}
+
+/**
+ * Test if a bit is 1.
+ *
+ * @param nr
+ *   The bit to test.
+ * @param addr
+ *   The address to count from.
+ * @return
+ *   1 if the bit is 1; else 0.
+ */
+static inline int32_t
+rte_io_test_bit(int32_t nr, uint64_t *addr)
+{
+	return (__atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr)) != 0;
+}
+
+/**
+ * Set a bit to 1 and return its old value.
+ *
+ * @param nr
+ *   The bit to set.
+ * @param addr
+ *   The address to count from.
+ * @return
+ *   The old value of the bit.
+ */
+static inline int32_t
+rte_io_test_and_set_bit(int32_t nr, uint64_t *addr)
+{
+	unsigned long mask = (1UL << nr);
+
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * Set a bit to 0 and return its old value.
+ *
+ * @param nr
+ *   The bit to clear.
+ * @param addr
+ *   The address to count from.
+ * @return
+ *   The old value of the bit.
+ */
+static inline int32_t
+rte_io_test_and_clear_bit(int32_t nr, uint64_t *addr)
+{
+	unsigned long mask = (1UL << nr);
+
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_IO_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 386577c..0a65d04 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -52,6 +52,7 @@ common_headers = files(
 	'include/rte_alarm.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
+	'include/rte_io_bitops.h',
 	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v2 2/6] test/iobitops: add io bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (7 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-10-23  2:54 ` Joyce Kong
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (24 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Add test cases for set bit, clear bit, test and set bit,
test and clear bit operations.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 app/test/Makefile         |  1 +
 app/test/test_io_bitops.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 app/test/test_io_bitops.c

diff --git a/app/test/Makefile b/app/test/Makefile
index df7f77f..3e47c94 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -70,6 +70,7 @@ SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
 SRCS-y += test_bitmap.c
+SRCS-y += test_io_bitops.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
diff --git a/app/test/test_io_bitops.c b/app/test/test_io_bitops.c
new file mode 100644
index 0000000..c61bec7
--- /dev/null
+++ b/app/test/test_io_bitops.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_io_bitops.h>
+#include <rte_malloc.h>
+
+#include "test.h"
+
+#define MAX_BITS 32
+
+static int
+test_io_bitops_set(unsigned long *addr)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS; i++)
+		rte_io_set_bit(i, addr);
+
+	for (i = 0; i < MAX_BITS; i++)
+		if (!rte_io_get_bit(i, addr)) {
+			printf("Failed to set bit.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_io_bitops_clear(unsigned long *addr)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS; i++)
+		rte_io_clear_bit(i, addr);
+
+	for (i = 0; i < MAX_BITS; i++)
+		if (rte_io_get_bit(i, addr)) {
+			printf("Failed to clear bit.\n");
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_io_bitops_test_set_clear(unsigned long *addr)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS; i++)
+		rte_io_test_and_set_bit(i, addr);
+
+	for (i = 0; i < MAX_BITS; i++)
+		if (!rte_io_test_and_clear_bit(i, addr)) {
+			printf("Failed to set and test bit.\n");
+			return TEST_FAILED;
+	}
+
+	for (i = 0; i < MAX_BITS; i++)
+		if (rte_io_get_bit(i, addr)) {
+			printf("Failed to test and clear bit.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_io_bitops(void)
+{
+	unsigned long *addr = rte_zmalloc(NULL, MAX_BITS, RTE_CACHE_LINE_SIZE);
+
+	if (test_io_bitops_set(addr) < 0)
+		return TEST_FAILED;
+
+	if (test_io_bitops_clear(addr) < 0)
+		return TEST_FAILED;
+
+	if (test_io_bitops_test_set_clear(addr) < 0)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(io_bitops_autotest, test_io_bitops);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (8 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 2/6] test/iobitops: add io bit operation test case Joyce Kong
@ 2019-10-23  2:54 ` Joyce Kong
  2019-10-23  3:16   ` Honnappa Nagarahalli
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 4/6] net/bnx2x: " Joyce Kong
                   ` (23 subsequent siblings)
  33 siblings, 1 reply; 70+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f1..e44ec7d 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_io_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e..8c8e5ff 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_io_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_io_clear_bit(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_io_test_bit(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_io_set_bit(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_io_set_bit(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_io_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_io_set_bit(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5c..4164564 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_io_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_io_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_io_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_io_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_io_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_io_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_io_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v2 4/6] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (9 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-10-23  2:54 ` " Joyce Kong
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 5/6] net/hinic: " Joyce Kong
                   ` (22 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 209 +++++++++++++++++++------------------------
 drivers/net/bnx2x/bnx2x.h    |   4 -
 drivers/net/bnx2x/ecore_sp.h |   9 +-
 3 files changed, 98 insertions(+), 124 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index e1dfe60..623b2ed 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -26,6 +26,7 @@
 #include <fcntl.h>
 #include <zlib.h>
 #include <rte_string_fns.h>
+#include <rte_io_bitops.h>
 
 #define BNX2X_PMD_VER_PREFIX "BNX2X PMD"
 #define BNX2X_PMD_VERSION_MAJOR 1
@@ -129,32 +130,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1427,11 +1402,11 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_io_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_io_set_bit(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1458,26 +1433,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1463,20 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_io_set_bit(ECORE_ACCEPT_ALL_UNICAST,
+					tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_io_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1488,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_io_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1518,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_io_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1571,9 +1547,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_io_set_bit(RAMROD_RX, &ramrod_flags);
+	rte_io_set_bit(RAMROD_TX, &ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1674,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_io_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1698,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1759,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1773,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_io_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1785,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,11 +1842,10 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
-	} else {
+	if (rte_io_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state))
+		rte_io_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	else
 		bnx2x_set_storm_rx_mode(sc);
-	}
 
 	/* Clean up multicast configuration */
 	rparam.mcast_obj = &sc->mcast_obj;
@@ -1960,12 +1935,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_io_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_io_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1949,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_io_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1959,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_io_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4288,7 +4263,7 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_io_set_bit(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,12 +4294,12 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_io_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_io_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED,
+						&sc->sp_state))
 		bnx2x_set_storm_rx_mode(sc);
-	}
 }
 
 static void bnx2x_update_eq_prod(struct bnx2x_softc *sc, uint16_t prod)
@@ -4693,7 +4668,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4988,8 +4963,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_io_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_io_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5778,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6354,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_io_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_io_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_io_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_io_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6417,7 +6392,7 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	unsigned long flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_io_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6400,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_io_set_bit(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_io_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,10 +6410,10 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_io_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_io_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
@@ -6448,15 +6423,15 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 	unsigned long flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_io_set_bit(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_io_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_io_set_bit(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_io_set_bit(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6552,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6620,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_io_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_io_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_io_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_io_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_io_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_io_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_io_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6648,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_io_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,11 +6705,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_io_test_bit(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_io_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6761,7 +6736,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_io_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6868,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_io_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_io_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_io_set_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_io_set_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6906,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_io_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_io_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6923,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_io_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_io_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6945,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_io_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_io_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_io_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_io_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_io_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_io_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 43c6040..010699a 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1809,10 +1809,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db37..5f1e74f 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -15,6 +15,7 @@
 #define ECORE_SP_H
 
 #include <rte_byteorder.h>
+#include <rte_io_bitops.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -73,10 +74,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_io_test_bit(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_io_set_bit(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_io_clear_bit(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_io_test_and_clear_bit(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v2 5/6] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (10 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 4/6] net/bnx2x: " Joyce Kong
@ 2019-10-23  2:54 ` " Joyce Kong
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 6/6] net/qede: " Joyce Kong
                   ` (21 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/hinic/base/hinic_compat.h | 35 +----------------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++++--------
 2 files changed, 9 insertions(+), 42 deletions(-)

diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index f599947..d3957c8 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_io_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,40 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index c9a400e..c6dcfae 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -227,7 +227,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_io_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -907,7 +907,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_io_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1030,7 +1030,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_io_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1073,7 +1073,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_io_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2197,9 +2197,9 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_io_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_io_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -2236,7 +2236,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_io_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -2316,7 +2316,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_io_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v2 6/6] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (11 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 5/6] net/hinic: " Joyce Kong
@ 2019-10-23  2:54 ` " Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (20 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-10-23  2:54 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/qede/base/bcm_osal.c | 20 --------------------
 drivers/net/qede/base/bcm_osal.h | 10 ++++------
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 9915df4..665833c 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -45,26 +45,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 51edc41..d6107c3 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_io_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,17 +312,14 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_io_set_bit(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_io_clear_bit(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
 #define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+	rte_io_test_bit(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-18  8:58     ` Joyce Kong (Arm Technology China)
@ 2019-10-23  3:07       ` Joyce Kong (Arm Technology China)
  2019-10-23  7:45         ` Morten Brørup
  0 siblings, 1 reply; 70+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-23  3:07 UTC (permalink / raw)
  To: Morten Brørup, dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Honnappa Nagarahalli,
	Gavin Hu (Arm Technology China),
	Stephen Hemminger

> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > > Sent: Tuesday, October 15, 2019 9:50 AM
> > >
> > > There are a lot functions of bit operations scattered and duplicated
> > > in PMDs, consolidating them into a common API family is necessary.
> > > Furthermore, the bit operation is mostly applied to the IO devices,
> > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> >
> > Good initiative.
> >
> > >
> > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > ---
> > >  lib/librte_eal/common/Makefile             |  1 +
> > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > ++++++++++++++++++++++++++++++
> > >  lib/librte_eal/common/meson.build          |  1 +
> > >  3 files changed, 58 insertions(+)
> > >  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> > >
> > > diff --git a/lib/librte_eal/common/Makefile
> > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8 100644
> > > --- a/lib/librte_eal/common/Makefile
> > > +++ b/lib/librte_eal/common/Makefile
> > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
> > > INC
> > > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > > rte_fbarray.h rte_uuid.h
> > > +INC += rte_bitops.h
> > >
> > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff
> > > --git a/lib/librte_eal/common/include/rte_bitops.h
> > > b/lib/librte_eal/common/include/rte_bitops.h
> > > new file mode 100644
> > > index 0000000..4d7c5a3
> > > --- /dev/null
> > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > @@ -0,0 +1,56 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2019 Arm Corporation  */
> > > +
> > > +#ifndef _RTE_BITOPS_H_
> > > +#define _RTE_BITOPS_H_
> > > +
> > > +/**
> > > + * @file
> > > + * Bit Operations
> > > + *
> > > + * This file defines a generic API for bit operations.
> > > + */
> > > +
> > > +#include <stdint.h>
> > > +#include <rte_atomic.h>
> > > +
> > > +static inline void
> > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > +
> > > +static inline void
> > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL); }
> > > +
> > > +static inline int
> > > +rte_test_bit(int nr, unsigned long *addr) {
> > > +	int res;
> > > +	rte_mb();
> > > +	res = ((*addr) & (1UL << nr)) != 0;
> > > +	rte_mb();
> > > +
> > > +	return res;
> > > +}
> >
> > Why does rte_test_bit() not use any of the __atomic_xx functions instead?
> > E.g.:
> >
> > static inline int
> > rte_test_bit(int nr, unsigned long *addr) {
> > 	return __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> >
> You're right, it's better to use __atomic_xx here to keep it consistent with
> other APIs.
> 
> > > +
> > > +static inline int
> > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > +	unsigned long mask = (1UL << nr);
> > > +
> > > +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > mask; }
> > > +
> > > +static inline int
> > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > +	unsigned long mask = (1UL << nr);
> > > +
> > > +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> > mask; }
> > > +#endif /* _RTE_BITOPS_H_ */
> > > diff --git a/lib/librte_eal/common/meson.build
> > > b/lib/librte_eal/common/meson.build
> > > index 386577c..a277cdf 100644
> > > --- a/lib/librte_eal/common/meson.build
> > > +++ b/lib/librte_eal/common/meson.build
> > > @@ -52,6 +52,7 @@ common_headers = files(
> > >  	'include/rte_alarm.h',
> > >  	'include/rte_branch_prediction.h',
> > >  	'include/rte_bus.h',
> > > +	'include/rte_bitops.h',
> > >  	'include/rte_bitmap.h',
> > >  	'include/rte_class.h',
> > >  	'include/rte_common.h',
> > > --
> > > 2.7.4
> > >
> >
> > These functions use unsigned long as the type of their value, like
> > they do in the PMDs.
> >
> > However, a generic bit operations library should preferably work with
> > multiple types, like the __atomic_xx functions. Or use an well defined
> > uint_NN_t type. Or have individually named functions for each type size,
> e.g.
> > rte_set_bit_32() and rte_set_bit_64().
> >
> Good suggestion! And will do this in next version.

The PMDs that currently use the common API all perform 32-bit operations, so
the definition is changed to use the uint32_t type instead of individually
naming functions for each type size.


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-10-23  3:09   ` Honnappa Nagarahalli
  2019-10-23  4:56   ` Jerin Jacob
  2019-10-23  7:46   ` Morten Brørup
  2 siblings, 0 replies; 70+ messages in thread
From: Honnappa Nagarahalli @ 2019-10-23  3:09 UTC (permalink / raw)
  To: Joyce Kong (Arm Technology China), dev
  Cc: nd, thomas, jerinj, stephen, mb, Gavin Hu (Arm Technology China),
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Honnappa Nagarahalli, nd

Hi Joyce,
	Thanks for the patch, few comments.

<snip>

> 
> There are a lot functions of bit operations scattered and duplicated in PMDs,
> consolidating them into a common API family is necessary. Furthermore, the
> bit operation is mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.
The APIs are not taking memory ordering as a parameter. This presents the same problem as the rte_atomic_xxx APIs. IMO, the APIs should take memory ordering as a parameter.

> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
>  lib/librte_eal/common/Makefile                |   1 +
>  lib/librte_eal/common/include/rte_io_bitops.h | 112
> ++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build             |   1 +
>  3 files changed, 114 insertions(+)
>  create mode 100644 lib/librte_eal/common/include/rte_io_bitops.h
> 
> diff --git a/lib/librte_eal/common/Makefile
> b/lib/librte_eal/common/Makefile index a00d4fc..3831313 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h  INC +=
> rte_service.h rte_service_component.h  INC += rte_bitmap.h rte_vfio.h
> rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
> +INC += rte_io_bitops.h
> 
>  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
> GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff --git
> a/lib/librte_eal/common/include/rte_io_bitops.h
> b/lib/librte_eal/common/include/rte_io_bitops.h
> new file mode 100644
> index 0000000..5f778b8
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_io_bitops.h
> @@ -0,0 +1,112 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#ifndef _RTE_IO_BITOPS_H_
> +#define _RTE_IO_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a generic API for bit operations.
> + */
> +
> +#include <rte_lcore.h>
> +
> +/**
> + * Get a bit.
> + *
> + * @param nr
> + *   The bit to get.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The value of the bit.
> + */
> +static inline int32_t
> +rte_io_get_bit(uint32_t nr, uint64_t *addr) {
> +	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr); }
Some use cases might need 'relaxed' memory order for this API. So, the user of this API should be able to provide the memory order.

> +
> +/**
> + * Set a bit to 1.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + */
> +static inline void
> +rte_io_set_bit(uint32_t nr, uint64_t *addr) {
> +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
Some use cases might need 'release' or 'relaxed' memory order.
Similar requirements apply to other APIs too.

> +
> +/**
> + * Set a bit to 0.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + */
> +static inline void
> +rte_io_clear_bit(int32_t nr, uint64_t *addr) {
> +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL); }
> +
> +/**
> + * Test if a bit is 1.
> + *
> + * @param nr
> + *   The bit to test.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   1 if the bit is 1; else 0.
> + */
> +static inline int32_t
> +rte_io_test_bit(int32_t nr, uint64_t *addr) {
> +	return (__atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL <<
> nr)) != 0; }
> +
> +/**
> + * Set a bit to 1 and return its old value.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The old value of the bit.
> + */
> +static inline int32_t
> +rte_io_test_and_set_bit(int32_t nr, uint64_t *addr) {
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> mask; }
> +
> +/**
> + * Set a bit to 0 and return its old value.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The old value of the bit.
> + */
> +static inline int32_t
> +rte_io_test_and_clear_bit(int32_t nr, uint64_t *addr) {
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> mask; }
> +#endif /* _RTE_IO_BITOPS_H_ */
> diff --git a/lib/librte_eal/common/meson.build
> b/lib/librte_eal/common/meson.build
> index 386577c..0a65d04 100644
> --- a/lib/librte_eal/common/meson.build
> +++ b/lib/librte_eal/common/meson.build
> @@ -52,6 +52,7 @@ common_headers = files(
>  	'include/rte_alarm.h',
>  	'include/rte_branch_prediction.h',
>  	'include/rte_bus.h',
> +	'include/rte_io_bitops.h',
>  	'include/rte_bitmap.h',
>  	'include/rte_class.h',
>  	'include/rte_common.h',
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs
  2019-10-18  9:02     ` Joyce Kong (Arm Technology China)
@ 2019-10-23  3:12       ` Joyce Kong (Arm Technology China)
  0 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-10-23  3:12 UTC (permalink / raw)
  To: Jerin Jacob, dpdk-dev
  Cc: nd, thomas, jerinj, ravi1.kumar, Ziyang Xuan, Xiaoyun Wang,
	Guoyang Zhou, Rasesh Mody, Shahed Shaikh, Honnappa Nagarahalli,
	Gavin Hu (Arm Technology China)

> > On Tue, Oct 15, 2019 at 1:20 PM Joyce Kong <joyce.kong@arm.com> wrote:
> > >
> > > There are a lot functions of bit operations scattered and duplicated
> > > in PMDs, consolidating them into a common API family is necessary.
> > > Furthermore, the bit operation is mostly applied to the IO devices,
> > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > >
> > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > ---
> > >  lib/librte_eal/common/Makefile             |  1 +
> > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > ++++++++++++++++++++++++++++++
> > >  lib/librte_eal/common/meson.build          |  1 +
> > > +
> > > +static inline void
> > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > +       __atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> >
> > If it is specific to IO then, IMO, it makes sense to name the API
> > rte_io_set_bit(), like rte_io_rmb(),
> > and change the header file to rte_io_bitops.h.
> >
> > The barriers are only needed for IO operations. Not conveying that
> > explicitly in the API name would invite using it for normal cases.
> >
> > Another option could be to introduce generic and IO-specific bit
> > operations separately.
> 
> Would do some related changes in next version.

As bit operations are mostly applied to IO devices, the header file is
renamed to rte_io_bitops.h and now introduces IO-specific bit operations.
This change is made in v2.


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-10-23  3:16   ` Honnappa Nagarahalli
  0 siblings, 0 replies; 70+ messages in thread
From: Honnappa Nagarahalli @ 2019-10-23  3:16 UTC (permalink / raw)
  To: Joyce Kong (Arm Technology China), dev
  Cc: nd, thomas, jerinj, stephen, mb, Gavin Hu (Arm Technology China),
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, Honnappa Nagarahalli, nd

<snip>

> 
> Remove its own bit operation APIs and use the common one, this can reduce
> the code duplication largely.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
>  drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
> drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
>  drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
>  3 files changed, 15 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/net/axgbe/axgbe_common.h
> b/drivers/net/axgbe/axgbe_common.h
> index 34f60f1..e44ec7d 100644
> --- a/drivers/net/axgbe/axgbe_common.h
> +++ b/drivers/net/axgbe/axgbe_common.h
> @@ -22,6 +22,7 @@
>  #include <pthread.h>
> 
>  #include <rte_byteorder.h>
> +#include <rte_io_bitops.h>
>  #include <rte_memory.h>
>  #include <rte_malloc.h>
>  #include <rte_hexdump.h>
> @@ -1674,34 +1675,6 @@ do {
> 				\
>  #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
>  #define time_before_eq(a, b)	time_after_eq(b, a)
> 
> -/*---bitmap support apis---*/
> -static inline int axgbe_test_bit(int nr, volatile unsigned long *addr) -{
> -	int res;
> -
> -	rte_mb();
> -	res = ((*addr) & (1UL << nr)) != 0;
> -	rte_mb();
> -	return res;
> -}
This function uses rte_mb() before and after the load, whereas the new API uses just 'acquire'. Please ensure 'acquire' is enough.

> -
> -static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
> -{
> -	__sync_fetch_and_or(addr, (1UL << nr));
> -}
> -
> -static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr) -{
> -	__sync_fetch_and_and(addr, ~(1UL << nr));
> -}
> -
> -static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
> -{
> -	unsigned long mask = (1UL << nr);
> -
> -	return __sync_fetch_and_and(addr, ~mask) & mask;
> -}
> -
>  static inline unsigned long msecs_to_timer_cycles(unsigned int m)  {
>  	return rte_get_timer_hz() * (m / 1000); diff --git
> a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
> index d1f160e..8c8e5ff 100644
> --- a/drivers/net/axgbe/axgbe_ethdev.c
> +++ b/drivers/net/axgbe/axgbe_ethdev.c
> @@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
>  	axgbe_dev_enable_tx(dev);
>  	axgbe_dev_enable_rx(dev);
> 
> -	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
> -	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
> +	rte_io_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_io_clear_bit(AXGBE_DOWN, &pdata->dev_state);
>  	return 0;
>  }
> 
> @@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
> 
>  	rte_intr_disable(&pdata->pci_dev->intr_handle);
> 
> -	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
> +	if (rte_io_test_bit(AXGBE_STOPPED, &pdata->dev_state))
>  		return;
> 
> -	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_io_set_bit(AXGBE_STOPPED, &pdata->dev_state);
>  	axgbe_dev_disable_tx(dev);
>  	axgbe_dev_disable_rx(dev);
> 
>  	pdata->phy_if.phy_stop(pdata);
>  	pdata->hw_if.exit(pdata);
>  	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
> -	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
> +	rte_io_set_bit(AXGBE_DOWN, &pdata->dev_state);
>  }
> 
>  /* Clear all resources like TX/RX queues. */ @@ -598,8 +598,8 @@
> eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
> 
>  	pdata = eth_dev->data->dev_private;
>  	/* initial state */
> -	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
> -	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_io_set_bit(AXGBE_DOWN, &pdata->dev_state);
> +	rte_io_set_bit(AXGBE_STOPPED, &pdata->dev_state);
>  	pdata->eth_dev = eth_dev;
> 
>  	pci_dev = RTE_DEV_TO_PCI(eth_dev->device); diff --git
> a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c index
> 2721e5c..4164564 100644
> --- a/drivers/net/axgbe/axgbe_mdio.c
> +++ b/drivers/net/axgbe/axgbe_mdio.c
> @@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct
> axgbe_port *pdata)  {
>  	int ret;
> 
> -	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
> +	rte_io_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
>  	pdata->link_check = rte_get_timer_cycles();
> 
>  	ret = pdata->phy_if.phy_impl.an_config(pdata);
> @@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port
> *pdata)
> 
>  	ret = __axgbe_phy_config_aneg(pdata);
>  	if (ret)
> -		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
> +		rte_io_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
>  	else
> -		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
> +		rte_io_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
> 
>  	pthread_mutex_unlock(&pdata->an_mutex);
> 
> @@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port
> *pdata)
>  	unsigned int link_aneg;
>  	int an_restart;
> 
> -	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
> +	if (rte_io_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
>  		pdata->phy.link = 0;
>  		goto adjust_link;
>  	}
> @@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port
> *pdata)
>  			return;
>  		}
>  		axgbe_phy_status_result(pdata);
> -		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
> -			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata-
> >dev_state);
> +		if (rte_io_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
> +			rte_io_clear_bit(AXGBE_LINK_INIT, &pdata-
> >dev_state);
>  	} else {
> -		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
> +		if (rte_io_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
>  			axgbe_check_link_timeout(pdata);
> 
>  			if (link_aneg)
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
  2019-10-23  3:09   ` Honnappa Nagarahalli
@ 2019-10-23  4:56   ` Jerin Jacob
  2019-10-23  7:46   ` Morten Brørup
  2 siblings, 0 replies; 70+ messages in thread
From: Jerin Jacob @ 2019-10-23  4:56 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dpdk-dev, nd, Thomas Monjalon, Jerin Jacob, Stephen Hemminger,
	mb, Honnappa Nagarahalli, Gavin Hu, ravi1.kumar, Rasesh Mody,
	Shahed Shaikh, Ziyang Xuan, Xiaoyun Wang, Guoyang Zhou

On Wed, Oct 23, 2019 at 8:25 AM Joyce Kong <joyce.kong@arm.com> wrote:
>
> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, the bit operation is
> mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.
>
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
>  lib/librte_eal/common/Makefile                |   1 +
>  lib/librte_eal/common/include/rte_io_bitops.h | 112 ++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build             |   1 +


Missing doc/api/doxy-api-index.md update

>  3 files changed, 114 insertions(+)
>  create mode 100644 lib/librte_eal/common/include/rte_io_bitops.h
>

> +
> +/**
> + * Get a bit.
> + *
> + * @param nr
> + *   The bit to get.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The value of the bit.
> + */
> +static inline int32_t

Missing __rte_experimental

> +rte_io_get_bit(uint32_t nr, uint64_t *addr)
> +{
> +       return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr);
> +}
> +

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-23  3:07       ` Joyce Kong (Arm Technology China)
@ 2019-10-23  7:45         ` Morten Brørup
  2019-10-23 17:30           ` Honnappa Nagarahalli
  0 siblings, 1 reply; 70+ messages in thread
From: Morten Brørup @ 2019-10-23  7:45 UTC (permalink / raw)
  To: Joyce Kong (Arm Technology China), dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Honnappa Nagarahalli,
	Gavin Hu (Arm Technology China),
	Stephen Hemminger

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong (Arm
> Technology China)
> Sent: Wednesday, October 23, 2019 5:08 AM
> 
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > > > Sent: Tuesday, October 15, 2019 9:50 AM
> > > >
> > > > There are a lot functions of bit operations scattered and
> duplicated
> > > > in PMDs, consolidating them into a common API family is
> necessary.
> > > > Furthermore, the bit operation is mostly applied to the IO
> devices,
> > > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > >
> > > Good initiative.
> > >
> > > >
> > > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > > ---
> > > >  lib/librte_eal/common/Makefile             |  1 +
> > > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > > ++++++++++++++++++++++++++++++
> > > >  lib/librte_eal/common/meson.build          |  1 +
> > > >  3 files changed, 58 insertions(+)
> > > >  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> > > >
> > > > diff --git a/lib/librte_eal/common/Makefile
> > > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8 100644
> > > > --- a/lib/librte_eal/common/Makefile
> > > > +++ b/lib/librte_eal/common/Makefile
> > > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
> > > > INC
> > > > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > > > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > > > rte_fbarray.h rte_uuid.h
> > > > +INC += rte_bitops.h
> > > >
> > > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff
> > > > --git a/lib/librte_eal/common/include/rte_bitops.h
> > > > b/lib/librte_eal/common/include/rte_bitops.h
> > > > new file mode 100644
> > > > index 0000000..4d7c5a3
> > > > --- /dev/null
> > > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > > @@ -0,0 +1,56 @@
> > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > + * Copyright(c) 2019 Arm Corporation  */
> > > > +
> > > > +#ifndef _RTE_BITOPS_H_
> > > > +#define _RTE_BITOPS_H_
> > > > +
> > > > +/**
> > > > + * @file
> > > > + * Bit Operations
> > > > + *
> > > > + * This file defines a generic API for bit operations.
> > > > + */
> > > > +
> > > > +#include <stdint.h>
> > > > +#include <rte_atomic.h>
> > > > +
> > > > +static inline void
> > > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > > +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > > +
> > > > +static inline void
> > > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > > +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL); }
> > > > +
> > > > +static inline int
> > > > +rte_test_bit(int nr, unsigned long *addr) {
> > > > +	int res;
> > > > +	rte_mb();
> > > > +	res = ((*addr) & (1UL << nr)) != 0;
> > > > +	rte_mb();
> > > > +
> > > > +	return res;
> > > > +}
> > >
> > > Why does rte_test_bit() not use any of the __atomic_xx functions
> instead?
> > > E.g.:
> > >
> > > static inline int
> > > rte_test_bit(int nr, unsigned long *addr) {
> > > 	return __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> > >
> > > You're right, it's better to use __atomic_xx here to keep it
> > > consistent with other APIs.
> >
> > > > +
> > > > +static inline int
> > > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > > +	unsigned long mask = (1UL << nr);
> > > > +
> > > > +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > > mask; }
> > > > +
> > > > +static inline int
> > > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > > +	unsigned long mask = (1UL << nr);
> > > > +
> > > > +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) &
> > > mask; }
> > > > +#endif /* _RTE_BITOPS_H_ */
> > > > diff --git a/lib/librte_eal/common/meson.build
> > > > b/lib/librte_eal/common/meson.build
> > > > index 386577c..a277cdf 100644
> > > > --- a/lib/librte_eal/common/meson.build
> > > > +++ b/lib/librte_eal/common/meson.build
> > > > @@ -52,6 +52,7 @@ common_headers = files(
> > > >  	'include/rte_alarm.h',
> > > >  	'include/rte_branch_prediction.h',
> > > >  	'include/rte_bus.h',
> > > > +	'include/rte_bitops.h',
> > > >  	'include/rte_bitmap.h',
> > > >  	'include/rte_class.h',
> > > >  	'include/rte_common.h',
> > > > --
> > > > 2.7.4
> > > >
> > >
> > > These functions use unsigned long as the type of their value, like
> > > they do in the PMDs.
> > >
> > > However, a generic bit operations library should preferably work
> with
> > > multiple types, like the __atomic_xx functions. Or use an well
> defined
> > > uint_NN_t type. Or have individually named functions for each type
> size,
> > e.g.
> > > rte_set_bit_32() and rte_set_bit_64().
> > >
> > Good suggestion! And will do this in next version.
> 
> The PMDs which use the common API now all use 32-bit operations, so
> the definition is changed to the uint32_t type instead of individually
> naming functions for each type size.

Unless you are certain that all current and future I/O devices only need 32 bit, it should provide variants for different types, like the rte_atomic_xxx API.

There might also be a need to support both big and little endian byte ordering? Perhaps the CPU uses a different byte ordering than the I/O device being accessed through this API. I don't know; I'm only providing half-baked feedback on this point.


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
  2019-10-23  3:09   ` Honnappa Nagarahalli
  2019-10-23  4:56   ` Jerin Jacob
@ 2019-10-23  7:46   ` Morten Brørup
  2 siblings, 0 replies; 70+ messages in thread
From: Morten Brørup @ 2019-10-23  7:46 UTC (permalink / raw)
  To: Joyce Kong, dev
  Cc: nd, thomas, jerinj, stephen, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

> -----Original Message-----
> From: Joyce Kong [mailto:joyce.kong@arm.com]
> Sent: Wednesday, October 23, 2019 4:55 AM
> 
> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, the bit operation is
> mostly applied to the IO devices, so use __ATOMIC_ACQ_REL
> to ensure the ordering.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
>  lib/librte_eal/common/Makefile                |   1 +
>  lib/librte_eal/common/include/rte_io_bitops.h | 112
> ++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build             |   1 +
>  3 files changed, 114 insertions(+)
>  create mode 100644 lib/librte_eal/common/include/rte_io_bitops.h
> 
> diff --git a/lib/librte_eal/common/Makefile
> b/lib/librte_eal/common/Makefile
> index a00d4fc..3831313 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
>  INC += rte_service.h rte_service_component.h
>  INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
>  INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
> +INC += rte_io_bitops.h
> 
>  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> rte_prefetch.h
>  GENERIC_INC += rte_memcpy.h rte_cpuflags.h
> diff --git a/lib/librte_eal/common/include/rte_io_bitops.h
> b/lib/librte_eal/common/include/rte_io_bitops.h
> new file mode 100644
> index 0000000..5f778b8
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_io_bitops.h
> @@ -0,0 +1,112 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#ifndef _RTE_IO_BITOPS_H_
> +#define _RTE_IO_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a generic API for bit operations.
> + */

-> This file defines a generic API for I/O device bit operations.

> +
> +#include <rte_lcore.h>

This library doesn't do any lcore operations. Please use the appropriate headers.

> +
> +/**
> + * Get a bit.
> + *
> + * @param nr
> + *   The bit to get.
> + * @param addr
> + *   The address to count from.

The address to count from. -> The address holding the bit. (Applies to all functions.)

> + * @return
> + *   The value of the bit.

The description of the return value can be misinterpreted. The return value is not the value of the bit, which is 0 or 1, but the value of the word holding the bit, masked with the bit position. (Applies to all functions returning a value.)

> + */
> +static inline int32_t
> +rte_io_get_bit(uint32_t nr, uint64_t *addr)

The return type should be an unsigned type. (Applies to all functions returning a value.)

The addr type for 32 bit operations should not be uint64_t *, but uint32_t *. The __atomic_xxx functions actually use this type for something, and I think it would access the wrong 32 bits on a big endian CPU.

In some of the functions below, nr is signed (int32_t); it should be unsigned (uint32_t).

And a suggestion: Consider changing the type of nr from uint32_t to unsigned int.

> +{
> +	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr);
> +}
> +
> +/**
> + * Set a bit to 1.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + */
> +static inline void
> +rte_io_set_bit(uint32_t nr, uint64_t *addr)
> +{
> +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * Set a bit to 0.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + */
> +static inline void
> +rte_io_clear_bit(int32_t nr, uint64_t *addr)
> +{
> +	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * Test if a bit is 1.
> + *
> + * @param nr
> + *   The bit to test.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   1 if the bit is 1; else 0.
> + */
> +static inline int32_t
> +rte_io_test_bit(int32_t nr, uint64_t *addr)
> +{
> +	return (__atomic_load_n(addr, __ATOMIC_ACQUIRE) & (1UL << nr)) !=
> 0;
> +}

All the other functions in this library return a word with the bit masked. This function returns 0 or 1. I think it should return a word value, similar to the other functions.

> +
> +/**
> + * Set a bit to 1 and return its old value.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The old value of the bit.
> + */
> +static inline int32_t
> +rte_io_test_and_set_bit(int32_t nr, uint64_t *addr)
> +{
> +	unsigned long mask = (1UL << nr);

unsigned long mask -> uint32_t mask. (Also applies to other functions.)

> +
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +/**
> + * Set a bit to 0 and return its old value.
> + *
> + * @param nr
> + *   The bit to set.
> + * @param addr
> + *   The address to count from.
> + * @return
> + *   The old value of the bit.
> + */
> +static inline int32_t
> +rte_io_test_and_clear_bit(int32_t nr, uint64_t *addr)
> +{
> +	unsigned long mask = (1UL << nr);
> +
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +#endif /* _RTE_IO_BITOPS_H_ */
> diff --git a/lib/librte_eal/common/meson.build
> b/lib/librte_eal/common/meson.build
> index 386577c..0a65d04 100644
> --- a/lib/librte_eal/common/meson.build
> +++ b/lib/librte_eal/common/meson.build
> @@ -52,6 +52,7 @@ common_headers = files(
>  	'include/rte_alarm.h',
>  	'include/rte_branch_prediction.h',
>  	'include/rte_bus.h',
> +	'include/rte_io_bitops.h',
>  	'include/rte_bitmap.h',
>  	'include/rte_class.h',
>  	'include/rte_common.h',
> --
> 2.7.4
> 


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-23  7:45         ` Morten Brørup
@ 2019-10-23 17:30           ` Honnappa Nagarahalli
  2019-10-24  3:38             ` Gavin Hu (Arm Technology China)
  0 siblings, 1 reply; 70+ messages in thread
From: Honnappa Nagarahalli @ 2019-10-23 17:30 UTC (permalink / raw)
  To: Morten Brørup, Joyce Kong (Arm Technology China), dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Gavin Hu (Arm Technology China),
	Stephen Hemminger, Honnappa Nagarahalli, nd

> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong (Arm
> > Technology China)
> > Sent: Wednesday, October 23, 2019 5:08 AM
> >
> > > > > -----Original Message-----
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > > > > Sent: Tuesday, October 15, 2019 9:50 AM
> > > > >
> > > > > There are a lot functions of bit operations scattered and
> > duplicated
> > > > > in PMDs, consolidating them into a common API family is
> > necessary.
> > > > > Furthermore, the bit operation is mostly applied to the IO
> > devices,
> > > > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > > >
> > > > Good initiative.
> > > >
> > > > >
> > > > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > > > ---
> > > > >  lib/librte_eal/common/Makefile             |  1 +
> > > > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > > > ++++++++++++++++++++++++++++++
> > > > >  lib/librte_eal/common/meson.build          |  1 +
> > > > >  3 files changed, 58 insertions(+)  create mode 100644
> > > > > lib/librte_eal/common/include/rte_bitops.h
> > > > >
> > > > > diff --git a/lib/librte_eal/common/Makefile
> > > > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8 100644
> > > > > --- a/lib/librte_eal/common/Makefile
> > > > > +++ b/lib/librte_eal/common/Makefile
> > > > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
> > > > > INC
> > > > > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > > > > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > > > > rte_fbarray.h rte_uuid.h
> > > > > +INC += rte_bitops.h
> > > > >
> > > > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff
> > > > > --git a/lib/librte_eal/common/include/rte_bitops.h
> > > > > b/lib/librte_eal/common/include/rte_bitops.h
> > > > > new file mode 100644
> > > > > index 0000000..4d7c5a3
> > > > > --- /dev/null
> > > > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > > > @@ -0,0 +1,56 @@
> > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > + * Copyright(c) 2019 Arm Corporation  */
> > > > > +
> > > > > +#ifndef _RTE_BITOPS_H_
> > > > > +#define _RTE_BITOPS_H_
> > > > > +
> > > > > +/**
> > > > > + * @file
> > > > > + * Bit Operations
> > > > > + *
> > > > > + * This file defines a generic API for bit operations.
> > > > > + */
> > > > > +
> > > > > +#include <stdint.h>
> > > > > +#include <rte_atomic.h>
> > > > > +
> > > > > +static inline void
> > > > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > > > +	__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > > > +
> > > > > +static inline void
> > > > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > > > +	__atomic_fetch_and(addr, ~(1UL << nr),
> __ATOMIC_ACQ_REL); }
> > > > > +
> > > > > +static inline int
> > > > > +rte_test_bit(int nr, unsigned long *addr) {
> > > > > +	int res;
> > > > > +	rte_mb();
> > > > > +	res = ((*addr) & (1UL << nr)) != 0;
> > > > > +	rte_mb();
> > > > > +
> > > > > +	return res;
> > > > > +}
> > > >
> > > > Why does rte_test_bit() not use any of the __atomic_xx functions
> > instead?
> > > > E.g.:
> > > >
> > > > static inline int
> > > > rte_test_bit(int nr, unsigned long *addr) {
> > > > 	return __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> > > >
> > > You re right, it's better to use __atomic_xx here to keep the
> > consistent with
> > > other APIs.
> > >
> > > > > +
> > > > > +static inline int
> > > > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > > > +	unsigned long mask = (1UL << nr);
> > > > > +
> > > > > +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > > > mask; }
> > > > > +
> > > > > +static inline int
> > > > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > > > +	unsigned long mask = (1UL << nr);
> > > > > +
> > > > > +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL)
> &
> > > > mask; }
> > > > > +#endif /* _RTE_BITOPS_H_ */
> > > > > diff --git a/lib/librte_eal/common/meson.build
> > > > > b/lib/librte_eal/common/meson.build
> > > > > index 386577c..a277cdf 100644
> > > > > --- a/lib/librte_eal/common/meson.build
> > > > > +++ b/lib/librte_eal/common/meson.build
> > > > > @@ -52,6 +52,7 @@ common_headers = files(
> > > > >  	'include/rte_alarm.h',
> > > > >  	'include/rte_branch_prediction.h',
> > > > >  	'include/rte_bus.h',
> > > > > +	'include/rte_bitops.h',
> > > > >  	'include/rte_bitmap.h',
> > > > >  	'include/rte_class.h',
> > > > >  	'include/rte_common.h',
> > > > > --
> > > > > 2.7.4
> > > > >
> > > >
> > > > These functions use unsigned long as the type of their value, like
> > > > they do in the PMDs.
> > > >
> > > > However, a generic bit operations library should preferably work
> > with
> > > > multiple types, like the __atomic_xx functions. Or use an well
> > defined
> > > > uint_NN_t type. Or have individually named functions for each type
> > size,
> > > e.g.
> > > > rte_set_bit_32() and rte_set_bit_64().
> > > >
> > > Good suggestion! And will do this in next version.
> >
> > The PMDs which use the common API now are all 32bit operation, so
> > change the definition to uint_32_t type instead of individually naming
> > functions for each type size.
> 
> Unless you are certain that all current and future I/O devices only need 32 bit,
> it should provide variants for different types, like the rte_atomic_xxx API.
Why not do these using macros? The __atomic_xxx APIs anyway work with multiple types. Then we do not have to provide variants for all sizes.

> 
> There might also be a need to support both big and little endian byte ordering?
> Perhaps the CPU uses a different byte ordering than the I/O device being
> accessed through this API. I don't know; I'm only providing half baked feedback
> on this point.


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-23 17:30           ` Honnappa Nagarahalli
@ 2019-10-24  3:38             ` Gavin Hu (Arm Technology China)
  2019-11-01 13:48               ` Honnappa Nagarahalli
  0 siblings, 1 reply; 70+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-10-24  3:38 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Morten Brørup,
	Joyce Kong (Arm Technology China),
	dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Stephen Hemminger, nd, nd



> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Thursday, October 24, 2019 1:30 AM
> To: Morten Brørup <mb@smartsharesystems.com>; Joyce Kong (Arm
> Technology China) <Joyce.Kong@arm.com>; dev@dpdk.org
> Cc: nd <nd@arm.com>; thomas@monjalon.net; jerinj@marvell.com;
> ravi1.kumar@amd.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> rmody@marvell.com; shshaikh@marvell.com; Gavin Hu (Arm Technology
> China) <Gavin.Hu@arm.com>; Stephen Hemminger
> <stephen@networkplumber.org>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte
> bitoperation APIs
> 
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> (Arm
> > > Technology China)
> > > Sent: Wednesday, October 23, 2019 5:08 AM
> > >
> > > > > > -----Original Message-----
> > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce
> Kong
> > > > > > Sent: Tuesday, October 15, 2019 9:50 AM
> > > > > >
> > > > > > There are a lot functions of bit operations scattered and
> > > duplicated
> > > > > > in PMDs, consolidating them into a common API family is
> > > necessary.
> > > > > > Furthermore, the bit operation is mostly applied to the IO
> > > devices,
> > > > > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > > > >
> > > > > Good initiative.
> > > > >
> > > > > >
> > > > > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > > > > ---
> > > > > >  lib/librte_eal/common/Makefile             |  1 +
> > > > > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > > > > ++++++++++++++++++++++++++++++
> > > > > >  lib/librte_eal/common/meson.build          |  1 +
> > > > > >  3 files changed, 58 insertions(+)  create mode 100644
> > > > > > lib/librte_eal/common/include/rte_bitops.h
> > > > > >
> > > > > > diff --git a/lib/librte_eal/common/Makefile
> > > > > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8 100644
> > > > > > --- a/lib/librte_eal/common/Makefile
> > > > > > +++ b/lib/librte_eal/common/Makefile
> > > > > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h
> rte_time.h
> > > > > > INC
> > > > > > += rte_service.h rte_service_component.h  INC += rte_bitmap.h
> > > > > > rte_vfio.h rte_hypervisor.h rte_test.h  INC += rte_reciprocal.h
> > > > > > rte_fbarray.h rte_uuid.h
> > > > > > +INC += rte_bitops.h
> > > > > >
> > > > > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > > > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h diff
> > > > > > --git a/lib/librte_eal/common/include/rte_bitops.h
> > > > > > b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > new file mode 100644
> > > > > > index 0000000..4d7c5a3
> > > > > > --- /dev/null
> > > > > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > @@ -0,0 +1,56 @@
> > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > + * Copyright(c) 2019 Arm Corporation  */
> > > > > > +
> > > > > > +#ifndef _RTE_BITOPS_H_
> > > > > > +#define _RTE_BITOPS_H_
> > > > > > +
> > > > > > +/**
> > > > > > + * @file
> > > > > > + * Bit Operations
> > > > > > + *
> > > > > > + * This file defines a generic API for bit operations.
> > > > > > + */
> > > > > > +
> > > > > > +#include <stdint.h>
> > > > > > +#include <rte_atomic.h>
> > > > > > +
> > > > > > +static inline void
> > > > > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > > > > +__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > > > > +
> > > > > > +static inline void
> > > > > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > > > > +__atomic_fetch_and(addr, ~(1UL << nr),
> > __ATOMIC_ACQ_REL); }
> > > > > > +
> > > > > > +static inline int
> > > > > > +rte_test_bit(int nr, unsigned long *addr) {
> > > > > > +int res;
> > > > > > +rte_mb();
> > > > > > +res = ((*addr) & (1UL << nr)) != 0;
> > > > > > +rte_mb();
> > > > > > +
> > > > > > +return res;
> > > > > > +}
> > > > >
> > > > > Why does rte_test_bit() not use any of the __atomic_xx functions
> > > instead?
> > > > > E.g.:
> > > > >
> > > > > static inline int
> > > > > rte_test_bit(int nr, unsigned long *addr) {
> > > > > return __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> > > > >
> > > > You re right, it's better to use __atomic_xx here to keep the
> > > consistent with
> > > > other APIs.
> > > >
> > > > > > +
> > > > > > +static inline int
> > > > > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > > > > +unsigned long mask = (1UL << nr);
> > > > > > +
> > > > > > +return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > > > > mask; }
> > > > > > +
> > > > > > +static inline int
> > > > > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > > > > +unsigned long mask = (1UL << nr);
> > > > > > +
> > > > > > +return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL)
> > &
> > > > > mask; }
> > > > > > +#endif /* _RTE_BITOPS_H_ */
> > > > > > diff --git a/lib/librte_eal/common/meson.build
> > > > > > b/lib/librte_eal/common/meson.build
> > > > > > index 386577c..a277cdf 100644
> > > > > > --- a/lib/librte_eal/common/meson.build
> > > > > > +++ b/lib/librte_eal/common/meson.build
> > > > > > @@ -52,6 +52,7 @@ common_headers = files(
> > > > > >  'include/rte_alarm.h',
> > > > > >  'include/rte_branch_prediction.h',
> > > > > >  'include/rte_bus.h',
> > > > > > +'include/rte_bitops.h',
> > > > > >  'include/rte_bitmap.h',
> > > > > >  'include/rte_class.h',
> > > > > >  'include/rte_common.h',
> > > > > > --
> > > > > > 2.7.4
> > > > > >
> > > > >
> > > > > These functions use unsigned long as the type of their value, like
> > > > > they do in the PMDs.
> > > > >
> > > > > However, a generic bit operations library should preferably work
> > > with
> > > > > multiple types, like the __atomic_xx functions. Or use an well
> > > defined
> > > > > uint_NN_t type. Or have individually named functions for each type
> > > size,
> > > > e.g.
> > > > > rte_set_bit_32() and rte_set_bit_64().
> > > > >
> > > > Good suggestion! And will do this in next version.
> > >
> > > The PMDs which use the common API now are all 32bit operation, so
> > > change the definition to uint_32_t type instead of individually naming
> > > functions for each type size.
> >
> > Unless you are certain that all current and future I/O devices only need 32
> bit,
> > it should provide variants for different types, like the rte_atomic_xxx API.
> Why not do these using macros? The __atomic_xxx APIs anyway work with
> multiple types. Then we do not have to provide variants for all sizes.

We have really come to the point where the community needs to give a guideline on how to generalize APIs to support arguments of multiple sizes.
It looks like macros are disliked by the community, for readability and debuggability reasons.
Besides macros, there is an alternative: _Generic https://gcc.gnu.org/onlinedocs/gccint/GENERIC.html, but it is not supported by older gcc (<4.9), which would impose a hard requirement on the gcc/clang version.

We have to compromise over all these: code duplication, readability and debuggability.
/Gavin
> >
> > There might also be a need to support both big and little endian byte
> ordering?
> > Perhaps the CPU uses a different byte ordering than the I/O device being
> > accessed through this API. I don't know; I'm only providing half baked
> feedback
> > on this point.
> 


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 0/6] " Joyce Kong
@ 2019-10-25 13:14   ` David Marchand
  2019-10-29 16:42   ` Thomas Monjalon
  1 sibling, 0 replies; 70+ messages in thread
From: David Marchand @ 2019-10-25 13:14 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dev, nd, Thomas Monjalon, Jerin Jacob Kollanukkaran,
	Stephen Hemminger, mb, Honnappa Nagarahalli, Gavin Hu,
	Ravi Kumar, Rasesh Mody, Shahed Shaikh, Ziyang Xuan,
	Xiaoyun Wang, Guoyang Zhou

On Wed, Oct 23, 2019 at 4:55 AM Joyce Kong <joyce.kong@arm.com> wrote:
>
> There are a lot functions of bit operations scattered in PMDs,
> consolidate them into a common API family and applied in different
> PMDs to reduce code duplication.
>
> v2:
>   1. Add doxygen comments for the rte bit operation API(suggested by Stephen Hemminger).
>   2. Add test cases for common rte bit operation API(suggested by Stephen Hemminger).
>   3. Change the header file to rte_io_bitops.h and the operation to rte_io_set_bit()etc.,
>      as the API uses barriers inside and the barriers are only needed for IO operations
>      (suggested by Jerin Jacob).
>   4. Use an well defined uint_NN_t type(suggested by Morten Brørup).

Thanks for working on this.
This series is a cleanup and worth looking at, yet it came rather late.

Discussion and enhancement can still continue, but it will be deferred to 20.02.


--
David Marchand


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 0/6] " Joyce Kong
  2019-10-25 13:14   ` David Marchand
@ 2019-10-29 16:42   ` Thomas Monjalon
  2019-10-30  9:55     ` Gavin Hu (Arm Technology China)
  1 sibling, 1 reply; 70+ messages in thread
From: Thomas Monjalon @ 2019-10-29 16:42 UTC (permalink / raw)
  To: Joyce Kong
  Cc: dev, nd, jerinj, stephen, mb, honnappa.nagarahalli, gavin.hu,
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, adrien.mazarguil

23/10/2019 04:54, Joyce Kong:
> There are a lot functions of bit operations scattered in PMDs,
> consolidate them into a common API family and applied in different
> PMDs to reduce code duplication.

Please, could you look at what Adrien did in the Mellanox PMD?

http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28




^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-29 16:42   ` Thomas Monjalon
@ 2019-10-30  9:55     ` Gavin Hu (Arm Technology China)
  2019-10-30 10:17       ` Thomas Monjalon
  2019-10-30 12:32       ` Jerin Jacob
  0 siblings, 2 replies; 70+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-10-30  9:55 UTC (permalink / raw)
  To: thomas, Joyce Kong (Arm Technology China)
  Cc: dev, nd, jerinj, stephen, mb, Honnappa Nagarahalli, ravi1.kumar,
	rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang,
	adrien.mazarguil, nd

Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Wednesday, October 30, 2019 12:43 AM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; jerinj@marvell.com;
> stephen@networkplumber.org; mb@smartsharesystems.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology
> China) <Gavin.Hu@arm.com>; ravi1.kumar@amd.com; rmody@marvell.com;
> shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> adrien.mazarguil@6wind.com
> Subject: Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation
> APIs in PMDs
> 
> 23/10/2019 04:54, Joyce Kong:
> > There are a lot functions of bit operations scattered in PMDs,
> > consolidate them into a common API family and applied in different
> > PMDs to reduce code duplication.
> 
> Please, could you look at what Adrien did in the Mellanox PMD?
> 
> http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28
The code has less duplication, but it requires a less natural declaration of variables:
http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5.h#L607
Should we go this way?
/Gavin
> 
> 


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-30  9:55     ` Gavin Hu (Arm Technology China)
@ 2019-10-30 10:17       ` Thomas Monjalon
  2019-10-30 12:32       ` Jerin Jacob
  1 sibling, 0 replies; 70+ messages in thread
From: Thomas Monjalon @ 2019-10-30 10:17 UTC (permalink / raw)
  To: Gavin Hu (Arm Technology China)
  Cc: Joyce Kong (Arm Technology China),
	dev, nd, jerinj, stephen, mb, Honnappa Nagarahalli, ravi1.kumar,
	rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang,
	adrien.mazarguil

30/10/2019 10:55, Gavin Hu (Arm Technology China):
> Hi Thomas,
> 
> From: Thomas Monjalon <thomas@monjalon.net>
> > 23/10/2019 04:54, Joyce Kong:
> > > There are a lot functions of bit operations scattered in PMDs,
> > > consolidate them into a common API family and applied in different
> > > PMDs to reduce code duplication.
> > 
> > Please, could you look at what Adrien did in the Mellanox PMD?
> > 
> > http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28
> The code has less duplication, but it requires a less natural declaration of variables
> http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5.h#L607 
> Should we take this way?

I don't know which way is best.
I suggested to read this code for 2 reasons:
1. we can be inspired
2. it may be replaced by the new common API as you did for other drivers



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-30  9:55     ` Gavin Hu (Arm Technology China)
  2019-10-30 10:17       ` Thomas Monjalon
@ 2019-10-30 12:32       ` Jerin Jacob
  2019-10-30 13:02         ` Morten Brørup
  1 sibling, 1 reply; 70+ messages in thread
From: Jerin Jacob @ 2019-10-30 12:32 UTC (permalink / raw)
  To: Gavin Hu (Arm Technology China)
  Cc: thomas, Joyce Kong (Arm Technology China),
	dev, nd, jerinj, stephen, mb, Honnappa Nagarahalli, ravi1.kumar,
	rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang,
	adrien.mazarguil

On Wed, Oct 30, 2019 at 3:25 PM Gavin Hu (Arm Technology China)
<Gavin.Hu@arm.com> wrote:
>
> Hi Thomas,
>
> > -----Original Message-----
> > From: Thomas Monjalon <thomas@monjalon.net>
> > Sent: Wednesday, October 30, 2019 12:43 AM
> > To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>
> > Cc: dev@dpdk.org; nd <nd@arm.com>; jerinj@marvell.com;
> > stephen@networkplumber.org; mb@smartsharesystems.com; Honnappa
> > Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology
> > China) <Gavin.Hu@arm.com>; ravi1.kumar@amd.com; rmody@marvell.com;
> > shshaikh@marvell.com; xuanziyang2@huawei.com;
> > cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> > adrien.mazarguil@6wind.com
> > Subject: Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation
> > APIs in PMDs
> >
> > 23/10/2019 04:54, Joyce Kong:
> > > There are a lot functions of bit operations scattered in PMDs,
> > > consolidate them into a common API family and applied in different
> > > PMDs to reduce code duplication.
> >
> > Please, could you look at what Adrien did in the Mellanox PMD?
> >
> > http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28
> The code has less duplication, but it requires a less natural declaration of variables
> http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5.h#L607
> Should we take this way?


IMO, we need to consider the macro-based scheme only as a last resort.


> /Gavin
> >
> >
>

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-30 12:32       ` Jerin Jacob
@ 2019-10-30 13:02         ` Morten Brørup
  2019-10-31 10:39           ` Gavin Hu (Arm Technology China)
  0 siblings, 1 reply; 70+ messages in thread
From: Morten Brørup @ 2019-10-30 13:02 UTC (permalink / raw)
  To: Jerin Jacob, Gavin Hu (Arm Technology China)
  Cc: thomas, Joyce Kong (Arm Technology China),
	dev, nd, jerinj, stephen, Honnappa Nagarahalli, ravi1.kumar,
	rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang,
	adrien.mazarguil

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Jerin Jacob
> Sent: Wednesday, October 30, 2019 1:33 PM
> 
> On Wed, Oct 30, 2019 at 3:25 PM Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com> wrote:
> >
> > Hi Thomas,
> >
> > > -----Original Message-----
> > > From: Thomas Monjalon <thomas@monjalon.net>
> > > Sent: Wednesday, October 30, 2019 12:43 AM
> > >
> > > 23/10/2019 04:54, Joyce Kong:
> > > > There are a lot functions of bit operations scattered in PMDs,
> > > > consolidate them into a common API family and applied in different
> > > > PMDs to reduce code duplication.
> > >
> > > Please, could you look at what Adrien did in the Mellanox PMD?
> > >
> > >
> http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28
> > The code has less duplication, but it requires a less natural declaration
> of variables
> > http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5.h#L607
> > Should we take this way?
> 
> 
> IMO, We need to consider the MACRO based scheme only as of the last resort.
> 

I agree.

The EAL library already has an I/O device memory access API, i.e. with functionality closely related to the proposed I/O device bit operation API:
http://code.dpdk.org/dpdk/latest/source/lib/librte_eal/common/include/generic/rte_io.h

I would prefer a similar approach, and API familiarity would be my strongest argument.


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation APIs in PMDs
  2019-10-30 13:02         ` Morten Brørup
@ 2019-10-31 10:39           ` Gavin Hu (Arm Technology China)
  0 siblings, 0 replies; 70+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-10-31 10:39 UTC (permalink / raw)
  To: Morten Brørup, Jerin Jacob
  Cc: thomas, Joyce Kong (Arm Technology China),
	dev, nd, jerinj, stephen, Honnappa Nagarahalli, ravi1.kumar,
	rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang,
	adrien.mazarguil, nd

> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Wednesday, October 30, 2019 9:02 PM
> To: Jerin Jacob <jerinjacobk@gmail.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>
> Cc: thomas@monjalon.net; Joyce Kong (Arm Technology China)
> <Joyce.Kong@arm.com>; dev@dpdk.org; nd <nd@arm.com>;
> jerinj@marvell.com; stephen@networkplumber.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; ravi1.kumar@amd.com;
> rmody@marvell.com; shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> adrien.mazarguil@6wind.com
> Subject: RE: [dpdk-dev] [PATCH v2 0/6] implement common rte bit operation
> APIs in PMDs
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Jerin Jacob
> > Sent: Wednesday, October 30, 2019 1:33 PM
> >
> > On Wed, Oct 30, 2019 at 3:25 PM Gavin Hu (Arm Technology China)
> > <Gavin.Hu@arm.com> wrote:
> > >
> > > Hi Thomas,
> > >
> > > > -----Original Message-----
> > > > From: Thomas Monjalon <thomas@monjalon.net>
> > > > Sent: Wednesday, October 30, 2019 12:43 AM
> > > >
> > > > 23/10/2019 04:54, Joyce Kong:
> > > > > There are a lot functions of bit operations scattered in PMDs,
> > > > > consolidate them into a common API family and applied in different
> > > > > PMDs to reduce code duplication.
> > > >
> > > > Please, could you look at what Adrien did in the Mellanox PMD?
> > > >
> > > >
> > http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5_utils.h#L28
> > > The code has less duplication, but it requires a less natural declaration
> > of variables
> > > http://code.dpdk.org/dpdk/latest/source/drivers/net/mlx5/mlx5.h#L607
> > > Should we take this way?
> >
> >
> > IMO, We need to consider the MACRO based scheme only as of the last resort.
> >
> 
> I agree.
> 
> The EAL library already has an I/O device memory access API, i.e. with
> functionality closely related to the proposed I/O device bit operation API:
> http://code.dpdk.org/dpdk/latest/source/lib/librte_eal/common/include/gene
> ric/rte_io.h
> 
> I would prefer a similar approach, and API familiarity would be my strongest
> argument.
Yes, this is a more natural way, and engineers are more familiar with these APIs.
We will go this way, as more people voted for it.
Thanks also to Thomas for the comment; we are inspired by that code and will also add assert() to guarantee that the 'bit' argument is in the valid range.
We used this common API in some PMDs, but not extensively; the reason is that we want to finalize the API first (with your comments coming in) and then propagate it later.
/Gavin   



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-10-24  3:38             ` Gavin Hu (Arm Technology China)
@ 2019-11-01 13:48               ` Honnappa Nagarahalli
  2019-11-03 15:45                 ` Gavin Hu (Arm Technology China)
  0 siblings, 1 reply; 70+ messages in thread
From: Honnappa Nagarahalli @ 2019-11-01 13:48 UTC (permalink / raw)
  To: Gavin Hu (Arm Technology China),
	Morten Brørup, Joyce Kong (Arm Technology China),
	dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Stephen Hemminger,
	Honnappa Nagarahalli, nd

> >
> > >
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > (Arm
> > > > Technology China)
> > > > Sent: Wednesday, October 23, 2019 5:08 AM
> > > >
> > > > > > > -----Original Message-----
> > > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce
> > Kong
> > > > > > > Sent: Tuesday, October 15, 2019 9:50 AM
> > > > > > >
> > > > > > > There are a lot functions of bit operations scattered and
> > > > duplicated
> > > > > > > in PMDs, consolidating them into a common API family is
> > > > necessary.
> > > > > > > Furthermore, the bit operation is mostly applied to the IO
> > > > devices,
> > > > > > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > > > > >
> > > > > > Good initiative.
> > > > > >
> > > > > > >
> > > > > > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > > > > > ---
> > > > > > >  lib/librte_eal/common/Makefile             |  1 +
> > > > > > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > > > > > ++++++++++++++++++++++++++++++
> > > > > > >  lib/librte_eal/common/meson.build          |  1 +
> > > > > > >  3 files changed, 58 insertions(+)  create mode 100644
> > > > > > > lib/librte_eal/common/include/rte_bitops.h
> > > > > > >
> > > > > > > diff --git a/lib/librte_eal/common/Makefile
> > > > > > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8
> > > > > > > 100644
> > > > > > > --- a/lib/librte_eal/common/Makefile
> > > > > > > +++ b/lib/librte_eal/common/Makefile
> > > > > > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h
> > rte_time.h
> > > > > > > INC
> > > > > > > += rte_service.h rte_service_component.h  INC +=
> > > > > > > +rte_bitmap.h
> > > > > > > rte_vfio.h rte_hypervisor.h rte_test.h  INC +=
> > > > > > > rte_reciprocal.h rte_fbarray.h rte_uuid.h
> > > > > > > +INC += rte_bitops.h
> > > > > > >
> > > > > > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > > > > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h
> > > > > > > diff --git a/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > new file mode 100644
> > > > > > > index 0000000..4d7c5a3
> > > > > > > --- /dev/null
> > > > > > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > @@ -0,0 +1,56 @@
> > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > + * Copyright(c) 2019 Arm Corporation  */
> > > > > > > +
> > > > > > > +#ifndef _RTE_BITOPS_H_
> > > > > > > +#define _RTE_BITOPS_H_
> > > > > > > +
> > > > > > > +/**
> > > > > > > + * @file
> > > > > > > + * Bit Operations
> > > > > > > + *
> > > > > > > + * This file defines a generic API for bit operations.
> > > > > > > + */
> > > > > > > +
> > > > > > > +#include <stdint.h>
> > > > > > > +#include <rte_atomic.h>
> > > > > > > +
> > > > > > > +static inline void
> > > > > > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > > > > > +__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > > > > > +
> > > > > > > +static inline void
> > > > > > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > > > > > +__atomic_fetch_and(addr, ~(1UL << nr),
> > > __ATOMIC_ACQ_REL); }
> > > > > > > +
> > > > > > > +static inline int
> > > > > > > +rte_test_bit(int nr, unsigned long *addr) { int res;
> > > > > > > +rte_mb(); res = ((*addr) & (1UL << nr)) != 0; rte_mb();
> > > > > > > +
> > > > > > > +return res;
> > > > > > > +}
> > > > > >
> > > > > > Why does rte_test_bit() not use any of the __atomic_xx
> > > > > > functions
> > > > instead?
> > > > > > E.g.:
> > > > > >
> > > > > > static inline int
> > > > > > rte_test_bit(int nr, unsigned long *addr) { return
> > > > > > __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> > > > > >
> > > > > You re right, it's better to use __atomic_xx here to keep the
> > > > consistent with
> > > > > other APIs.
> > > > >
> > > > > > > +
> > > > > > > +static inline int
> > > > > > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > > > > > +unsigned long mask = (1UL << nr);
> > > > > > > +
> > > > > > > +return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > > > > > mask; }
> > > > > > > +
> > > > > > > +static inline int
> > > > > > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > > > > > +unsigned long mask = (1UL << nr);
> > > > > > > +
> > > > > > > +return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL)
> > > &
> > > > > > mask; }
> > > > > > > +#endif /* _RTE_BITOPS_H_ */
> > > > > > > diff --git a/lib/librte_eal/common/meson.build
> > > > > > > b/lib/librte_eal/common/meson.build
> > > > > > > index 386577c..a277cdf 100644
> > > > > > > --- a/lib/librte_eal/common/meson.build
> > > > > > > +++ b/lib/librte_eal/common/meson.build
> > > > > > > @@ -52,6 +52,7 @@ common_headers = files(
> > > > > > > 'include/rte_alarm.h',  'include/rte_branch_prediction.h',
> > > > > > >  'include/rte_bus.h',
> > > > > > > +'include/rte_bitops.h',
> > > > > > >  'include/rte_bitmap.h',
> > > > > > >  'include/rte_class.h',
> > > > > > >  'include/rte_common.h',
> > > > > > > --
> > > > > > > 2.7.4
> > > > > > >
> > > > > >
> > > > > > These functions use unsigned long as the type of their value,
> > > > > > like they do in the PMDs.
> > > > > >
> > > > > > However, a generic bit operations library should preferably
> > > > > > work
> > > > with
> > > > > > multiple types, like the __atomic_xx functions. Or use an well
> > > > defined
> > > > > > uint_NN_t type. Or have individually named functions for each
> > > > > > type
> > > > size,
> > > > > e.g.
> > > > > > rte_set_bit_32() and rte_set_bit_64().
> > > > > >
> > > > > Good suggestion! And will do this in next version.
> > > >
> > > > The PMDs which use the common API now are all 32bit operation, so
> > > > change the definition to uint_32_t type instead of individually
> > > > naming functions for each type size.
> > >
> > > Unless you are certain that all current and future I/O devices only
> > > need 32
> > bit,
> > > it should provide variants for different types, like the rte_atomic_xxx API.
> > Why not do these using macros? The __atomic_xxx APIs anyway work with
> > multiple types. Then we do not have to provide variants for all sizes.
> 
> We really come to the point for the community to give a guideline: how to
> generalize APIs to support multiple-sized arguments.
> Looks like macros was disliked by the community, for readability and
> debuggability reasons.
IMO, it should not be considered a blanket ban on using macros. It should be considered on a case-by-case basis. For example, I do not see the point of writing the same API for 32b/64b/128b, especially when the APIs are one-liners.

> Besides macros, there are an alternative: _Generic
> https://gcc.gnu.org/onlinedocs/gccint/GENERIC.html, but it is not supported
> by older gcc(<4.9), this made a hard requirement for gcc/clang.
> 
> We have to compromise over all these: code duplication, readability and
> debuggability.
> /Gavin
> > >
> > > There might also be a need to support both big and little endian
> > > byte
> > ordering?
> > > Perhaps the CPU uses a different byte ordering than the I/O device
> > > being accessed through this API. I don't know; I'm only providing
> > > half baked
> > feedback
> > > on this point.
> >
> 


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs
  2019-11-01 13:48               ` Honnappa Nagarahalli
@ 2019-11-03 15:45                 ` Gavin Hu (Arm Technology China)
  0 siblings, 0 replies; 70+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-11-03 15:45 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Morten Brørup,
	Joyce Kong (Arm Technology China),
	dev
  Cc: nd, thomas, jerinj, ravi1.kumar, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang, rmody, shshaikh, Stephen Hemminger, nd, nd

Hi Honnappa,
> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Friday, November 1, 2019 9:48 PM
> To: Gavin Hu (Arm Technology China) <Gavin.Hu@arm.com>; Morten
> Brørup <mb@smartsharesystems.com>; Joyce Kong (Arm Technology China)
> <Joyce.Kong@arm.com>; dev@dpdk.org
> Cc: nd <nd@arm.com>; thomas@monjalon.net; jerinj@marvell.com;
> ravi1.kumar@amd.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
> rmody@marvell.com; shshaikh@marvell.com; Stephen Hemminger
> <stephen@networkplumber.org>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte
> bitoperation APIs
> 
> > >
> > > >
> > > > > -----Original Message-----
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > > (Arm
> > > > > Technology China)
> > > > > Sent: Wednesday, October 23, 2019 5:08 AM
> > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce
> > > Kong
> > > > > > > > Sent: Tuesday, October 15, 2019 9:50 AM
> > > > > > > >
> > > > > > > > There are a lot functions of bit operations scattered and
> > > > > duplicated
> > > > > > > > in PMDs, consolidating them into a common API family is
> > > > > necessary.
> > > > > > > > Furthermore, the bit operation is mostly applied to the IO
> > > > > devices,
> > > > > > > > so use __ATOMIC_ACQ_REL to ensure the ordering.
> > > > > > >
> > > > > > > Good initiative.
> > > > > > >
> > > > > > > >
> > > > > > > > Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> > > > > > > > ---
> > > > > > > >  lib/librte_eal/common/Makefile             |  1 +
> > > > > > > >  lib/librte_eal/common/include/rte_bitops.h | 56
> > > > > > > > ++++++++++++++++++++++++++++++
> > > > > > > >  lib/librte_eal/common/meson.build          |  1 +
> > > > > > > >  3 files changed, 58 insertions(+)  create mode 100644
> > > > > > > > lib/librte_eal/common/include/rte_bitops.h
> > > > > > > >
> > > > > > > > diff --git a/lib/librte_eal/common/Makefile
> > > > > > > > b/lib/librte_eal/common/Makefile index a00d4fc..8586ca8
> > > > > > > > 100644
> > > > > > > > --- a/lib/librte_eal/common/Makefile
> > > > > > > > +++ b/lib/librte_eal/common/Makefile
> > > > > > > > @@ -18,6 +18,7 @@ INC += rte_malloc.h rte_keepalive.h
> > > rte_time.h
> > > > > > > > INC
> > > > > > > > += rte_service.h rte_service_component.h  INC +=
> > > > > > > > +rte_bitmap.h
> > > > > > > > rte_vfio.h rte_hypervisor.h rte_test.h  INC +=
> > > > > > > > rte_reciprocal.h rte_fbarray.h rte_uuid.h
> > > > > > > > +INC += rte_bitops.h
> > > > > > > >
> > > > > > > >  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h
> > > > > > > > rte_prefetch.h  GENERIC_INC += rte_memcpy.h rte_cpuflags.h
> > > > > > > > diff --git a/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > > b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > > new file mode 100644
> > > > > > > > index 0000000..4d7c5a3
> > > > > > > > --- /dev/null
> > > > > > > > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > > > > > > > @@ -0,0 +1,56 @@
> > > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > > + * Copyright(c) 2019 Arm Corporation  */
> > > > > > > > +
> > > > > > > > +#ifndef _RTE_BITOPS_H_
> > > > > > > > +#define _RTE_BITOPS_H_
> > > > > > > > +
> > > > > > > > +/**
> > > > > > > > + * @file
> > > > > > > > + * Bit Operations
> > > > > > > > + *
> > > > > > > > + * This file defines a generic API for bit operations.
> > > > > > > > + */
> > > > > > > > +
> > > > > > > > +#include <stdint.h>
> > > > > > > > +#include <rte_atomic.h>
> > > > > > > > +
> > > > > > > > +static inline void
> > > > > > > > +rte_set_bit(unsigned int nr, unsigned long *addr) {
> > > > > > > > +__atomic_fetch_or(addr, (1UL << nr), __ATOMIC_ACQ_REL); }
> > > > > > > > +
> > > > > > > > +static inline void
> > > > > > > > +rte_clear_bit(int nr, unsigned long *addr) {
> > > > > > > > +__atomic_fetch_and(addr, ~(1UL << nr),
> > > > __ATOMIC_ACQ_REL); }
> > > > > > > > +
> > > > > > > > +static inline int
> > > > > > > > +rte_test_bit(int nr, unsigned long *addr) { int res;
> > > > > > > > +rte_mb(); res = ((*addr) & (1UL << nr)) != 0; rte_mb();
> > > > > > > > +
> > > > > > > > +return res;
> > > > > > > > +}
> > > > > > >
> > > > > > > Why does rte_test_bit() not use any of the __atomic_xx
> > > > > > > functions
> > > > > instead?
> > > > > > > E.g.:
> > > > > > >
> > > > > > > static inline int
> > > > > > > rte_test_bit(int nr, unsigned long *addr) { return
> > > > > > > __atomic_load_n(addr, __ATOMIC_ACQUIRE); }
> > > > > > >
> > > > > > You re right, it's better to use __atomic_xx here to keep the
> > > > > consistent with
> > > > > > other APIs.
> > > > > >
> > > > > > > > +
> > > > > > > > +static inline int
> > > > > > > > +rte_test_and_set_bit(int nr, unsigned long *addr) {
> > > > > > > > +unsigned long mask = (1UL << nr);
> > > > > > > > +
> > > > > > > > +return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) &
> > > > > > > mask; }
> > > > > > > > +
> > > > > > > > +static inline int
> > > > > > > > +rte_test_and_clear_bit(int nr, unsigned long *addr) {
> > > > > > > > +unsigned long mask = (1UL << nr);
> > > > > > > > +
> > > > > > > > +return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL)
> > > > &
> > > > > > > mask; }
> > > > > > > > +#endif /* _RTE_BITOPS_H_ */
> > > > > > > > diff --git a/lib/librte_eal/common/meson.build
> > > > > > > > b/lib/librte_eal/common/meson.build
> > > > > > > > index 386577c..a277cdf 100644
> > > > > > > > --- a/lib/librte_eal/common/meson.build
> > > > > > > > +++ b/lib/librte_eal/common/meson.build
> > > > > > > > @@ -52,6 +52,7 @@ common_headers = files(
> > > > > > > > 'include/rte_alarm.h',  'include/rte_branch_prediction.h',
> > > > > > > >  'include/rte_bus.h',
> > > > > > > > +'include/rte_bitops.h',
> > > > > > > >  'include/rte_bitmap.h',
> > > > > > > >  'include/rte_class.h',
> > > > > > > >  'include/rte_common.h',
> > > > > > > > --
> > > > > > > > 2.7.4
> > > > > > > >
> > > > > > >
> > > > > > > These functions use unsigned long as the type of their value,
> > > > > > > like they do in the PMDs.
> > > > > > >
> > > > > > > However, a generic bit operations library should preferably
> > > > > > > work
> > > > > with
> > > > > > > multiple types, like the __atomic_xx functions. Or use an well
> > > > > defined
> > > > > > > uint_NN_t type. Or have individually named functions for each
> > > > > > > type
> > > > > size,
> > > > > > e.g.
> > > > > > > rte_set_bit_32() and rte_set_bit_64().
> > > > > > >
> > > > > > Good suggestion! And will do this in next version.
> > > > >
> > > > > The PMDs which use the common API now are all 32bit operation, so
> > > > > change the definition to uint_32_t type instead of individually
> > > > > naming functions for each type size.
> > > >
> > > > Unless you are certain that all current and future I/O devices only
> > > > need 32
> > > bit,
> > > > it should provide variants for different types, like the rte_atomic_xxx
> API.
> > > Why not do these using macros? The __atomic_xxx APIs anyway work
> with
> > > multiple types. Then we do not have to provide variants for all sizes.
> >
> > We really come to the point for the community to give a guideline: how to
> > generalize APIs to support multiple-sized arguments.
> > Looks like macros was disliked by the community, for readability and
> > debuggability reasons.
> IMO, it should not be considered as a blanket ban on using macros. It should
> be considered case by case basis. For ex: I do not see a point in writing the
> same API for 32b/64b/128b especially when the APIs are one liners.
Jerin and Morten have a different opinion: they see the macro-based scheme only as a last resort.
Another argument is API familiarity (similar to the rte io read APIs).
Joyce has made a new version; let's see how the community balances the duplication against the other considerations.
/Gavin
> 
> > Besides macros, there are an alternative: _Generic
> > https://gcc.gnu.org/onlinedocs/gccint/GENERIC.html, but it is not
> supported
> > by older gcc(<4.9), this made a hard requirement for gcc/clang.
> >
> > We have to compromise over all these: code duplication, readability and
> > debuggability.
> > /Gavin
> > > >
> > > > There might also be a need to support both big and little endian
> > > > byte
> > > ordering?
> > > > Perhaps the CPU uses a different byte ordering than the I/O device
> > > > being accessed through this API. I don't know; I'm only providing
> > > > half baked
> > > feedback
> > > > on this point.
> > >
> >
> 


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v3 0/6] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (12 preceding siblings ...)
  2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 6/6] net/qede: " Joyce Kong
@ 2019-11-18 10:06 ` Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (19 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered across the PMDs;
consolidate them into a common API family and apply it in the different
PMDs to reduce code duplication.

v3:
  1. Change the API's header file back to rte_bitops.h, then implement both 32-bit and
     64-bit operations with and without C11 atomic memory ordering.
  2. Add a multi-core test case for the bit operations that are implemented with memory ordering.
  3. Modify the docs of both the APIs and the test cases.

v2:
  1. Add doxygen comments for the rte bit operation API (suggested by Stephen Hemminger).
  2. Add test cases for the common rte bit operation API (suggested by Stephen Hemminger).
  3. Change the header file to rte_io_bitops.h and the operations to rte_io_set_bit() etc.,
     as the API uses barriers inside and the barriers are only needed for IO operations
     (suggested by Jerin Jacob).
  4. Use a well-defined uint_NN_t type (suggested by Morten Brørup).

Joyce Kong (6):
  lib/eal: implement the family of rte bit operation APIs
  test/bitops: add bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead

 app/test/Makefile                          |   1 +
 app/test/autotest_data.py                  |   6 +
 app/test/meson.build                       |   2 +
 app/test/test_bitops.c                     | 303 +++++++++++++
 doc/api/doxy-api-index.md                  |   3 +-
 drivers/net/axgbe/axgbe_common.h           |  29 +-
 drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
 drivers/net/axgbe/axgbe_mdio.c             |  14 +-
 drivers/net/bnx2x/bnx2x.c                  | 209 ++++-----
 drivers/net/bnx2x/bnx2x.h                  |   4 -
 drivers/net/bnx2x/ecore_sp.h               |   9 +-
 drivers/net/hinic/Makefile                 |   1 +
 drivers/net/hinic/base/hinic_compat.h      |  33 +-
 drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +-
 drivers/net/hinic/meson.build              |   2 +
 drivers/net/qede/base/bcm_osal.c           |  20 -
 drivers/net/qede/base/bcm_osal.h           |  10 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   1 +
 20 files changed, 919 insertions(+), 233 deletions(-)
 create mode 100644 app/test/test_bitops.c
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

-- 
2.17.1


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (13 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2019-11-18 10:06 ` Joyce Kong
  2019-11-18 10:52   ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 2/6] test/bitops: add bit operation test case Joyce Kong
                   ` (18 subsequent siblings)
  33 siblings, 1 reply; 70+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered and
duplicated across PMDs, so consolidating them into a common API
family is necessary. Furthermore, when a bit operation
is applied to IO devices, __ATOMIC_ACQ_REL is used to
ensure the ordering of the IO bit operation.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 doc/api/doxy-api-index.md                  |   3 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   1 +
 4 files changed, 478 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index dff496be0..1aed266d3 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -181,4 +181,5 @@ The public API headers are grouped by topics:
   [common]             (@ref rte_common.h),
   [experimental APIs]  (@ref rte_compat.h),
   [ABI versioning]     (@ref rte_function_versioning.h),
-  [version]            (@ref rte_version.h)
+  [version]            (@ref rte_version.h),
+  [bitops]             (@ref rte_bitops.h)
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index c2c6d92cd..dd025c130 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_bitops.h b/lib/librte_eal/common/include/rte_bitops.h
new file mode 100644
index 000000000..16c0a23f7
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitops.h
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_BITOPS_H_
+#define _RTE_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines an API for bit operations with and without memory ordering.
+ */
+
+#include <stdint.h>
+#include <assert.h>
+#include <rte_compat.h>
+
+/*---------------------------- 32 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 32);
+
+	uint32_t mask = 1UL << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/*---------------------------- 64 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64_relaxed(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64(unsigned int nr, unsigned long *addr)
+{
+	assert(nr < 64);
+
+	uint64_t mask = 1UL << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index d6a149bec..e2f9c163c 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -52,6 +52,7 @@ common_headers = files(
 	'include/rte_alarm.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
+	'include/rte_bitops.h',
 	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
-- 
2.17.1


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v3 2/6] test/bitops: add bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (14 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-11-18 10:06 ` Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (17 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Add test cases for set bit, clear bit, test and set bit,
test and clear bit operations.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 app/test/Makefile         |   1 +
 app/test/autotest_data.py |   6 +
 app/test/meson.build      |   2 +
 app/test/test_bitops.c    | 303 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 312 insertions(+)
 create mode 100644 app/test/test_bitops.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 57930c00b..4f3327492 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -70,6 +70,7 @@ SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
 SRCS-y += test_bitmap.c
+SRCS-y += test_bitops.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 6deb97bcc..e06344087 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -404,6 +404,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Bitops autotest",
+        "Command": "bitops_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Hash multiwriter autotest",
         "Command": "hash_multiwriter_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index ff59c3131..33b41353c 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -14,6 +14,7 @@ test_sources = files('commands.c',
 	'test_atomic.c',
 	'test_barrier.c',
 	'test_bitratestats.c',
+	'test_bitops.c',
 	'test_bpf.c',
 	'test_byteorder.c',
 	'test_cmdline.c',
@@ -167,6 +168,7 @@ fast_test_names = [
         'alarm_autotest',
         'atomic_autotest',
         'byteorder_autotest',
+        'bitops_autotest',
         'cmdline_autotest',
         'common_autotest',
         'cpuflags_autotest',
diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
new file mode 100644
index 000000000..9d0ac0299
--- /dev/null
+++ b/app/test/test_bitops.c
@@ -0,0 +1,303 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_bitops.h>
+#include <rte_launch.h>
+#include "test.h"
+
+unsigned long val32 = 1UL << 10;
+unsigned long val64 = 1UL << 33;
+unsigned int synchro;
+unsigned int count32;
+unsigned int count64;
+
+#define MAX_BITS_32 32
+#define MAX_BITS_64 64
+/*
+ * Bitops functions
+ * ================
+ *
+ * - The main test function performs several subtests.
+ * - For relaxed version, check bit operations on one core.
+ *   - Initialize valXX to specified values, then set each bit of valXX
+ *     to 1 one by one in "test_bitops_set_relaxed".
+ *
+ *   - Clear each bit of valXX to 0 one by one in "test_bitops_clear_relaxed".
+ *
+ *   - Function "test_bitops_test_set_clear_relaxed" checks whether each bit
+ *     of valXX can do "test and set" and "test and clear" correctly.
+ *
+ * - For C11 atomic barrier version, check bit operations on multi cores.
+ *   - Each bit of valXX is set to 1, then cleared to 0 on each core in
+ *     "test_bitops_set_clear". The main test function checks that once all
+ *     lcores finish their set_clear, the value of valXX is still zero.
+ *
+ *   - The cores wait for a synchro flag which is set by the main test
+ *     function. Then all cores do "rte_test_and_set_bitXX" or
+ *     "rte_test_and_clear_bitXX" at the same time; "countXX", which is checked
+ *     as the result later, is incremented by one or not according to the
+ *     original bit value.
+ *
+ */
+
+static int
+test_bitops_set_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_test_set_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_test_and_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_clear_bit32_relaxed(i, &val32)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+	}
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_test_and_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_clear_bit64_relaxed(i, &val64)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_set_clear(__attribute__((unused)) void *arg)
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32(i, &val32);
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32(i, &val32);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64(i, &val64);
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64(i, &val64);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_set_bitXX() returns the original bit value, then sets it to 1.
+ * This function checks each target bit: if it is equal to 0, the bit is set to
+ * 1 and "countXX" is increased by one. If it is equal to 1, "countXX" is left
+ * unchanged. The value of "countXX" is checked as the result later.
+ */
+static int
+test_bitops_test_set(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_set_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_set_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_clear_bitXX() returns the original bit value, then clears it to
+ * 0. This function checks each target bit: if it is equal to 1, the bit is
+ * cleared to 0 and "countXX" is increased by one. If it is equal to 0,
+ * "countXX" is left unchanged. The value of "countXX" is checked later.
+ */
+static int
+test_bitops_test_clear(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_test_and_clear_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_test_and_clear_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops(void)
+{
+	 __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+	 __atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	 __atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	if (test_bitops_set_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_test_set_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+
+	rte_eal_mp_remote_launch(test_bitops_set_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32(i, &val32)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64(i, &val64)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	/*
+	 * Launch all slave lcores to do "rte_test_and_set_bitXX"
+	 * respectively.
+	 * Each lcore has MAX_BITS_XX chances to check the target bit.
+	 * If it's equal to 0, set it to 1 and "countXX" (which is initialized
+	 * to 0) is increased by one. If the target bit is 1, still set
+	 * it to 1 and do nothing for "countXX". Only one lcore finds that
+	 * a given target bit is 0.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_set_bitXX" correctly.
+	 */
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_set, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	/*
+	 * Launch all slave lcores to do "rte_test_and_clear_bitXX"
+	 * respectively.
+	 * Each lcore has MAX_BITS_XX chances to check the target bit.
+	 * If it's equal to 1, clear it to 0 and "countXX" (which is initialized
+	 * to 0) is increased by one. If the target bit is 0, still clear
+	 * it to 0 and do nothing for "countXX". Only one lcore finds that
+	 * a given target bit is 1.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_clear_bitXX" correctly.
+	 */
+
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(bitops_autotest, test_bitops);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v3 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (15 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 2/6] test/bitops: add bit operation test case Joyce Kong
@ 2019-11-18 10:06 ` Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 4/6] net/bnx2x: " Joyce Kong
                   ` (16 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f156..9cabda875 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e79..c3744bbf9 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_clear_bit32(AXGBE_STOPPED, &pdata->dev_state);
+	rte_clear_bit32(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_get_bit32(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit32(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit32(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit32(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit32(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5cc9..af7a1ecb7 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_set_bit32(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_set_bit32(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_clear_bit32(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_get_bit32(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_get_bit32(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_clear_bit32(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_get_bit32(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v3 4/6] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (16 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-11-18 10:06 ` " Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 5/6] net/qede: " Joyce Kong
                   ` (15 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this largely reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 209 ++++++++++++++++-------------------
 drivers/net/bnx2x/bnx2x.h    |   4 -
 drivers/net/bnx2x/ecore_sp.h |   9 +-
 3 files changed, 98 insertions(+), 124 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index ed31335ac..979eed988 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -26,6 +26,7 @@
 #include <fcntl.h>
 #include <zlib.h>
 #include <rte_string_fns.h>
+#include <rte_bitops.h>
 
 #define BNX2X_PMD_VER_PREFIX "BNX2X PMD"
 #define BNX2X_PMD_VERSION_MAJOR 1
@@ -129,32 +130,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1427,11 +1402,11 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_set_bit32(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1458,26 +1433,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1463,20 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_set_bit32(ECORE_ACCEPT_ALL_UNICAST,
+					tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_set_bit32(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1488,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_set_bit32(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1518,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_set_bit32(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1571,9 +1547,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32(RAMROD_RX, &ramrod_flags);
+	rte_set_bit32(RAMROD_TX, &ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1674,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit32(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1698,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1759,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1773,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit32(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1785,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,11 +1842,10 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
-	} else {
+	if (rte_get_bit32(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state))
+		rte_set_bit32(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	else
 		bnx2x_set_storm_rx_mode(sc);
-	}
 
 	/* Clean up multicast configuration */
 	rparam.mcast_obj = &sc->mcast_obj;
@@ -1960,12 +1935,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_set_bit32(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_set_bit32(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1949,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_set_bit32(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1959,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_set_bit32(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4288,7 +4263,7 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_set_bit32(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,12 +4294,12 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_clear_bit32(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_test_and_clear_bit32(ECORE_FILTER_RX_MODE_SCHED,
+						&sc->sp_state))
 		bnx2x_set_storm_rx_mode(sc);
-	}
 }
 
 static void bnx2x_update_eq_prod(struct bnx2x_softc *sc, uint16_t prod)
@@ -4693,7 +4668,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4988,8 +4963,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_set_bit32(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_set_bit32(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5778,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6354,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_set_bit32(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6417,7 +6392,7 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	unsigned long flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_set_bit32(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6400,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_set_bit32(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_set_bit32(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,10 +6410,10 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_set_bit32(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_set_bit32(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
@@ -6448,15 +6423,15 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 	unsigned long flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_set_bit32(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_set_bit32(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_set_bit32(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_set_bit32(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6552,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6620,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_set_bit32(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_set_bit32(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_set_bit32(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6648,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_set_bit32(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,11 +6705,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_get_bit32(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_set_bit32(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6761,7 +6736,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit32(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6868,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_set_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_set_bit32(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_set_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_set_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6906,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_get_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_get_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6923,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_get_bit32(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_test_and_clear_bit32(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6945,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_get_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_get_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_get_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_get_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_get_bit32(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_get_bit32(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 3383c7675..e6e66e870 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1809,10 +1809,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db377a..59a2e8e39 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -15,6 +15,7 @@
 #define ECORE_SP_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -73,10 +74,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_get_bit32(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_set_bit32(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_clear_bit32(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_test_and_clear_bit32(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v3 5/6] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (17 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 4/6] net/bnx2x: " Joyce Kong
@ 2019-11-18 10:06 ` " Joyce Kong
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 6/6] net/hinic: " Joyce Kong
                   ` (14 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/qede/base/bcm_osal.c | 20 --------------------
 drivers/net/qede/base/bcm_osal.h | 10 ++++------
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 48d016e24..19457d7c0 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -46,26 +46,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 0f09557cf..d2975c8cd 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,17 +312,14 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_set_bit32(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_clear_bit32(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
 #define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+	rte_get_bit32(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
-- 
2.17.1


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v3 6/6] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (18 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 5/6] net/qede: " Joyce Kong
@ 2019-11-18 10:06 ` " Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (13 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-18 10:06 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/hinic/Makefile            |  1 +
 drivers/net/hinic/base/hinic_compat.h | 33 +--------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++-------
 drivers/net/hinic/meson.build         |  2 ++
 4 files changed, 12 insertions(+), 40 deletions(-)

diff --git a/drivers/net/hinic/Makefile b/drivers/net/hinic/Makefile
index b78fd8d53..97f429804 100644
--- a/drivers/net/hinic/Makefile
+++ b/drivers/net/hinic/Makefile
@@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_hinic.a
 
 CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS)
 
 ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index e4a7f12d1..c0a0b3ea2 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,38 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	res = ((*addr) & (1UL << nr)) != 0;
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index 072fec339..313d8ff2d 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -269,7 +269,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_get_bit32(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -1075,7 +1075,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_set_bit32(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1200,7 +1200,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_test_and_clear_bit32(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1245,7 +1245,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_clear_bit32(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2844,7 +2844,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_test_and_set_bit32(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -3045,7 +3045,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_set_bit32(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* initialize filter info */
 	filter_info = &nic_dev->filter;
@@ -3057,7 +3057,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 	TAILQ_INIT(&nic_dev->filter_fdir_rule_list);
 	TAILQ_INIT(&nic_dev->hinic_flow_list);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_set_bit32(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -3113,7 +3113,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_clear_bit32(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
diff --git a/drivers/net/hinic/meson.build b/drivers/net/hinic/meson.build
index bc7e24639..8c7ee9dfc 100644
--- a/drivers/net/hinic/meson.build
+++ b/drivers/net/hinic/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Huawei Technologies Co., Ltd
 
+allow_experimental_apis = true
+
 subdir('base')
 objs = [base_objs]
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bitoperation APIs
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-11-18 10:52   ` Morten Brørup
  2019-11-19  9:22     ` Joyce Kong (Arm Technology China)
  0 siblings, 1 reply; 70+ messages in thread
From: Morten Brørup @ 2019-11-18 10:52 UTC (permalink / raw)
  To: Joyce Kong, dev
  Cc: nd, thomas, jerinj, stephen, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> Sent: Monday, November 18, 2019 11:07 AM
> 

[snip]

> +++ b/lib/librte_eal/common/include/rte_bitops.h
> @@ -0,0 +1,474 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#ifndef _RTE_BITOPS_H_
> +#define _RTE_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a API for bit operations without/with memory
> ordering.
> + */
> +
> +#include <stdint.h>
> +#include <assert.h>
> +#include <rte_compat.h>
> +
> +/*---------------------------- 32 bit operations ---------------------
> -------*/
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 32-bit value without memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_get_bit32_relaxed(unsigned int nr, unsigned long *addr)
> +{
> +	assert(nr < 32);
> +
> +	uint32_t mask = 1UL << nr;
> +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
> +}

Address pointer should be: uint32_t *addr.
Likewise in the other 32 bit functions.

Use RTE_ASSERT() instead of assert().
Likewise in all other functions.

When setting the mask, consider using UINT32_C(1) from <stdint.h> instead of 1UL.

[snip]

> +
> +/*---------------------------- 64 bit operations ---------------------
> -------*/
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 64-bit value without memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_get_bit64_relaxed(unsigned int nr, unsigned long *addr)
> +{
> +	assert(nr < 64);
> +
> +	uint64_t mask = 1UL << nr;
> +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
> +}

Address pointer should be: uint64_t *addr.
Likewise in the other 64 bit functions.

Mask should be 1ULL, not 1UL. Or use UINT64_C(1) from <stdint.h> instead.
Likewise in the other 64 bit functions.

[snip]


Med venlig hilsen / kind regards
- Morten Brørup




^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bitoperation APIs
  2019-11-18 10:52   ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
@ 2019-11-19  9:22     ` Joyce Kong (Arm Technology China)
  0 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong (Arm Technology China) @ 2019-11-19  9:22 UTC (permalink / raw)
  To: Morten Brørup, dev
  Cc: nd, thomas, jerinj, stephen, david.marchand,
	Honnappa Nagarahalli, Gavin Hu (Arm Technology China),
	ravi1.kumar, rmody, shshaikh, xuanziyang2, cloud.wangxiaoyun,
	zhouguoyang

Hi Morten,

> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Monday, November 18, 2019 6:52 PM
> To: Joyce Kong (Arm Technology China) <Joyce.Kong@arm.com>;
> dev@dpdk.org
> Cc: nd <nd@arm.com>; thomas@monjalon.net; jerinj@marvell.com;
> stephen@networkplumber.org; david.marchand@redhat.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology
> China) <Gavin.Hu@arm.com>; ravi1.kumar@amd.com; rmody@marvell.com;
> shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com
> Subject: RE: [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte
> bitoperation APIs
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> > Sent: Monday, November 18, 2019 11:07 AM
> >
> 
> [snip]
> 
> > +++ b/lib/librte_eal/common/include/rte_bitops.h
> > @@ -0,0 +1,474 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2019 Arm Limited
> > + */
> > +
> > +#ifndef _RTE_BITOPS_H_
> > +#define _RTE_BITOPS_H_
> > +
> > +/**
> > + * @file
> > + * Bit Operations
> > + *
> > + * This file defines a API for bit operations without/with memory
> > ordering.
> > + */
> > +
> > +#include <stdint.h>
> > +#include <assert.h>
> > +#include <rte_compat.h>
> > +
> > +/*---------------------------- 32 bit operations
> > +---------------------
> > -------*/
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> > notice
> > + *
> > + * Get the target bit from a 32-bit value without memory ordering.
> > + *
> > + * @param nr
> > + *   The target bit to get.
> > + * @param addr
> > + *   The address holding the bit.
> > + * @return
> > + *   The target bit.
> > + */
> > +__rte_experimental
> > +static inline uint32_t
> > +rte_get_bit32_relaxed(unsigned int nr, unsigned long *addr) {
> > +	assert(nr < 32);
> > +
> > +	uint32_t mask = 1UL << nr;
> > +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask; }
> 
> Address pointer should be: uint32_t *addr.
> Likewise in the other 32 bit functions.
> 
> Use RTE_ASSERT() instead of assert().
> Likewise in all other functions.
> 
> When setting the mask, consider using UINT32_C(1) from <stdint.h> instead
> of 1UL.
> 
> [snip]
> 
> > +
> > +/*---------------------------- 64 bit operations
> > +---------------------
> > -------*/
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> > notice
> > + *
> > + * Get the target bit from a 64-bit value without memory ordering.
> > + *
> > + * @param nr
> > + *   The target bit to get.
> > + * @param addr
> > + *   The address holding the bit.
> > + * @return
> > + *   The target bit.
> > + */
> > +__rte_experimental
> > +static inline uint64_t
> > +rte_get_bit64_relaxed(unsigned int nr, unsigned long *addr) {
> > +	assert(nr < 64);
> > +
> > +	uint64_t mask = 1UL << nr;
> > +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask; }
> 
> Address pointer should be: uint64_t *addr.
> Likewise in the other 64 bit functions.
> 
> Mask should be 1ULL, not 1UL. Or use UINT64_C(1) from <stdint.h> instead.
> Likewise in the other 64 bit functions.
> 
> [snip]
> 
> 
> Med venlig hilsen / kind regards
> - Morten Brørup
> 
> 
Thanks! I shall address above comments in Patch v4 for both 32-bit and 64-bit functions.


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v4 0/6] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (19 preceding siblings ...)
  2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 6/6] net/hinic: " Joyce Kong
@ 2019-11-20 10:12 ` Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (12 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered across PMDs.
Consolidate them into a common API family and apply it in the
different PMDs to reduce code duplication.

v4:
  Introduce uint32_t/uint64_t *addr when defining bit operation APIs (suggested by
  Morten Brørup).

v3:
  1. Change the API's head file back to rte_bitops.h, then implement both 32-bit and
     64-bit operations with and without C11 atomic memory ordering.
  2. Add multi-core test case for bit operations which are implemented with memory ordering.
  3. Modify the doc of both APIs and test cases.

v2:
  1. Add doxygen comments for the rte bit operation API (suggested by Stephen Hemminger).
  2. Add test cases for common rte bit operation API (suggested by Stephen Hemminger).
  3. Change the header file to rte_io_bitops.h and the operation to rte_io_set_bit() etc.,
     as the API uses barriers inside and the barriers are only needed for IO operations
     (suggested by Jerin Jacob).
  4. Use a well-defined uint_NN_t type (suggested by Morten Brørup).

Joyce Kong (6):
  lib/eal: implement the family of rte bit operation APIs
  test/bitops: add bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead

 MAINTAINERS                                |   5 +
 app/test/Makefile                          |   1 +
 app/test/autotest_data.py                  |   6 +
 app/test/meson.build                       |   2 +
 app/test/test_bitops.c                     | 305 +++++++++++++++++++
 doc/api/doxy-api-index.md                  |   5 +-
 drivers/net/axgbe/axgbe_common.h           |  29 +-
 drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
 drivers/net/axgbe/axgbe_mdio.c             |  14 +-
 drivers/net/bnx2x/bnx2x.c                  | 209 ++++++-------
 drivers/net/bnx2x/bnx2x.h                  |   4 -
 drivers/net/bnx2x/ecore_sp.h               |   9 +-
 drivers/net/hinic/Makefile                 |   1 +
 drivers/net/hinic/base/hinic_compat.h      |  33 +-
 drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +-
 drivers/net/hinic/meson.build              |   2 +
 drivers/net/qede/base/bcm_osal.c           |  20 --
 drivers/net/qede/base/bcm_osal.h           |  10 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   3 +-
 21 files changed, 928 insertions(+), 235 deletions(-)
 create mode 100644 app/test/test_bitops.c
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (20 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2019-11-20 10:12 ` Joyce Kong
  2019-11-20 13:40   ` [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 2/6] test/bitops: add bit operation test case Joyce Kong
                   ` (11 subsequent siblings)
  33 siblings, 1 reply; 70+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered and
duplicated across PMDs, so consolidating them into a common
API family is necessary. Furthermore, when a bit operation
is applied to IO devices, use __ATOMIC_ACQ_REL to ensure
the ordering of the IO bit operation.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 MAINTAINERS                                |   5 +
 doc/api/doxy-api-index.md                  |   5 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   3 +-
 5 files changed, 485 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

diff --git a/MAINTAINERS b/MAINTAINERS
index f2fdb93..4ee2712 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -233,6 +233,11 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
 F: lib/librte_eal/common/include/rte_bitmap.h
 F: app/test/test_bitmap.c
 
+Bitops
+M: Joyce Kong <joyce.kong@arm.com>
+F: lib/librte_eal/common/include/rte_bitops.h
+F: app/test/test_bitops.c
+
 MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
 F: lib/librte_eal/common/include/generic/rte_mcslock.h
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index dff496b..ade7c01 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -133,12 +133,13 @@ The public API headers are grouped by topics:
   [BPF]                (@ref rte_bpf.h)
 
 - **containers**:
+  [bitmap]             (@ref rte_bitmap.h),
+  [bitops]             (@ref rte_bitops.h),
   [mbuf]               (@ref rte_mbuf.h),
   [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
   [ring]               (@ref rte_ring.h),
   [stack]              (@ref rte_stack.h),
-  [tailq]              (@ref rte_tailq.h),
-  [bitmap]             (@ref rte_bitmap.h)
+  [tailq]              (@ref rte_tailq.h)
 
 - **packet framework**:
   * [port]             (@ref rte_port.h):
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index c2c6d92..dd025c1 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_bitops.h b/lib/librte_eal/common/include/rte_bitops.h
new file mode 100644
index 0000000..34158d1
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitops.h
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_BITOPS_H_
+#define _RTE_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines an API for bit operations without/with memory ordering.
+ */
+
+#include <stdint.h>
+#include <rte_debug.h>
+#include <rte_compat.h>
+
+/*---------------------------- 32 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/*---------------------------- 64 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index d6a149b..8a5197b 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -50,9 +50,10 @@ common_objs += eal_common_arch_objs
 
 common_headers = files(
 	'include/rte_alarm.h',
+	'include/rte_bitmap.h',
+	'include/rte_bitops.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
-	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
 	'include/rte_compat.h',
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v4 2/6] test/bitops: add bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (21 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-11-20 10:12 ` Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (10 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Add test cases for set bit, clear bit, test and set bit,
test and clear bit operations.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 app/test/Makefile         |   1 +
 app/test/autotest_data.py |   6 +
 app/test/meson.build      |   2 +
 app/test/test_bitops.c    | 305 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 314 insertions(+)
 create mode 100644 app/test/test_bitops.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 57930c0..4f33274 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -70,6 +70,7 @@ SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
 SRCS-y += test_bitmap.c
+SRCS-y += test_bitops.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 6deb97b..7db2df1 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -405,6 +405,12 @@
         "Report":  None,
     },
     {
+        "Name":    "Bitops test",
+        "Command": "bitops_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
+    {
         "Name":    "Hash multiwriter autotest",
         "Command": "hash_multiwriter_autotest",
         "Func":    default_autotest,
diff --git a/app/test/meson.build b/app/test/meson.build
index ff59c31..33b4135 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -14,6 +14,7 @@ test_sources = files('commands.c',
 	'test_atomic.c',
 	'test_barrier.c',
 	'test_bitratestats.c',
+	'test_bitops.c',
 	'test_bpf.c',
 	'test_byteorder.c',
 	'test_cmdline.c',
@@ -167,6 +168,7 @@ fast_test_names = [
         'alarm_autotest',
         'atomic_autotest',
         'byteorder_autotest',
+        'bitops_autotest',
         'cmdline_autotest',
         'common_autotest',
         'cpuflags_autotest',
diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
new file mode 100644
index 0000000..3859ca8
--- /dev/null
+++ b/app/test/test_bitops.c
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_bitops.h>
+#include <rte_launch.h>
+#include "test.h"
+
+uint32_t val32;
+uint64_t val64;
+unsigned int synchro;
+unsigned int count32;
+unsigned int count64;
+
+#define MAX_BITS_32 32
+#define MAX_BITS_64 64
+/*
+ * Bitops functions
+ * ================
+ *
+ * - The main test function performs several subtests.
+ * - For relaxed version, check bit operations on one core.
+ *   - Initialize valXX to specified values, then set each bit of valXX
+ *     to 1 one by one in "test_bitops_set_relaxed".
+ *
+ *   - Clear each bit of valXX to 0 one by one in "test_bitops_clear_relaxed".
+ *
+ *   - Function "test_bitops_test_set_clear_relaxed" checks whether each bit
+ *     of valXX can do "test and set" and "test and clear" correctly.
+ *
+ * - For C11 atomic barrier version, check bit operations on multi cores.
+ *   - Per bit of valXX is set to 1, then cleared to 0 on each core in
+ *     "test_bitops_set_clear". The function checks that once all lcores finish
+ *     their set_clear, the value of valXX would still be zero.
+ *
+ *   - The cores are waiting for a synchro which is triggered by the main test
+ *     function. Then all cores would do "rte_test_and_set_bitXX" or
+ *     "rte_test_and_clear_bitXX" at the same time, "countXX" which is checked
+ *     as the result later would inc by one or not according to the original
+ *     bit value.
+ *
+ */
+
+static int
+test_bitops_set_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_test_set_clear_relaxed(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_test_and_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_clear_bit32_relaxed(i, &val32)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+	/* Repeat the same test-and-set / test-and-clear sequence on val64. */
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_test_and_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_clear_bit64_relaxed(i, &val64)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_set_clear(__attribute__((unused)) void *arg)
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32(i, &val32);
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32(i, &val32);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64(i, &val64);
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64(i, &val64);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_set_bitXX() returns the original bit value, then sets it to 1.
+ * This function sets each target bit to 1 and, if the bit was originally 0,
+ * increases "countXX" by one. If the bit was already 1, "countXX" is left
+ * unchanged. The value of "countXX" is checked as the result later.
+ */
+static int
+test_bitops_test_set(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_set_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_set_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_clear_bitXX() returns the original bit value, then clears it
+ * to 0. This function clears each target bit and, if the bit was originally
+ * 1, increases "countXX" by one. If the bit was already 0, "countXX" is left
+ * unchanged. The value of "countXX" is checked as the result later.
+ */
+static int
+test_bitops_test_clear(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		;
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_test_and_clear_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_test_and_clear_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops(void)
+{
+	__atomic_store_n(&val32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&val64, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	if (test_bitops_set_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_test_set_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	/* Multi-core phase: slave lcores spin on "synchro" until released. */
+	rte_eal_mp_remote_launch(test_bitops_set_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_get_bit32(i, &val32)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64(i, &val64)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	/*
+	 * Launch all slave lcores to do "rte_test_and_set_bitXX"
+	 * respectively.
+	 * Each lcore should have MAX_BITS_XX chances to check the target bit.
+	 * If it's equal to 0, set it to 1 and "countXX (which is initialized
+	 * to 0)" would be increased by one. If the target bit is 1, still set
+	 * it to 1 and do nothing for "countXX". There would be only one lcore
+	 * that finds the target bit is 0.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_set_bitXX" correctly.
+	 */
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_set, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	/*
+	 * Launch all slave lcores to do "rte_test_and_clear_bitXX"
+	 * respectively.
+	 * Each lcore should have MAX_BITS_XX chances to check the target bit.
+	 * If it's equal to 1, clear it to 0 and "countXX (which is initialized
+	 * to 0)" would be increased by one. If the target bit is 0, still clear
+	 * it to 0 and do nothing for "countXX". There would be only one lcore
+	 * that finds the target bit is 1.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, all slave
+	 * lcores performed "rte_test_and_clear_bitXX" correctly.
+	 */
+
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(bitops_autotest, test_bitops);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v4 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (22 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 2/6] test/bitops: add bit operation test case Joyce Kong
@ 2019-11-20 10:12 ` Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 4/6] net/bnx2x: " Joyce Kong
                   ` (9 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove its own bit operation APIs and use the common one,
this can reduce the code duplication largely.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f1..9cabda8 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e..fa597f3 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_clear_bit64(AXGBE_STOPPED, &pdata->dev_state);
+	rte_clear_bit64(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_get_bit64(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5c..00394a7 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_set_bit64(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_set_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_clear_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_get_bit64(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_clear_bit64(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v4 4/6] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (23 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-11-20 10:12 ` " Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 5/6] net/qede: " Joyce Kong
                   ` (8 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove its own bit operation APIs and use the common one,
this can reduce the code duplication largely.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 209 +++++++++++++++++++------------------------
 drivers/net/bnx2x/bnx2x.h    |   4 -
 drivers/net/bnx2x/ecore_sp.h |   9 +-
 3 files changed, 98 insertions(+), 124 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index ed31335..1c00a67 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -26,6 +26,7 @@
 #include <fcntl.h>
 #include <zlib.h>
 #include <rte_string_fns.h>
+#include <rte_bitops.h>
 
 #define BNX2X_PMD_VER_PREFIX "BNX2X PMD"
 #define BNX2X_PMD_VERSION_MAJOR 1
@@ -129,32 +130,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1427,11 +1402,11 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_set_bit64(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1458,26 +1433,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1463,20 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_set_bit64(ECORE_ACCEPT_ALL_UNICAST,
+					tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1488,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1518,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_set_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1571,9 +1547,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_RX, &ramrod_flags);
+	rte_set_bit64(RAMROD_TX, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1674,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit64(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1698,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1759,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1773,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit64(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1785,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,11 +1842,10 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
-	} else {
+	if (rte_get_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state))
+		rte_set_bit64(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	else
 		bnx2x_set_storm_rx_mode(sc);
-	}
 
 	/* Clean up multicast configuration */
 	rparam.mcast_obj = &sc->mcast_obj;
@@ -1960,12 +1935,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_set_bit64(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_set_bit64(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1949,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_set_bit64(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1959,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_set_bit64(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4288,7 +4263,7 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_set_bit64(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,12 +4294,12 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_clear_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_test_and_clear_bit64(ECORE_FILTER_RX_MODE_SCHED,
+						&sc->sp_state))
 		bnx2x_set_storm_rx_mode(sc);
-	}
 }
 
 static void bnx2x_update_eq_prod(struct bnx2x_softc *sc, uint16_t prod)
@@ -4693,7 +4668,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4988,8 +4963,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_set_bit64(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_set_bit64(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5778,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6354,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6417,7 +6392,7 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	unsigned long flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_set_bit64(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6400,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_set_bit64(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_set_bit64(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,10 +6410,10 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_set_bit64(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_set_bit64(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
@@ -6448,15 +6423,15 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 	unsigned long flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_set_bit64(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_set_bit64(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_set_bit64(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_set_bit64(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6552,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6620,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6648,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,11 +6705,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_get_bit64(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_set_bit64(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6761,7 +6736,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6868,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_set_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_set_bit64(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_set_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_set_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6906,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6923,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_test_and_clear_bit64(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6945,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 3383c76..e6e66e8 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1809,10 +1809,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db37..efbfdad 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -15,6 +15,7 @@
 #define ECORE_SP_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -73,10 +74,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_get_bit64(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_set_bit64(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_clear_bit64(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_test_and_clear_bit64(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v4 5/6] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (24 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 4/6] net/bnx2x: " Joyce Kong
@ 2019-11-20 10:12 ` " Joyce Kong
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 6/6] net/hinic: " Joyce Kong
                   ` (7 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the qede-private bit operation helpers and use the common
rte_bitops APIs instead; this removes a good deal of duplicated code.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/qede/base/bcm_osal.c | 20 --------------------
 drivers/net/qede/base/bcm_osal.h | 10 ++++------
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 48d016e..19457d7 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -46,26 +46,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 0f09557..e7a7392 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,17 +312,14 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_set_bit64(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_clear_bit64(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
 #define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+	rte_get_bit64(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v4 6/6] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (25 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 5/6] net/qede: " Joyce Kong
@ 2019-11-20 10:12 ` " Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (6 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-20 10:12 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the hinic-private bit operation helpers and use the common
rte_bitops APIs instead; this removes a good deal of duplicated code.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/hinic/Makefile            |  1 +
 drivers/net/hinic/base/hinic_compat.h | 33 +--------------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++++--------
 drivers/net/hinic/meson.build         |  2 ++
 4 files changed, 12 insertions(+), 40 deletions(-)

diff --git a/drivers/net/hinic/Makefile b/drivers/net/hinic/Makefile
index b78fd8d..97f4298 100644
--- a/drivers/net/hinic/Makefile
+++ b/drivers/net/hinic/Makefile
@@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_hinic.a
 
 CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS)
 
 ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index e4a7f12..c0a0b3e 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,38 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	res = ((*addr) & (1UL << nr)) != 0;
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index 072fec3..8181564 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -269,7 +269,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_get_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -1075,7 +1075,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1200,7 +1200,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_test_and_clear_bit64(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1245,7 +1245,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_clear_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2844,7 +2844,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_test_and_set_bit64(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -3045,7 +3045,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* initialize filter info */
 	filter_info = &nic_dev->filter;
@@ -3057,7 +3057,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 	TAILQ_INIT(&nic_dev->filter_fdir_rule_list);
 	TAILQ_INIT(&nic_dev->hinic_flow_list);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -3113,7 +3113,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_clear_bit64(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
diff --git a/drivers/net/hinic/meson.build b/drivers/net/hinic/meson.build
index bc7e246..8c7ee9d 100644
--- a/drivers/net/hinic/meson.build
+++ b/drivers/net/hinic/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Huawei Technologies Co., Ltd
 
+allow_experimental_apis = true
+
 subdir('base')
 objs = [base_objs]
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-11-20 13:40   ` Morten Brørup
  0 siblings, 0 replies; 70+ messages in thread
From: Morten Brørup @ 2019-11-20 13:40 UTC (permalink / raw)
  To: Joyce Kong, dev
  Cc: nd, thomas, jerinj, stephen, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Joyce Kong
> Sent: Wednesday, November 20, 2019 11:12 AM
> 
> There are a lot functions of bit operations scattered and
> duplicated in PMDs, consolidating them into a common API
> family is necessary. Furthermore, when the bit operation
> is applied to the IO devices, use __ATOMIC_ACQ_REL to
> ensure the ordering for io bit operation.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> ---
>  MAINTAINERS                                |   5 +
>  doc/api/doxy-api-index.md                  |   5 +-
>  lib/librte_eal/common/Makefile             |   1 +
>  lib/librte_eal/common/include/rte_bitops.h | 474
> +++++++++++++++++++++++++++++
>  lib/librte_eal/common/meson.build          |   3 +-
>  5 files changed, 485 insertions(+), 3 deletions(-)
>  create mode 100644 lib/librte_eal/common/include/rte_bitops.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index f2fdb93..4ee2712 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -233,6 +233,11 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
>  F: lib/librte_eal/common/include/rte_bitmap.h
>  F: app/test/test_bitmap.c
> 
> +Bitops
> +M: Joyce Kong <joyce.kong@arm.com>
> +F: lib/librte_eal/common/include/rte_bitops.h
> +F: app/test/test_bitops.c
> +
>  MCSlock - EXPERIMENTAL
>  M: Phil Yang <phil.yang@arm.com>
>  F: lib/librte_eal/common/include/generic/rte_mcslock.h
> diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
> index dff496b..ade7c01 100644
> --- a/doc/api/doxy-api-index.md
> +++ b/doc/api/doxy-api-index.md
> @@ -133,12 +133,13 @@ The public API headers are grouped by topics:
>    [BPF]                (@ref rte_bpf.h)
> 
>  - **containers**:
> +  [bitmap]             (@ref rte_bitmap.h),
> +  [bitops]             (@ref rte_bitops.h),
>    [mbuf]               (@ref rte_mbuf.h),
>    [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
>    [ring]               (@ref rte_ring.h),
>    [stack]              (@ref rte_stack.h),
> -  [tailq]              (@ref rte_tailq.h),
> -  [bitmap]             (@ref rte_bitmap.h)
> +  [tailq]              (@ref rte_tailq.h)
> 
>  - **packet framework**:
>    * [port]             (@ref rte_port.h):
> diff --git a/lib/librte_eal/common/Makefile
> b/lib/librte_eal/common/Makefile
> index c2c6d92..dd025c1 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
>  INC += rte_service.h rte_service_component.h
>  INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
>  INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
> +INC += rte_bitops.h
> 
>  GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
>  GENERIC_INC += rte_memcpy.h rte_cpuflags.h
> diff --git a/lib/librte_eal/common/include/rte_bitops.h
> b/lib/librte_eal/common/include/rte_bitops.h
> new file mode 100644
> index 0000000..34158d1
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_bitops.h
> @@ -0,0 +1,474 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#ifndef _RTE_BITOPS_H_
> +#define _RTE_BITOPS_H_
> +
> +/**
> + * @file
> + * Bit Operations
> + *
> + * This file defines a API for bit operations without/with memory
> ordering.
> + */
> +
> +#include <stdint.h>
> +#include <rte_debug.h>
> +#include <rte_compat.h>
> +
> +/*---------------------------- 32 bit operations -------------------------
> ---*/
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 32-bit value without memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_get_bit32_relaxed(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Set the target bit in a 32-bit value to 1 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Clear the target bit in a 32-bit value to 0 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 32-bit value, then set it to 1 without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 32-bit value, then clear it to 0 without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 32-bit value with memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_get_bit32(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Set the target bit in a 32-bit value to 1 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit32(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Clear the target bit in a 32-bit value to 0 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit32(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 32-bit value, then set it to 1 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_set_bit32(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 32-bit value, then clear it to 0 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint32_t
> +rte_test_and_clear_bit32(unsigned int nr, uint32_t *addr)
> +{
> +	RTE_ASSERT(nr < 32);
> +
> +	uint32_t mask = UINT32_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +/*---------------------------- 64 bit operations -------------------------
> ---*/
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 64-bit value without memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_get_bit64_relaxed(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Set the target bit in a 64-bit value to 1 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Clear the target bit in a 64-bit value to 0 without memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 64-bit value, then set it to 1 without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 64-bit value, then clear it to 0 without
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Get the target bit from a 64-bit value with memory ordering.
> + *
> + * @param nr
> + *   The target bit to get.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The target bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_get_bit64(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Set the target bit in a 64-bit value to 1 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to set.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_set_bit64(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Clear the target bit in a 64-bit value to 0 with memory ordering.
> + *
> + * @param nr
> + *   The target bit to clear.
> + * @param addr
> + *   The address holding the bit.
> + */
> +__rte_experimental
> +static inline void
> +rte_clear_bit64(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 64-bit value, then set it to 1 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and set.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_set_bit64(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + * Return the original bit from a 64-bit value, then clear it to 0 with
> + * memory ordering.
> + *
> + * @param nr
> + *   The target bit to get and clear.
> + * @param addr
> + *   The address holding the bit.
> + * @return
> + *   The original bit.
> + */
> +__rte_experimental
> +static inline uint64_t
> +rte_test_and_clear_bit64(unsigned int nr, uint64_t *addr)
> +{
> +	RTE_ASSERT(nr < 64);
> +
> +	uint64_t mask = UINT64_C(1) << nr;
> +	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
> +}
> +#endif /* _RTE_BITOPS_H_ */
> diff --git a/lib/librte_eal/common/meson.build
> b/lib/librte_eal/common/meson.build
> index d6a149b..8a5197b 100644
> --- a/lib/librte_eal/common/meson.build
> +++ b/lib/librte_eal/common/meson.build
> @@ -50,9 +50,10 @@ common_objs += eal_common_arch_objs
> 
>  common_headers = files(
>  	'include/rte_alarm.h',
> +	'include/rte_bitmap.h',
> +	'include/rte_bitops.h',
>  	'include/rte_branch_prediction.h',
>  	'include/rte_bus.h',
> -	'include/rte_bitmap.h',
>  	'include/rte_class.h',
>  	'include/rte_common.h',
>  	'include/rte_compat.h',
> --
> 2.7.4
> 

Acked-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v5 0/6] implement common rte bit operation APIs in PMDs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (26 preceding siblings ...)
  2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 6/6] net/hinic: " Joyce Kong
@ 2019-11-28  6:44 ` Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
                   ` (5 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered across PMDs.
Consolidate them into a common API family and apply it in the different
PMDs to reduce code duplication.

v5:
 Correct the spelling mistake in test_bitops.c

v4:
  Introduce uint32_t/uint64_t *addr when defining bit operation APIs (suggested by
  Morten Brørup).

v3:
  1. Change the API's header file back to rte_bitops.h, then implement both 32-bit and
     64-bit operations with and without C11 atomic memory ordering.
  2. Add a multi-core test case for the bit operations that are implemented with memory ordering.
  3. Modify the doc of both APIs and test cases.

v2:
  1. Add doxygen comments for the rte bit operation API (suggested by Stephen Hemminger).
  2. Add test cases for the common rte bit operation API (suggested by Stephen Hemminger).
  3. Change the header file to rte_io_bitops.h and the operations to rte_io_set_bit() etc.,
     as the API uses barriers inside and the barriers are only needed for IO operations
     (suggested by Jerin Jacob).
  4. Use a well-defined uint_NN_t type (suggested by Morten Brørup).

Joyce Kong (6):
  lib/eal: implement the family of rte bit operation APIs
  test/bitops: add bit operation test case
  net/axgbe: use common rte bit operation APIs instead
  net/bnx2x: use common rte bit operation APIs instead
  net/qede: use common rte bit operation APIs instead
  net/hinic: use common rte bit operation APIs instead

 MAINTAINERS                                |   5 +
 app/test/Makefile                          |   1 +
 app/test/autotest_data.py                  |   6 +
 app/test/meson.build                       |   2 +
 app/test/test_bitops.c                     | 305 +++++++++++++++++++
 doc/api/doxy-api-index.md                  |   5 +-
 drivers/net/axgbe/axgbe_common.h           |  29 +-
 drivers/net/axgbe/axgbe_ethdev.c           |  14 +-
 drivers/net/axgbe/axgbe_mdio.c             |  14 +-
 drivers/net/bnx2x/bnx2x.c                  | 209 ++++++-------
 drivers/net/bnx2x/bnx2x.h                  |   4 -
 drivers/net/bnx2x/ecore_sp.h               |   9 +-
 drivers/net/hinic/Makefile                 |   1 +
 drivers/net/hinic/base/hinic_compat.h      |  33 +-
 drivers/net/hinic/hinic_pmd_ethdev.c       |  16 +-
 drivers/net/hinic/meson.build              |   2 +
 drivers/net/qede/base/bcm_osal.c           |  20 --
 drivers/net/qede/base/bcm_osal.h           |  10 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   3 +-
 21 files changed, 928 insertions(+), 235 deletions(-)
 create mode 100644 app/test/test_bitops.c
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v5 1/6] lib/eal: implement the family of rte bit operation APIs
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (27 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
@ 2019-11-28  6:44 ` Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 2/6] test/bitops: add bit operation test case Joyce Kong
                   ` (4 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

There are a lot of bit operation functions scattered and
duplicated across PMDs, so consolidating them into a common API
family is necessary. Furthermore, when a bit operation
is applied to IO devices, use __ATOMIC_ACQ_REL to
ensure the ordering of the IO bit operation.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 MAINTAINERS                                |   5 +
 doc/api/doxy-api-index.md                  |   5 +-
 lib/librte_eal/common/Makefile             |   1 +
 lib/librte_eal/common/include/rte_bitops.h | 474 +++++++++++++++++++++++++++++
 lib/librte_eal/common/meson.build          |   3 +-
 5 files changed, 485 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_bitops.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 4a0c9a4..043902b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -236,6 +236,11 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
 F: lib/librte_eal/common/include/rte_bitmap.h
 F: app/test/test_bitmap.c
 
+Bitops
+M: Joyce Kong <joyce.kong@arm.com>
+F: lib/librte_eal/common/include/rte_bitops.h
+F: app/test/test_bitops.c
+
 MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
 F: lib/librte_eal/common/include/generic/rte_mcslock.h
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index dff496b..ade7c01 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -133,12 +133,13 @@ The public API headers are grouped by topics:
   [BPF]                (@ref rte_bpf.h)
 
 - **containers**:
+  [bitmap]             (@ref rte_bitmap.h),
+  [bitops]             (@ref rte_bitops.h),
   [mbuf]               (@ref rte_mbuf.h),
   [mbuf pool ops]      (@ref rte_mbuf_pool_ops.h),
   [ring]               (@ref rte_ring.h),
   [stack]              (@ref rte_stack.h),
-  [tailq]              (@ref rte_tailq.h),
-  [bitmap]             (@ref rte_bitmap.h)
+  [tailq]              (@ref rte_tailq.h)
 
 - **packet framework**:
   * [port]             (@ref rte_port.h):
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index c2c6d92..dd025c1 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -19,6 +19,7 @@ INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
 INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
+INC += rte_bitops.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_memcpy.h rte_cpuflags.h
diff --git a/lib/librte_eal/common/include/rte_bitops.h b/lib/librte_eal/common/include/rte_bitops.h
new file mode 100644
index 0000000..34158d1
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitops.h
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_BITOPS_H_
+#define _RTE_BITOPS_H_
+
+/**
+ * @file
+ * Bit Operations
+ *
+ * This file defines an API for bit operations without/with memory ordering.
+ */
+
+#include <stdint.h>
+#include <rte_debug.h>
+#include <rte_compat.h>
+
+/*---------------------------- 32 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32_relaxed(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 32-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_get_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 32-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 32-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_set_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 32-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint32_t
+rte_test_and_clear_bit32(unsigned int nr, uint32_t *addr)
+{
+	RTE_ASSERT(nr < 32);
+
+	uint32_t mask = UINT32_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/*---------------------------- 64 bit operations ----------------------------*/
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value without memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 without memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 without
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64_relaxed(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_RELAXED) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Get the target bit from a 64-bit value with memory ordering.
+ *
+ * @param nr
+ *   The target bit to get.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The target bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_get_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_load_n(addr, __ATOMIC_ACQUIRE) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Set the target bit in a 64-bit value to 1 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to set.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_set_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Clear the target bit in a 64-bit value to 0 with memory ordering.
+ *
+ * @param nr
+ *   The target bit to clear.
+ * @param addr
+ *   The address holding the bit.
+ */
+__rte_experimental
+static inline void
+rte_clear_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	__atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then set it to 1 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and set.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_set_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_or(addr, mask, __ATOMIC_ACQ_REL) & mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Return the original bit from a 64-bit value, then clear it to 0 with
+ * memory ordering.
+ *
+ * @param nr
+ *   The target bit to get and clear.
+ * @param addr
+ *   The address holding the bit.
+ * @return
+ *   The original bit.
+ */
+__rte_experimental
+static inline uint64_t
+rte_test_and_clear_bit64(unsigned int nr, uint64_t *addr)
+{
+	RTE_ASSERT(nr < 64);
+
+	uint64_t mask = UINT64_C(1) << nr;
+	return __atomic_fetch_and(addr, ~mask, __ATOMIC_ACQ_REL) & mask;
+}
+#endif /* _RTE_BITOPS_H_ */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 2b97715..766edbd 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -50,9 +50,10 @@ common_objs += eal_common_arch_objs
 
 common_headers = files(
 	'include/rte_alarm.h',
+	'include/rte_bitmap.h',
+	'include/rte_bitops.h',
 	'include/rte_branch_prediction.h',
 	'include/rte_bus.h',
-	'include/rte_bitmap.h',
 	'include/rte_class.h',
 	'include/rte_common.h',
 	'include/rte_compat.h',
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v5 2/6] test/bitops: add bit operation test case
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (28 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
@ 2019-11-28  6:44 ` Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
                   ` (3 subsequent siblings)
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Add test cases for set bit, clear bit, test and set bit,
test and clear bit operations.

Change-Id: I5a00e885ea8455636c11cb9455b7e6102c9312b4
Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 app/test/Makefile         |   1 +
 app/test/autotest_data.py |   6 +
 app/test/meson.build      |   2 +
 app/test/test_bitops.c    | 305 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 314 insertions(+)
 create mode 100644 app/test/test_bitops.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 57930c0..4f33274 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -70,6 +70,7 @@ SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
 SRCS-y += test_bitmap.c
+SRCS-y += test_bitops.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 6deb97b..7db2df1 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -405,6 +405,12 @@
         "Report":  None,
     },
     {
+        "Name":    "Bitops test",
+        "Command": "bitops_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
+    {
         "Name":    "Hash multiwriter autotest",
         "Command": "hash_multiwriter_autotest",
         "Func":    default_autotest,
diff --git a/app/test/meson.build b/app/test/meson.build
index fb49d80..ef340ca 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -14,6 +14,7 @@ test_sources = files('commands.c',
 	'test_atomic.c',
 	'test_barrier.c',
 	'test_bitratestats.c',
+	'test_bitops.c',
 	'test_bpf.c',
 	'test_byteorder.c',
 	'test_cmdline.c',
@@ -167,6 +168,7 @@ fast_test_names = [
         'alarm_autotest',
         'atomic_autotest',
         'byteorder_autotest',
+        'bitops_autotest',
         'cmdline_autotest',
         'common_autotest',
         'cpuflags_autotest',
diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
new file mode 100644
index 0000000..2a648aa
--- /dev/null
+++ b/app/test/test_bitops.c
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <rte_bitops.h>
+#include <rte_launch.h>
+#include "test.h"
+
+uint32_t val32; /* 32-bit word the bit operations under test act on */
+uint64_t val64; /* 64-bit word the bit operations under test act on */
+unsigned int synchro; /* start flag: launched lcores spin until non-zero */
+unsigned int count32; /* tally of val32 test-and-set/clear hits */
+unsigned int count64; /* tally of val64 test-and-set/clear hits */
+
+#define MAX_BITS_32 32
+#define MAX_BITS_64 64
+/*
+ * Bitops functions
+ * ================
+ *
+ * - The main test function performs several subtests.
+ * - For relaxed version, check bit operations on one core.
+ *   - Initialize valXX to specified values, then set each bit of valXX
+ *     to 1 one by one in "test_bitops_set_relaxed".
+ *
+ *   - Clear each bit of valXX to 0 one by one in "test_bitops_clear_relaxed".
+ *
+ *   - Function "test_bitops_test_set_clear_relaxed" checks whether each bit
+ *     of valXX can do "test and set" and "test and clear" correctly.
+ *
+ * - For the C11 atomic versions, check bit operations on multiple cores.
+ *   - Each bit of valXX is set to 1, then cleared to 0 on each core in
+ *     "test_bitops_set_clear". The main function checks that once all lcores
+ *     finish their set_clear, the value of valXX is still zero.
+ *
+ *   - The cores wait for "synchro", which is triggered by the main test
+ *     function. All cores then call "rte_test_and_set_bitXX" or
+ *     "rte_test_and_clear_bitXX" at the same time; "countXX", which is checked
+ *     afterwards, is incremented by one or not depending on the original
+ *     bit value.
+ *
+ */
+
+static int
+test_bitops_set_relaxed(void)
+{ /* Set each bit of val32 and val64 in turn, then verify all read as 1. */
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++) /* set every bit of val32 */
+		rte_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++) /* any bit reading 0 is a failure */
+		if (!rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++) /* same sequence for val64 */
+		rte_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to set bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_clear_relaxed(void)
+{ /* Clear each bit of val32 and val64 in turn, then verify all read as 0. */
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++) /* clear every bit of val32 */
+		rte_clear_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++) /* any bit reading 1 is a failure */
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++) /* same sequence for val64 */
+		rte_clear_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_test_set_clear_relaxed(void)
+{ /* Test-and-set every bit, then test-and-clear it and check the results. */
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++) /* return values ignored here */
+		rte_test_and_set_bit32_relaxed(i, &val32);
+
+	for (i = 0; i < MAX_BITS_32; i++) /* must report each bit as set */
+		if (!rte_test_and_clear_bit32_relaxed(i, &val32)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+	}
+
+	for (i = 0; i < MAX_BITS_32; i++) /* every bit must now read as 0 */
+		if (rte_get_bit32_relaxed(i, &val32)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++) /* same sequence for val64 */
+		rte_test_and_set_bit64_relaxed(i, &val64);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_clear_bit64_relaxed(i, &val64)) {
+			printf("Failed to set and test bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_get_bit64_relaxed(i, &val64)) {
+			printf("Failed to test and clear bit in relaxed version.\n");
+			return TEST_FAILED;
+		}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops_set_clear(__attribute__((unused)) void *arg)
+{ /* Per-lcore worker: set, then clear, every bit of val32 and val64. */
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		; /* busy-wait until synchro becomes non-zero */
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_set_bit32(i, &val32);
+	for (i = 0; i < MAX_BITS_32; i++)
+		rte_clear_bit32(i, &val32);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_set_bit64(i, &val64);
+	for (i = 0; i < MAX_BITS_64; i++)
+		rte_clear_bit64(i, &val64);
+
+	return TEST_SUCCESS; /* the caller inspects val32/val64 afterwards */
+}
+
+/*
+ * rte_test_and_set_bitXX() returns the original bit value, then sets it to 1.
+ * This function checks each target bit: if it was 0, it is now set to 1 and
+ * "countXX" is increased by one. If it was already 1, "countXX" is left
+ * unchanged. The value of "countXX" is checked as the result later.
+ */
+static int
+test_bitops_test_set(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		; /* busy-wait until synchro becomes non-zero */
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (!rte_test_and_set_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (!rte_test_and_set_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * rte_test_and_clear_bitXX() returns the original bit value, then clears it
+ * to 0. This function checks each target bit: if it was 1, it is now cleared
+ * and "countXX" is increased by one. If it was already 0, "countXX" is left
+ * unchanged. The value of "countXX" is checked as the result later.
+ */
+static int
+test_bitops_test_clear(__attribute__((unused)) void *arg)
+
+{
+	while (__atomic_load_n(&synchro, __ATOMIC_RELAXED) == 0)
+		; /* busy-wait until synchro becomes non-zero */
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++)
+		if (rte_test_and_clear_bit32(i, &val32))
+			__atomic_fetch_add(&count32, 1, __ATOMIC_ACQ_REL);
+
+	for (i = 0; i < MAX_BITS_64; i++)
+		if (rte_test_and_clear_bit64(i, &val64))
+			__atomic_fetch_add(&count64, 1, __ATOMIC_ACQ_REL);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitops(void)
+{ /* Entry point: single-core relaxed checks, then multi-lcore checks. */
+	__atomic_store_n(&val32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&val64, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&synchro, 0,  __ATOMIC_RELAXED);
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	if (test_bitops_set_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+	if (test_bitops_test_set_clear_relaxed() < 0)
+		return TEST_FAILED;
+
+
+	rte_eal_mp_remote_launch(test_bitops_set_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	unsigned int i;
+
+	for (i = 0; i < MAX_BITS_32; i++) /* set+clear must leave val32 at 0 */
+		if (rte_get_bit32(i, &val32)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	for (i = 0; i < MAX_BITS_64; i++) /* set+clear must leave val64 at 0 */
+		if (rte_get_bit64(i, &val64)) {
+			printf("Failed to set and clear bit on multi cores.\n");
+			return TEST_FAILED;
+		}
+
+	/*
+	 * Launch all slave lcores to run "rte_test_and_set_bitXX" on every
+	 * bit of valXX. If a bit was 0, it is set to 1 and "countXX" (reset
+	 * to 0 just below) is increased by one. If the bit was already 1, it
+	 * stays 1 and "countXX" is left alone, so exactly one lcore should
+	 * find each bit equal to 0.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, the
+	 * test-and-set calls behaved correctly across lcores.
+	 * NOTE(review): presumably needs at least one slave lcore, otherwise
+	 * "countXX" stays 0 and the check below fails - TODO confirm.
+	 */
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_set, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and set on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	/*
+	 * Launch all slave lcores to run "rte_test_and_clear_bitXX" on every
+	 * bit of valXX. If a bit was 1, it is cleared to 0 and "countXX"
+	 * (reset to 0 just below) is increased by one. If the bit was already
+	 * 0, it stays 0 and "countXX" is left alone, so exactly one lcore
+	 * should find each bit equal to 1.
+	 * If the final value of "countXX" is equal to MAX_BITS_XX, the
+	 * test-and-clear calls behaved correctly across lcores.
+	 * NOTE(review): presumably needs at least one slave lcore, otherwise
+	 * "countXX" stays 0 and the check below fails - TODO confirm.
+	 */
+
+	__atomic_store_n(&count32, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&count64, 0, __ATOMIC_RELAXED);
+
+	rte_eal_mp_remote_launch(test_bitops_test_clear, NULL, SKIP_MASTER);
+	__atomic_store_n(&synchro, 1,  __ATOMIC_RELAXED);
+	rte_eal_mp_wait_lcore();
+	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+
+	if (__atomic_load_n(&count32, __ATOMIC_RELAXED) != MAX_BITS_32) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+	if (__atomic_load_n(&count64, __ATOMIC_RELAXED) != MAX_BITS_64) {
+		printf("Failed to test and clear on multi cores.\n");
+		return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(bitops_autotest, test_bitops);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (29 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 2/6] test/bitops: add bit operation test case Joyce Kong
@ 2019-11-28  6:44 ` Joyce Kong
  2019-12-02  6:09   ` Gavin Hu (Arm Technology China)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 4/6] net/bnx2x: use common rte bit operation " Joyce Kong
                   ` (2 subsequent siblings)
  33 siblings, 1 reply; 70+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
 drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
 drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 34f60f1..9cabda8 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -22,6 +22,7 @@
 #include <pthread.h>
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 #include <rte_hexdump.h>
@@ -1674,34 +1675,6 @@ do {									\
 #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
 #define time_before_eq(a, b)	time_after_eq(b, a)
 
-/*---bitmap support apis---*/
-static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
-static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 static inline unsigned long msecs_to_timer_cycles(unsigned int m)
 {
 	return rte_get_timer_hz() * (m / 1000);
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d1f160e..fa597f3 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 	axgbe_dev_enable_tx(dev);
 	axgbe_dev_enable_rx(dev);
 
-	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
-	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_clear_bit64(AXGBE_STOPPED, &pdata->dev_state);
+	rte_clear_bit64(AXGBE_DOWN, &pdata->dev_state);
 	return 0;
 }
 
@@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
 
 	rte_intr_disable(&pdata->pci_dev->intr_handle);
 
-	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
+	if (rte_get_bit64(AXGBE_STOPPED, &pdata->dev_state))
 		return;
 
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
 	axgbe_dev_disable_tx(dev);
 	axgbe_dev_disable_rx(dev);
 
 	pdata->phy_if.phy_stop(pdata);
 	pdata->hw_if.exit(pdata);
 	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
 }
 
 /* Clear all resources like TX/RX queues. */
@@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 
 	pdata = eth_dev->data->dev_private;
 	/* initial state */
-	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
-	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
+	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
 	pdata->eth_dev = eth_dev;
 
 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2721e5c..00394a7 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct axgbe_port *pdata)
 {
 	int ret;
 
-	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+	rte_set_bit64(AXGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = rte_get_timer_cycles();
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
@@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port *pdata)
 
 	ret = __axgbe_phy_config_aneg(pdata);
 	if (ret)
-		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_set_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
 	else
-		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
+		rte_clear_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
 
 	pthread_mutex_unlock(&pdata->an_mutex);
 
@@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 	unsigned int link_aneg;
 	int an_restart;
 
-	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
+	if (rte_get_bit64(AXGBE_LINK_ERR, &pdata->dev_state)) {
 		pdata->phy.link = 0;
 		goto adjust_link;
 	}
@@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port *pdata)
 			return;
 		}
 		axgbe_phy_status_result(pdata);
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
-			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata->dev_state);
+		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state))
+			rte_clear_bit64(AXGBE_LINK_INIT, &pdata->dev_state);
 	} else {
-		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
+		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state)) {
 			axgbe_check_link_timeout(pdata);
 
 			if (link_aneg)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v5 4/6] net/bnx2x: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (30 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-11-28  6:44 ` " Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 5/6] net/qede: " Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 6/6] net/hinic: " Joyce Kong
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's own bit operation APIs and use the common ones;
this greatly reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/bnx2x/bnx2x.c    | 209 +++++++++++++++++++------------------------
 drivers/net/bnx2x/bnx2x.h    |   4 -
 drivers/net/bnx2x/ecore_sp.h |   9 +-
 3 files changed, 98 insertions(+), 124 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index ed31335..1c00a67 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -26,6 +26,7 @@
 #include <fcntl.h>
 #include <zlib.h>
 #include <rte_string_fns.h>
+#include <rte_bitops.h>
 
 #define BNX2X_PMD_VER_PREFIX "BNX2X PMD"
 #define BNX2X_PMD_VERSION_MAJOR 1
@@ -129,32 +130,6 @@ static void bnx2x_ack_sb(struct bnx2x_softc *sc, uint8_t igu_sb_id,
 			 uint8_t storm, uint16_t index, uint8_t op,
 			 uint8_t update);
 
-int bnx2x_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	mb();
-	return res;
-}
-
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-void bnx2x_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-int bnx2x_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
 int bnx2x_cmpxchg(volatile int *addr, int old, int new)
 {
 	return __sync_val_compare_and_swap(addr, old, new);
@@ -1427,11 +1402,11 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
 	/* wait for completion of requested */
 	if (wait_for_comp) {
-		bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+		rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 	}
 
 	/* Set the mac type of addresses we want to clear */
-	bnx2x_set_bit(mac_type, &vlan_mac_flags);
+	rte_set_bit64(mac_type, &vlan_mac_flags);
 
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc < 0)
@@ -1458,26 +1433,26 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		break;
 
 	case BNX2X_RX_MODE_NORMAL:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
 	case BNX2X_RX_MODE_ALLMULTI:
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		break;
 
@@ -1488,19 +1463,20 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 		 * should receive matched and unmatched (in resolution of port)
 		 * unicast packets.
 		 */
-		bnx2x_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_UNICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, rx_accept_flags);
 
 		/* internal switching mode */
-		bnx2x_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_BROADCAST, tx_accept_flags);
 
 		if (IS_MF_SI(sc)) {
-			bnx2x_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
+			rte_set_bit64(ECORE_ACCEPT_ALL_UNICAST,
+					tx_accept_flags);
 		} else {
-			bnx2x_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
+			rte_set_bit64(ECORE_ACCEPT_UNICAST, tx_accept_flags);
 		}
 
 		break;
@@ -1512,8 +1488,8 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
 	/* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
 	if (rx_mode != BNX2X_RX_MODE_NONE) {
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
-		bnx2x_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
+		rte_set_bit64(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
 	}
 
 	return 0;
@@ -1542,7 +1518,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
 	ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
 	ramrod_param.rdata_mapping =
 	    (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
-	    bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	    rte_set_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	ramrod_param.ramrod_flags = ramrod_flags;
 	ramrod_param.rx_mode_flags = rx_mode_flags;
@@ -1571,9 +1547,9 @@ int bnx2x_set_storm_rx_mode(struct bnx2x_softc *sc)
 		return rc;
 	}
 
-	bnx2x_set_bit(RAMROD_RX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_TX, &ramrod_flags);
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_RX, &ramrod_flags);
+	rte_set_bit64(RAMROD_TX, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	return bnx2x_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
 				 rx_accept_flags, tx_accept_flags,
@@ -1698,7 +1674,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
 			    "Forcing STARTED-->TX_STOPPED-->STARTED");
 
 		func_params.f_obj = &sc->func_obj;
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit64(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 
 		/* STARTED-->TX_STOPPED */
 		func_params.cmd = ECORE_F_CMD_TX_STOP;
@@ -1722,7 +1698,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
 
 	q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
 	/* We want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* Stop the primary connection: */
 
@@ -1783,7 +1759,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	int rc;
 
 	/* prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_STOP;
 
@@ -1797,7 +1773,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
 	if (rc) {
 		PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
 			    "Running a dry transaction");
-		bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
+		rte_set_bit64(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
 		return ecore_func_state_change(sc, &func_params);
 	}
 
@@ -1809,7 +1785,7 @@ static int bnx2x_reset_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	struct ecore_func_state_params func_params = { NULL };
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_RESET;
@@ -1866,11 +1842,10 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
 	 * a race between the completion code and this code.
 	 */
 
-	if (bnx2x_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
-		bnx2x_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
-	} else {
+	if (rte_get_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state))
+		rte_set_bit64(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
+	else
 		bnx2x_set_storm_rx_mode(sc);
-	}
 
 	/* Clean up multicast configuration */
 	rparam.mcast_obj = &sc->mcast_obj;
@@ -1960,12 +1935,12 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Cleanup MACs' object first... */
 
 	/* Wait for completion of requested */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 	/* Perform a dry cleanup */
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
+	rte_set_bit64(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
 
 	/* Clean ETH primary MAC */
-	bnx2x_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
+	rte_set_bit64(ECORE_ETH_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
 				 &ramrod_flags);
 	if (rc != 0) {
@@ -1974,7 +1949,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 
 	/* Cleanup UC list */
 	vlan_mac_flags = 0;
-	bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
+	rte_set_bit64(ECORE_UC_LIST_MAC, &vlan_mac_flags);
 	rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
 	if (rc != 0) {
 		PMD_DRV_LOG(NOTICE, sc,
@@ -1984,7 +1959,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
 	/* Now clean mcast object... */
 
 	rparam.mcast_obj = &sc->mcast_obj;
-	bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
+	rte_set_bit64(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
 
 	/* Add a DEL command... */
 	rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
@@ -4288,7 +4263,7 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 	struct ecore_vlan_mac_obj *vlan_mac_obj;
 
 	/* always push next commands out, don't wait here */
-	bnx2x_set_bit(RAMROD_CONT, &ramrod_flags);
+	rte_set_bit64(RAMROD_CONT, &ramrod_flags);
 
 	switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
 	case ECORE_FILTER_MAC_PENDING:
@@ -4319,12 +4294,12 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
 static void bnx2x_handle_rx_mode_eqe(struct bnx2x_softc *sc)
 {
-	bnx2x_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
+	rte_clear_bit64(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
 
 	/* send rx_mode command again if was requested */
-	if (bnx2x_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) {
+	if (rte_test_and_clear_bit64(ECORE_FILTER_RX_MODE_SCHED,
+						&sc->sp_state))
 		bnx2x_set_storm_rx_mode(sc);
-	}
 }
 
 static void bnx2x_update_eq_prod(struct bnx2x_softc *sc, uint16_t prod)
@@ -4693,7 +4668,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
 	PMD_INIT_FUNC_TRACE(sc);
 
 	/* prepare the parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_HW_INIT;
@@ -4988,8 +4963,8 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
 	bnx2x_update_fp_sb_idx(fp);
 
 	/* Configure Queue State object */
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_RX, &q_type);
-	bnx2x_set_bit(ECORE_Q_TYPE_HAS_TX, &q_type);
+	rte_set_bit64(ECORE_Q_TYPE_HAS_RX, &q_type);
+	rte_set_bit64(ECORE_Q_TYPE_HAS_TX, &q_type);
 
 	ecore_init_queue_obj(sc,
 			     &sc->sp_objs[idx].q_obj,
@@ -5803,7 +5778,7 @@ static int bnx2x_func_start(struct bnx2x_softc *sc)
 	    &func_params.params.start;
 
 	/* Prepare parameters for function state transitions */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
 
 	func_params.f_obj = &sc->func_obj;
 	func_params.cmd = ECORE_F_CMD_START;
@@ -6379,11 +6354,11 @@ bnx2x_pf_q_prep_init(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 	uint8_t cos;
 	int cxt_index, cxt_offset;
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC, &init_params->rx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC, &init_params->tx.flags);
 
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
-	bnx2x_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC_EN, &init_params->rx.flags);
+	rte_set_bit64(ECORE_Q_FLG_HC_EN, &init_params->tx.flags);
 
 	/* HC rate */
 	init_params->rx.hc_rate =
@@ -6417,7 +6392,7 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	unsigned long flags = 0;
 
 	/* PF driver will always initialize the Queue to an ACTIVE state */
-	bnx2x_set_bit(ECORE_Q_FLG_ACTIVE, &flags);
+	rte_set_bit64(ECORE_Q_FLG_ACTIVE, &flags);
 
 	/*
 	 * tx only connections collect statistics (on the same index as the
@@ -6425,9 +6400,9 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * connection is initialized.
 	 */
 
-	bnx2x_set_bit(ECORE_Q_FLG_STATS, &flags);
+	rte_set_bit64(ECORE_Q_FLG_STATS, &flags);
 	if (zero_stats) {
-		bnx2x_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags);
+		rte_set_bit64(ECORE_Q_FLG_ZERO_STATS, &flags);
 	}
 
 	/*
@@ -6435,10 +6410,10 @@ bnx2x_get_common_flags(struct bnx2x_softc *sc, uint8_t zero_stats)
 	 * CoS-ness doesn't survive the loopback
 	 */
 	if (sc->flags & BNX2X_TX_SWITCHING) {
-		bnx2x_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags);
+		rte_set_bit64(ECORE_Q_FLG_TX_SWITCH, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
+	rte_set_bit64(ECORE_Q_FLG_PCSUM_ON_PKT, &flags);
 
 	return flags;
 }
@@ -6448,15 +6423,15 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x_softc *sc, uint8_t leading)
 	unsigned long flags = 0;
 
 	if (IS_MF_SD(sc)) {
-		bnx2x_set_bit(ECORE_Q_FLG_OV, &flags);
+		rte_set_bit64(ECORE_Q_FLG_OV, &flags);
 	}
 
 	if (leading) {
-		bnx2x_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags);
-		bnx2x_set_bit(ECORE_Q_FLG_MCAST, &flags);
+		rte_set_bit64(ECORE_Q_FLG_LEADING_RSS, &flags);
+		rte_set_bit64(ECORE_Q_FLG_MCAST, &flags);
 	}
 
-	bnx2x_set_bit(ECORE_Q_FLG_VLAN, &flags);
+	rte_set_bit64(ECORE_Q_FLG_VLAN, &flags);
 
 	/* merge with common flags */
 	return flags | bnx2x_get_common_flags(sc, TRUE);
@@ -6577,7 +6552,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
 	q_params.q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
 	/* we want to wait for completion in this context */
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
 
 	/* prepare the INIT parameters */
 	bnx2x_pf_q_prep_init(sc, fp, &q_params.params.init);
@@ -6645,20 +6620,20 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 
 	params.rss_obj = rss_obj;
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &params.ramrod_flags);
 
-	bnx2x_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_MODE_REGULAR, &params.rss_flags);
 
 	/* RSS configuration */
-	bnx2x_set_bit(ECORE_RSS_IPV4, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6, &params.rss_flags);
-	bnx2x_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV4, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV4_TCP, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV6, &params.rss_flags);
+	rte_set_bit64(ECORE_RSS_IPV6_TCP, &params.rss_flags);
 	if (rss_obj->udp_rss_v4) {
-		bnx2x_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_IPV4_UDP, &params.rss_flags);
 	}
 	if (rss_obj->udp_rss_v6) {
-		bnx2x_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_IPV6_UDP, &params.rss_flags);
 	}
 
 	/* Hash bits */
@@ -6673,7 +6648,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
 			params.rss_key[i] = (uint32_t) rte_rand();
 		}
 
-		bnx2x_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags);
+		rte_set_bit64(ECORE_RSS_SET_SRCH, &params.rss_flags);
 	}
 
 	if (IS_PF(sc))
@@ -6730,11 +6705,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
 	ramrod_param.ramrod_flags = *ramrod_flags;
 
 	/* fill a user request section if needed */
-	if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
+	if (!rte_get_bit64(RAMROD_CONT, ramrod_flags)) {
 		rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
 				 ETH_ALEN);
 
-		bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
+		rte_set_bit64(mac_type, &ramrod_param.user_req.vlan_mac_flags);
 
 /* Set the command: ADD or DEL */
 		ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD :
@@ -6761,7 +6736,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 
 	PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
-	bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	rte_set_bit64(RAMROD_COMP_WAIT, &ramrod_flags);
 
 	/* Eth MAC is set on RSS leading client (fp[0]) */
 	return bnx2x_set_mac_one(sc, sc->link_params.mac_addr,
@@ -6893,24 +6868,26 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 
 	/* Link is down */
 	if (!sc->link_vars.link_up || (sc->flags & BNX2X_MF_FUNC_DIS)) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+		rte_set_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			    &data->link_report_flags);
 	}
 
 	/* Full DUPLEX */
 	if (sc->link_vars.duplex == DUPLEX_FULL) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		rte_set_bit64(BNX2X_LINK_REPORT_FULL_DUPLEX,
 			    &data->link_report_flags);
 	}
 
 	/* Rx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
+		rte_set_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
+				&data->link_report_flags);
 	}
 
 	/* Tx Flow Control is ON */
 	if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
-		bnx2x_set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
+		rte_set_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
+				&data->link_report_flags);
 	}
 }
 
@@ -6929,9 +6906,9 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 
 	/* Don't report link down or exactly the same link status twice */
 	if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
-	    (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	    (rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &sc->last_reported_link.link_report_flags) &&
-	     bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	     rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			  &cur_data.link_report_flags))) {
 		return;
 	}
@@ -6946,14 +6923,14 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 	/* report new link params and remember the state for the next time */
 	rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
-	if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
+	if (rte_get_bit64(BNX2X_LINK_REPORT_LINK_DOWN,
 			 &cur_data.link_report_flags)) {
 		ELINK_DEBUG_P0(sc, "NIC Link is Down");
 	} else {
 		__rte_unused const char *duplex;
 		__rte_unused const char *flow;
 
-		if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
+		if (rte_test_and_clear_bit64(BNX2X_LINK_REPORT_FULL_DUPLEX,
 					   &cur_data.link_report_flags)) {
 			duplex = "full";
 				ELINK_DEBUG_P0(sc, "link set to full duplex");
@@ -6968,19 +6945,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
  * enabled.
  */
 		if (cur_data.link_report_flags) {
-			if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			if (rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 					 &cur_data.link_report_flags) &&
-			    bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+			    rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 					 &cur_data.link_report_flags)) {
 				flow = "ON - receive & transmit";
-			} else if (bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 						&cur_data.link_report_flags) &&
-				   !bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   !rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 						 &cur_data.link_report_flags)) {
 				flow = "ON - receive";
-			} else if (!bnx2x_test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
+			} else if (!rte_get_bit64(BNX2X_LINK_REPORT_RX_FC_ON,
 						 &cur_data.link_report_flags) &&
-				   bnx2x_test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
+				   rte_get_bit64(BNX2X_LINK_REPORT_TX_FC_ON,
 						&cur_data.link_report_flags)) {
 				flow = "ON - transmit";
 			} else {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 3383c76..e6e66e8 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1809,10 +1809,6 @@ static const uint32_t dmae_reg_go_c[] = {
 #define PCI_PM_D0    1
 #define PCI_PM_D3hot 2
 
-int  bnx2x_test_bit(int nr, volatile unsigned long * addr);
-void bnx2x_set_bit(unsigned int nr, volatile unsigned long * addr);
-void bnx2x_clear_bit(int nr, volatile unsigned long * addr);
-int  bnx2x_test_and_clear_bit(int nr, volatile unsigned long * addr);
 int  bnx2x_cmpxchg(volatile int *addr, int old, int new);
 
 int bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size,
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index cc1db37..efbfdad 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -15,6 +15,7 @@
 #define ECORE_SP_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -73,10 +74,10 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
 #define ECORE_SET_BIT_NA(bit, var)         (*var |= (1 << bit))
 #define ECORE_CLEAR_BIT_NA(bit, var)       (*var &= ~(1 << bit))
 
-#define ECORE_TEST_BIT(bit, var)           bnx2x_test_bit(bit, var)
-#define ECORE_SET_BIT(bit, var)            bnx2x_set_bit(bit, var)
-#define ECORE_CLEAR_BIT(bit, var)          bnx2x_clear_bit(bit, var)
-#define ECORE_TEST_AND_CLEAR_BIT(bit, var) bnx2x_test_and_clear_bit(bit, var)
+#define ECORE_TEST_BIT(bit, var)           rte_get_bit64(bit, var)
+#define ECORE_SET_BIT(bit, var)            rte_set_bit64(bit, var)
+#define ECORE_CLEAR_BIT(bit, var)          rte_clear_bit64(bit, var)
+#define ECORE_TEST_AND_CLEAR_BIT(bit, var) rte_test_and_clear_bit64(bit, var)
 
 #define atomic_load_acq_int                (int)*
 #define atomic_store_rel_int(a, v)         (*a = v)
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v5 5/6] net/qede: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (31 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 4/6] net/bnx2x: use common rte bit operation " Joyce Kong
@ 2019-11-28  6:44 ` " Joyce Kong
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 6/6] net/hinic: " Joyce Kong
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's private bit operation APIs and use the common
ones instead; this considerably reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/qede/base/bcm_osal.c | 20 --------------------
 drivers/net/qede/base/bcm_osal.h | 10 ++++------
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 48d016e..19457d7 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -46,26 +46,6 @@ u32 qede_osal_log2(u32 val)
 	return log;
 }
 
-inline void qede_set_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-inline void qede_clr_bit(u32 nr, unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-inline bool qede_test_bit(u32 nr, unsigned long *addr)
-{
-	bool res;
-
-	rte_mb();
-	res = ((*addr) & (1UL << nr)) != 0;
-	rte_mb();
-	return res;
-}
-
 static inline u32 qede_ffb(unsigned long word)
 {
 	unsigned long first_bit;
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 0f09557..e7a7392 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -8,6 +8,7 @@
 #define __BCM_OSAL_H
 
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
 #include <rte_atomic.h>
@@ -311,17 +312,14 @@ typedef struct osal_list_t {
 #define OSAL_BITS_PER_UL_MASK		(OSAL_BITS_PER_UL - 1)
 
 /* Bitops */
-void qede_set_bit(u32, unsigned long *);
 #define OSAL_SET_BIT(bit, bitmap) \
-	qede_set_bit(bit, bitmap)
+	rte_set_bit64(bit, bitmap)
 
-void qede_clr_bit(u32, unsigned long *);
 #define OSAL_CLEAR_BIT(bit, bitmap) \
-	qede_clr_bit(bit, bitmap)
+	rte_clear_bit64(bit, bitmap)
 
-bool qede_test_bit(u32, unsigned long *);
 #define OSAL_TEST_BIT(bit, bitmap) \
-	qede_test_bit(bit, bitmap)
+	rte_get_bit64(bit, bitmap)
 
 u32 qede_find_first_bit(unsigned long *, u32);
 #define OSAL_FIND_FIRST_BIT(bitmap, length) \
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* [dpdk-dev] [PATCH v5 6/6] net/hinic: use common rte bit operation APIs instead
  2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
                   ` (32 preceding siblings ...)
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 5/6] net/qede: " Joyce Kong
@ 2019-11-28  6:44 ` " Joyce Kong
  33 siblings, 0 replies; 70+ messages in thread
From: Joyce Kong @ 2019-11-28  6:44 UTC (permalink / raw)
  To: dev
  Cc: nd, thomas, jerinj, stephen, mb, david.marchand,
	honnappa.nagarahalli, gavin.hu, ravi1.kumar, rmody, shshaikh,
	xuanziyang2, cloud.wangxiaoyun, zhouguoyang

Remove the driver's private bit operation APIs and use the common
ones instead; this considerably reduces code duplication.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 drivers/net/hinic/Makefile            |  1 +
 drivers/net/hinic/base/hinic_compat.h | 33 +--------------------------------
 drivers/net/hinic/hinic_pmd_ethdev.c  | 16 ++++++++--------
 drivers/net/hinic/meson.build         |  2 ++
 4 files changed, 12 insertions(+), 40 deletions(-)

diff --git a/drivers/net/hinic/Makefile b/drivers/net/hinic/Makefile
index 87fd843..f087baa 100644
--- a/drivers/net/hinic/Makefile
+++ b/drivers/net/hinic/Makefile
@@ -9,6 +9,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_hinic.a
 
 CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS)
 
 ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index e4a7f12..c0a0b3e 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <rte_common.h>
 #include <rte_byteorder.h>
+#include <rte_bitops.h>
 #include <rte_memzone.h>
 #include <rte_memcpy.h>
 #include <rte_malloc.h>
@@ -117,38 +118,6 @@ extern int hinic_logtype;
 
 #define HINIC_PAGE_SIZE_DPDK	6
 
-static inline int hinic_test_bit(int nr, volatile unsigned long *addr)
-{
-	int res;
-
-	res = ((*addr) & (1UL << nr)) != 0;
-	return res;
-}
-
-static inline void hinic_set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_or(addr, (1UL << nr));
-}
-
-static inline void hinic_clear_bit(int nr, volatile unsigned long *addr)
-{
-	__sync_fetch_and_and(addr, ~(1UL << nr));
-}
-
-static inline int hinic_test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_and(addr, ~mask) & mask;
-}
-
-static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = (1UL << nr);
-
-	return __sync_fetch_and_or(addr, mask) & mask;
-}
-
 void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle,
 			  gfp_t flag);
 void *dma_zalloc_coherent_aligned(void *dev, size_t size,
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index 803a39e..6858535 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -269,7 +269,7 @@ static void hinic_dev_interrupt_handler(void *param)
 	struct rte_eth_dev *dev = param;
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (!hinic_test_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
+	if (!rte_get_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device's interrupt is disabled, ignore interrupt event, dev_name: %s, port_id: %d",
 			    nic_dev->proc_dev_name, dev->data->port_id);
 		return;
@@ -1076,7 +1076,7 @@ static int hinic_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		(void)hinic_link_update(dev, 0);
 
-	hinic_set_bit(HINIC_DEV_START, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_START, &nic_dev->dev_status);
 
 	return 0;
 
@@ -1201,7 +1201,7 @@ static void hinic_dev_stop(struct rte_eth_dev *dev)
 	name = dev->data->name;
 	port_id = dev->data->port_id;
 
-	if (!hinic_test_and_clear_bit(HINIC_DEV_START, &nic_dev->dev_status)) {
+	if (!rte_test_and_clear_bit64(HINIC_DEV_START, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(INFO, "Device %s already stopped", name);
 		return;
 	}
@@ -1246,7 +1246,7 @@ static void hinic_disable_interrupt(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	int ret, retries = 0;
 
-	hinic_clear_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_clear_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* disable msix interrupt in hardware */
 	hinic_set_msix_state(nic_dev->hwdev, 0, HINIC_MSIX_DISABLE);
@@ -2845,7 +2845,7 @@ static void hinic_dev_close(struct rte_eth_dev *dev)
 {
 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
 
-	if (hinic_test_and_set_bit(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
+	if (rte_test_and_set_bit64(HINIC_DEV_CLOSE, &nic_dev->dev_status)) {
 		PMD_DRV_LOG(WARNING, "Device %s already closed",
 			    dev->data->name);
 		return;
@@ -3042,7 +3042,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 			    eth_dev->data->name);
 		goto enable_intr_fail;
 	}
-	hinic_set_bit(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_INTR_EN, &nic_dev->dev_status);
 
 	/* initialize filter info */
 	filter_info = &nic_dev->filter;
@@ -3054,7 +3054,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
 	TAILQ_INIT(&nic_dev->filter_fdir_rule_list);
 	TAILQ_INIT(&nic_dev->hinic_flow_list);
 
-	hinic_set_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_set_bit64(HINIC_DEV_INIT, &nic_dev->dev_status);
 	PMD_DRV_LOG(INFO, "Initialize %s in primary successfully",
 		    eth_dev->data->name);
 
@@ -3110,7 +3110,7 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)
 	struct hinic_nic_dev *nic_dev;
 
 	nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
-	hinic_clear_bit(HINIC_DEV_INIT, &nic_dev->dev_status);
+	rte_clear_bit64(HINIC_DEV_INIT, &nic_dev->dev_status);
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
diff --git a/drivers/net/hinic/meson.build b/drivers/net/hinic/meson.build
index bc7e246..8c7ee9d 100644
--- a/drivers/net/hinic/meson.build
+++ b/drivers/net/hinic/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Huawei Technologies Co., Ltd
 
+allow_experimental_apis = true
+
 subdir('base')
 objs = [base_objs]
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
@ 2019-12-02  6:09   ` Gavin Hu (Arm Technology China)
  2019-12-02  9:12     ` Thomas Monjalon
  0 siblings, 1 reply; 70+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-12-02  6:09 UTC (permalink / raw)
  To: Joyce Kong (Arm Technology China),
	dev, jerinj, stephen, thomas, Bruce Richardson,
	Morten Brørup
  Cc: nd, mb, david.marchand, Honnappa Nagarahalli, ravi1.kumar, rmody,
	shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang, nd

Hi Bruce, Thomas,

This series of patches was reported to cause a compilation failure[1] on 32-bit Ubuntu.
On mainstream 64-bit OSes, 'unsigned long' is 64 bits in size, so we use the 64-bit variants of the APIs. On a 32-bit OS, however, 'unsigned long' is only 32 bits in size, so it no longer matches the 64-bit arguments those APIs expect.
This is where the error happens.

My question is how we shall support 32-bit OSes; put another way, can we ignore this compilation issue?
If we still need to care, how about making 'unsigned long' obsolete and using 'uint32_t' instead, to be multi-OS friendly?

*Meson Build Failed #1:
OS: UB1604-32
Target:build-gcc-static
[1] http://mails.dpdk.org/archives/test-report/2019-November/109515.html 

> -----Original Message-----
> From: Joyce Kong <joyce.kong@arm.com>
> Sent: Thursday, November 28, 2019 2:44 PM
> To: dev@dpdk.org
> Cc: nd <nd@arm.com>; thomas@monjalon.net; jerinj@marvell.com;
> stephen@networkplumber.org; mb@smartsharesystems.com;
> david.marchand@redhat.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>; ravi1.kumar@amd.com; rmody@marvell.com;
> shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com
> Subject: [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs
> instead
> 
> Remove its own bit operation APIs and use the common one,
> this can reduce the code duplication largely.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
>  drivers/net/axgbe/axgbe_common.h | 29 +----------------------------
>  drivers/net/axgbe/axgbe_ethdev.c | 14 +++++++-------
>  drivers/net/axgbe/axgbe_mdio.c   | 14 +++++++-------
>  3 files changed, 15 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/net/axgbe/axgbe_common.h
> b/drivers/net/axgbe/axgbe_common.h
> index 34f60f1..9cabda8 100644
> --- a/drivers/net/axgbe/axgbe_common.h
> +++ b/drivers/net/axgbe/axgbe_common.h
> @@ -22,6 +22,7 @@
>  #include <pthread.h>
> 
>  #include <rte_byteorder.h>
> +#include <rte_bitops.h>
>  #include <rte_memory.h>
>  #include <rte_malloc.h>
>  #include <rte_hexdump.h>
> @@ -1674,34 +1675,6 @@ do {
> 			\
>  #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)
>  #define time_before_eq(a, b)	time_after_eq(b, a)
> 
> -/*---bitmap support apis---*/
> -static inline int axgbe_test_bit(int nr, volatile unsigned long *addr)
> -{
> -	int res;
> -
> -	rte_mb();
> -	res = ((*addr) & (1UL << nr)) != 0;
> -	rte_mb();
> -	return res;
> -}
> -
> -static inline void axgbe_set_bit(unsigned int nr, volatile unsigned long *addr)
> -{
> -	__sync_fetch_and_or(addr, (1UL << nr));
> -}
> -
> -static inline void axgbe_clear_bit(int nr, volatile unsigned long *addr)
> -{
> -	__sync_fetch_and_and(addr, ~(1UL << nr));
> -}
> -
> -static inline int axgbe_test_and_clear_bit(int nr, volatile unsigned long
> *addr)
> -{
> -	unsigned long mask = (1UL << nr);
> -
> -	return __sync_fetch_and_and(addr, ~mask) & mask;
> -}
> -
>  static inline unsigned long msecs_to_timer_cycles(unsigned int m)
>  {
>  	return rte_get_timer_hz() * (m / 1000);
> diff --git a/drivers/net/axgbe/axgbe_ethdev.c
> b/drivers/net/axgbe/axgbe_ethdev.c
> index d1f160e..fa597f3 100644
> --- a/drivers/net/axgbe/axgbe_ethdev.c
> +++ b/drivers/net/axgbe/axgbe_ethdev.c
> @@ -201,8 +201,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
>  	axgbe_dev_enable_tx(dev);
>  	axgbe_dev_enable_rx(dev);
> 
> -	axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
> -	axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
> +	rte_clear_bit64(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_clear_bit64(AXGBE_DOWN, &pdata->dev_state);
>  	return 0;
>  }
> 
> @@ -216,17 +216,17 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
> 
>  	rte_intr_disable(&pdata->pci_dev->intr_handle);
> 
> -	if (axgbe_test_bit(AXGBE_STOPPED, &pdata->dev_state))
> +	if (rte_get_bit64(AXGBE_STOPPED, &pdata->dev_state))
>  		return;
> 
> -	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
>  	axgbe_dev_disable_tx(dev);
>  	axgbe_dev_disable_rx(dev);
> 
>  	pdata->phy_if.phy_stop(pdata);
>  	pdata->hw_if.exit(pdata);
>  	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
> -	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
> +	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
>  }
> 
>  /* Clear all resources like TX/RX queues. */
> @@ -598,8 +598,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
> 
>  	pdata = eth_dev->data->dev_private;
>  	/* initial state */
> -	axgbe_set_bit(AXGBE_DOWN, &pdata->dev_state);
> -	axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
> +	rte_set_bit64(AXGBE_DOWN, &pdata->dev_state);
> +	rte_set_bit64(AXGBE_STOPPED, &pdata->dev_state);
>  	pdata->eth_dev = eth_dev;
> 
>  	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
> diff --git a/drivers/net/axgbe/axgbe_mdio.c
> b/drivers/net/axgbe/axgbe_mdio.c
> index 2721e5c..00394a7 100644
> --- a/drivers/net/axgbe/axgbe_mdio.c
> +++ b/drivers/net/axgbe/axgbe_mdio.c
> @@ -743,7 +743,7 @@ static int __axgbe_phy_config_aneg(struct
> axgbe_port *pdata)
>  {
>  	int ret;
> 
> -	axgbe_set_bit(AXGBE_LINK_INIT, &pdata->dev_state);
> +	rte_set_bit64(AXGBE_LINK_INIT, &pdata->dev_state);
>  	pdata->link_check = rte_get_timer_cycles();
> 
>  	ret = pdata->phy_if.phy_impl.an_config(pdata);
> @@ -807,9 +807,9 @@ static int axgbe_phy_config_aneg(struct axgbe_port
> *pdata)
> 
>  	ret = __axgbe_phy_config_aneg(pdata);
>  	if (ret)
> -		axgbe_set_bit(AXGBE_LINK_ERR, &pdata->dev_state);
> +		rte_set_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
>  	else
> -		axgbe_clear_bit(AXGBE_LINK_ERR, &pdata->dev_state);
> +		rte_clear_bit64(AXGBE_LINK_ERR, &pdata->dev_state);
> 
>  	pthread_mutex_unlock(&pdata->an_mutex);
> 
> @@ -880,7 +880,7 @@ static void axgbe_phy_status(struct axgbe_port
> *pdata)
>  	unsigned int link_aneg;
>  	int an_restart;
> 
> -	if (axgbe_test_bit(AXGBE_LINK_ERR, &pdata->dev_state)) {
> +	if (rte_get_bit64(AXGBE_LINK_ERR, &pdata->dev_state)) {
>  		pdata->phy.link = 0;
>  		goto adjust_link;
>  	}
> @@ -900,10 +900,10 @@ static void axgbe_phy_status(struct axgbe_port
> *pdata)
>  			return;
>  		}
>  		axgbe_phy_status_result(pdata);
> -		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state))
> -			axgbe_clear_bit(AXGBE_LINK_INIT, &pdata-
> >dev_state);
> +		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state))
> +			rte_clear_bit64(AXGBE_LINK_INIT, &pdata-
> >dev_state);
>  	} else {
> -		if (axgbe_test_bit(AXGBE_LINK_INIT, &pdata->dev_state)) {
> +		if (rte_get_bit64(AXGBE_LINK_INIT, &pdata->dev_state)) {
>  			axgbe_check_link_timeout(pdata);
> 
>  			if (link_aneg)
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead
  2019-12-02  6:09   ` Gavin Hu (Arm Technology China)
@ 2019-12-02  9:12     ` Thomas Monjalon
  2019-12-02  9:24       ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation " Morten Brørup
  0 siblings, 1 reply; 70+ messages in thread
From: Thomas Monjalon @ 2019-12-02  9:12 UTC (permalink / raw)
  To: Gavin Hu (Arm Technology China)
  Cc: Joyce Kong (Arm Technology China),
	dev, jerinj, stephen, Bruce Richardson, Morten Brørup, nd,
	david.marchand, Honnappa Nagarahalli, ravi1.kumar, rmody,
	shshaikh, xuanziyang2, cloud.wangxiaoyun, zhouguoyang

02/12/2019 07:09, Gavin Hu (Arm Technology China):
> Hi Bruce, Thomas,
> 
> This series of patches was reported a compilation issue[1] on 32bit Ubuntu. 
> On mainstream 64-bit OS,  "unsigned long" is 64-bit in size and we uses the 64-bit variant of APIs. But the 32-bit OS expect 32-bit 'unsigned long' arguments. 
> This is where the error happens. 

Please could you be more specific? What is the exact error?

> My question is how 32-bit OSes shall we support, put another way, can we ignore this compilation issue? 
> If we still need to care, how about making 'obsolete' of 'unsigned long' and use 'uint32' instead to be multi-OS friendly? 

Which unsigned long?
If it is in the (not yet merged) bit API, it can still be changed, no?




^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation APIs instead
  2019-12-02  9:12     ` Thomas Monjalon
@ 2019-12-02  9:24       ` " Morten Brørup
  2019-12-02  9:30         ` Thomas Monjalon
  2019-12-02 16:53         ` Stephen Hemminger
  0 siblings, 2 replies; 70+ messages in thread
From: Morten Brørup @ 2019-12-02  9:24 UTC (permalink / raw)
  To: Thomas Monjalon, Gavin Hu (Arm Technology China)
  Cc: Joyce Kong (Arm Technology China),
	dev, jerinj, stephen, Bruce Richardson, nd, david.marchand,
	Honnappa Nagarahalli, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang

Thomas,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Monday, December 2, 2019 10:12 AM
> 
> 02/12/2019 07:09, Gavin Hu (Arm Technology China):
> > Hi Bruce, Thomas,
> >
> > This series of patches was reported a compilation issue[1] on 32bit
> Ubuntu.
> > On mainstream 64-bit OS,  "unsigned long" is 64-bit in size and we
> uses the 64-bit variant of APIs. But the 32-bit OS expect 32-bit
> 'unsigned long' arguments.
> > This is where the error happens.
> 
> Please could you be more specific? What is the exact error?

The PMD has a private structure with an unsigned long field.

The patch for the PMD uses the 64 bit operations on this field. The patch fails to compile for a 32 bit target, because the struct field is only 32 bit there.

> 
> > My question is how 32-bit OSes shall we support, put another way, can
> we ignore this compilation issue?
> > If we still need to care, how about making 'obsolete' of 'unsigned
> long' and use 'uint32' instead to be multi-OS friendly?
> 
> Which unsigned long?
> If it is in the (not merged) bit API, it can still be changed no?
> 

The patch for the PMD can be changed to use the 64 or 32 bit operations depending on whether it is being compiled for a 64 or 32 bit target.

However, the question seems to be if we want to either 1) do something like that, or 2) drop support for 32 bit targets, or 3) make these target dependent fields obsolete (i.e. ban the use of unsigned long) and require explicit sizes, e.g. uint32_t.



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation APIs instead
  2019-12-02  9:24       ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation " Morten Brørup
@ 2019-12-02  9:30         ` Thomas Monjalon
  2019-12-02 16:53         ` Stephen Hemminger
  1 sibling, 0 replies; 70+ messages in thread
From: Thomas Monjalon @ 2019-12-02  9:30 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Gavin Hu (Arm Technology China),
	Joyce Kong (Arm Technology China),
	dev, jerinj, stephen, Bruce Richardson, nd, david.marchand,
	Honnappa Nagarahalli, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang

02/12/2019 10:24, Morten Brørup:
> Thomas,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> > Sent: Monday, December 2, 2019 10:12 AM
> > 
> > 02/12/2019 07:09, Gavin Hu (Arm Technology China):
> > > Hi Bruce, Thomas,
> > >
> > > This series of patches was reported a compilation issue[1] on 32bit
> > Ubuntu.
> > > On mainstream 64-bit OS,  "unsigned long" is 64-bit in size and we
> > uses the 64-bit variant of APIs. But the 32-bit OS expect 32-bit
> > 'unsigned long' arguments.
> > > This is where the error happens.
> > 
> > Please could you be more specific? What is the exact error?
> 
> The PMD has a private structure with an unsigned long field.
> 
> The patch for the PMD uses the 64 bit operations on this field. The patch fails to compile for a 32 bit target, because the struct field is only 32 bit there.
> 
> > 
> > > My question is how 32-bit OSes shall we support, put another way, can
> > we ignore this compilation issue?
> > > If we still need to care, how about making 'obsolete' of 'unsigned
> > long' and use 'uint32' instead to be multi-OS friendly?
> > 
> > Which unsigned long?
> > If it is in the (not merged) bit API, it can still be changed no?
> > 
> 
> The patch for the PMD can be changed to use the 64 or 32 bit operations depending on whether it is being compiled for a 64 or 32 bit target.
> 
> However, the question seems to be if we want to either 1) do something like that, or 2) drop support for 32 bit targets, or 3) make these target dependent fields obsolete (i.e. ban the use of unsigned long) and require explicit sizes, e.g. uint32_t.

We should support both,
and use the appropriate instruction.

But I wonder why this field does not have a fixed size.
It would probably be better to change the field to uint32_t or uint64_t.



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation APIs instead
  2019-12-02  9:24       ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation " Morten Brørup
  2019-12-02  9:30         ` Thomas Monjalon
@ 2019-12-02 16:53         ` Stephen Hemminger
  2019-12-03  6:52           ` Gavin Hu (Arm Technology China)
  1 sibling, 1 reply; 70+ messages in thread
From: Stephen Hemminger @ 2019-12-02 16:53 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Thomas Monjalon, Gavin Hu (Arm Technology China),
	Joyce Kong (Arm Technology China),
	dev, jerinj, Bruce Richardson, nd, david.marchand,
	Honnappa Nagarahalli, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang

On Mon, 2 Dec 2019 10:24:32 +0100
Morten Brørup <mb@smartsharesystems.com> wrote:

> Thomas,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> > Sent: Monday, December 2, 2019 10:12 AM
> > 
> > 02/12/2019 07:09, Gavin Hu (Arm Technology China):  
> > > Hi Bruce, Thomas,
> > >
> > > This series of patches was reported a compilation issue[1] on 32bit  
> > Ubuntu.  
> > > On mainstream 64-bit OS,  "unsigned long" is 64-bit in size and we  
> > uses the 64-bit variant of APIs. But the 32-bit OS expect 32-bit
> > 'unsigned long' arguments.  
> > > This is where the error happens.  
> > 
> > Please could you be more specific? What is the exact error?  
> 
> The PMD has a private structure with an unsigned long field.
> 
> The patch for the PMD uses the 64 bit operations on this field. The patch fails to compile for a 32 bit target, because the struct field is only 32 bit there.
> 
> >   
> > > My question is how 32-bit OSes shall we support, put another way, can  
> > we ignore this compilation issue?  
> > > If we still need to care, how about making 'obsolete' of 'unsigned  
> > long' and use 'uint32' instead to be multi-OS friendly?
> > 
> > Which unsigned long?
> > If it is in the (not merged) bit API, it can still be changed no?
> >   
> 
> The patch for the PMD can be changed to use the 64 or 32 bit operations depending on whether it is being compiled for a 64 or 32 bit target.
> 
> However, the question seems to be if we want to either 1) do something like that, or 2) drop support for 32 bit targets, or 3) make these target dependent fields obsolete (i.e. ban the use of unsigned long) and require explicit sizes, e.g. uint32_t.

The bitop library should not assume that unsigned long is 32 bits wide.

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation APIs instead
  2019-12-02 16:53         ` Stephen Hemminger
@ 2019-12-03  6:52           ` Gavin Hu (Arm Technology China)
  0 siblings, 0 replies; 70+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-12-03  6:52 UTC (permalink / raw)
  To: Stephen Hemminger, Morten Brørup
  Cc: thomas, Joyce Kong (Arm Technology China),
	dev, jerinj, Bruce Richardson, nd, david.marchand,
	Honnappa Nagarahalli, ravi1.kumar, rmody, shshaikh, xuanziyang2,
	cloud.wangxiaoyun, zhouguoyang, Gavin Hu (Arm Technology China),
	nd



> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Tuesday, December 3, 2019 12:53 AM
> To: Morten Brørup <mb@smartsharesystems.com>
> Cc: thomas@monjalon.net; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>; Joyce Kong (Arm Technology China)
> <Joyce.Kong@arm.com>; dev@dpdk.org; jerinj@marvell.com; Bruce
> Richardson <bruce.richardson@intel.com>; nd <nd@arm.com>;
> david.marchand@redhat.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; ravi1.kumar@amd.com;
> rmody@marvell.com; shshaikh@marvell.com; xuanziyang2@huawei.com;
> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com
> Subject: Re: [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte
> bitoperation APIs instead
> 
> On Mon, 2 Dec 2019 10:24:32 +0100
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > Thomas,
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas
> Monjalon
> > > Sent: Monday, December 2, 2019 10:12 AM
> > >
> > > 02/12/2019 07:09, Gavin Hu (Arm Technology China):
> > > > Hi Bruce, Thomas,
> > > >
> > > > This series of patches was reported a compilation issue[1] on 32bit
> > > Ubuntu.
> > > > On mainstream 64-bit OS,  "unsigned long" is 64-bit in size and we
> > > uses the 64-bit variant of APIs. But the 32-bit OS expect 32-bit
> > > 'unsigned long' arguments.
> > > > This is where the error happens.
> > >
> > > Please could you be more specific? What is the exact error?
> >
> > The PMD has a private structure with an unsigned long field.
> >
> > The patch for the PMD uses the 64 bit operations on this field. The patch
> fails to compile for a 32 bit target, because the struct field is only 32 bit there.
> >
> > >
> > > > My question is how 32-bit OSes shall we support, put another way, can
> > > we ignore this compilation issue?
> > > > If we still need to care, how about making 'obsolete' of 'unsigned
> > > long' and use 'uint32' instead to be multi-OS friendly?
> > >
> > > Which unsigned long?
> > > If it is in the (not merged) bit API, it can still be changed no?
> > >
> >
> > The patch for the PMD can be changed to use the 64 or 32 bit operations
> depending on whether it is being compiled for a 64 or 32 bit target.
> >
> > However, the question seems to be if we want to either 1) do something like
> that, or 2) drop support for 32 bit targets, or 3) make these target dependent
> fields obsolete (i.e. ban the use of unsigned long) and require explicit sizes, e.g.
> uint32_t.
> 
> The bitop library should not assume sizeof(unsigned long) == 32 bit.
As discussed, both 32-bit and 64-bit OSes should be supported, and the size of "unsigned long" differs between them.
Taking all of this into consideration, we will use "unsigned int" or uint32_t instead of "unsigned long" in the PMDs, to be compatible across 32-bit and 64-bit OSes.
/Gavin

^ permalink raw reply	[flat|nested] 70+ messages in thread

end of thread, back to index

Thread overview: 70+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-15  7:49 [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Joyce Kong
2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bit operation APIs Joyce Kong
2019-10-15 16:53   ` Stephen Hemminger
2019-10-18  9:00     ` Joyce Kong (Arm Technology China)
2019-10-16  7:54   ` Jerin Jacob
2019-10-18  9:02     ` Joyce Kong (Arm Technology China)
2019-10-23  3:12       ` Joyce Kong (Arm Technology China)
2019-10-16 19:05   ` Stephen Hemminger
2019-10-17 13:32   ` [dpdk-dev] [PATCH v1 1/5] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
2019-10-18  8:58     ` Joyce Kong (Arm Technology China)
2019-10-23  3:07       ` Joyce Kong (Arm Technology China)
2019-10-23  7:45         ` Morten Brørup
2019-10-23 17:30           ` Honnappa Nagarahalli
2019-10-24  3:38             ` Gavin Hu (Arm Technology China)
2019-11-01 13:48               ` Honnappa Nagarahalli
2019-11-03 15:45                 ` Gavin Hu (Arm Technology China)
2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 2/5] net/axgbe: use common rte bit operation APIs instead Joyce Kong
2019-10-15  7:49 ` [dpdk-dev] [PATCH v1 3/5] net/bnx2x: " Joyce Kong
2019-10-15  7:50 ` [dpdk-dev] [PATCH v1 4/5] net/hinic: " Joyce Kong
2019-10-15  7:50 ` [dpdk-dev] [PATCH v1 5/5] net/qede: " Joyce Kong
2019-10-15 16:51 ` [dpdk-dev] [PATCH v1 0/5] implement common rte bit operation APIs in PMDs Stephen Hemminger
2019-10-18  9:01   ` Joyce Kong (Arm Technology China)
2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 0/6] " Joyce Kong
2019-10-25 13:14   ` David Marchand
2019-10-29 16:42   ` Thomas Monjalon
2019-10-30  9:55     ` Gavin Hu (Arm Technology China)
2019-10-30 10:17       ` Thomas Monjalon
2019-10-30 12:32       ` Jerin Jacob
2019-10-30 13:02         ` Morten Brørup
2019-10-31 10:39           ` Gavin Hu (Arm Technology China)
2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
2019-10-23  3:09   ` Honnappa Nagarahalli
2019-10-23  4:56   ` Jerin Jacob
2019-10-23  7:46   ` Morten Brørup
2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 2/6] test/iobitops: add io bit operation test case Joyce Kong
2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
2019-10-23  3:16   ` Honnappa Nagarahalli
2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 4/6] net/bnx2x: " Joyce Kong
2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 5/6] net/hinic: " Joyce Kong
2019-10-23  2:54 ` [dpdk-dev] [PATCH v2 6/6] net/qede: " Joyce Kong
2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
2019-11-18 10:52   ` [dpdk-dev] [PATCH v3 1/6] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
2019-11-19  9:22     ` Joyce Kong (Arm Technology China)
2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 2/6] test/bitops: add bit operation test case Joyce Kong
2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 4/6] net/bnx2x: " Joyce Kong
2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 5/6] net/qede: " Joyce Kong
2019-11-18 10:06 ` [dpdk-dev] [PATCH v3 6/6] net/hinic: " Joyce Kong
2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
2019-11-20 13:40   ` [dpdk-dev] [PATCH v4 1/6] lib/eal: implement the family of rte bitoperation APIs Morten Brørup
2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 2/6] test/bitops: add bit operation test case Joyce Kong
2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 4/6] net/bnx2x: " Joyce Kong
2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 5/6] net/qede: " Joyce Kong
2019-11-20 10:12 ` [dpdk-dev] [PATCH v4 6/6] net/hinic: " Joyce Kong
2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 0/6] implement common rte bit operation APIs in PMDs Joyce Kong
2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 1/6] lib/eal: implement the family of rte bit operation APIs Joyce Kong
2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 2/6] test/bitops: add bit operation test case Joyce Kong
2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bit operation APIs instead Joyce Kong
2019-12-02  6:09   ` Gavin Hu (Arm Technology China)
2019-12-02  9:12     ` Thomas Monjalon
2019-12-02  9:24       ` [dpdk-dev] [PATCH v5 3/6] net/axgbe: use common rte bitoperation " Morten Brørup
2019-12-02  9:30         ` Thomas Monjalon
2019-12-02 16:53         ` Stephen Hemminger
2019-12-03  6:52           ` Gavin Hu (Arm Technology China)
2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 4/6] net/bnx2x: use common rte bit operation " Joyce Kong
2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 5/6] net/qede: " Joyce Kong
2019-11-28  6:44 ` [dpdk-dev] [PATCH v5 6/6] net/hinic: " Joyce Kong

DPDK-dev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/dpdk-dev/0 dpdk-dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dpdk-dev dpdk-dev/ https://lore.kernel.org/dpdk-dev \
		dev@dpdk.org
	public-inbox-index dpdk-dev

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git