From mboxrd@z Thu Jan 1 00:00:00 1970 From: Stephen Hemminger Subject: [PATCH v2 6/7] rte_sched: eliminate floating point in calculating byte clock Date: Wed, 4 Feb 2015 22:14:00 -0800 Message-ID: <1423116841-19799-6-git-send-email-stephen@networkplumber.org> References: <1423116841-19799-4-git-send-email-stephen@networkplumber.org> Cc: Stephen Hemminger To: dev-VfR2kkLFssw@public.gmane.org Return-path: In-Reply-To: <1423116841-19799-4-git-send-email-stephen-OTpzqLSitTUnbdJkjeBofR2eb7JE58TQ@public.gmane.org> List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces-VfR2kkLFssw@public.gmane.org Sender: "dev" From: Stephen Hemminger The old code was doing a floating-point divide for each rte_dequeue(), which is very expensive. Change to using fixed-point scaled math instead. This improved performance from 5 Gbit/sec to 10 Gbit/sec. Signed-off-by: Stephen Hemminger --- lib/librte_sched/rte_sched.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 55fbc14..3023457 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -102,6 +102,9 @@ #define RTE_SCHED_BMP_POS_INVALID UINT32_MAX +/* For cycles_per_byte calculation */ +#define RTE_SCHED_TIME_SHIFT 20 + struct rte_sched_subport { /* Token bucket (TB) */ uint64_t tb_time; /* time of last update */ @@ -239,7 +242,7 @@ struct rte_sched_port { uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cyles */ uint64_t time_cpu_bytes; /* Current CPU time measured in bytes */ uint64_t time; /* Current NIC TX time measured in bytes */ - double cycles_per_byte; /* CPU cycles per byte */ + uint32_t cycles_per_byte; /* CPU cycles per byte (scaled) */ /* Scheduling loop detection */ uint32_t pipe_loop; @@ -657,7 +660,9 @@ rte_sched_port_config(struct rte_sched_port_params *params) port->time_cpu_cycles = rte_get_tsc_cycles(); 
port->time_cpu_bytes = 0; port->time = 0; - port->cycles_per_byte = ((double) rte_get_tsc_hz()) / ((double) params->rate); + + port->cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT) + / params->rate; /* Scheduling loop detection */ port->pipe_loop = RTE_SCHED_PIPE_INVALID; @@ -2156,11 +2161,12 @@ rte_sched_port_time_resync(struct rte_sched_port *port) { uint64_t cycles = rte_get_tsc_cycles(); uint64_t cycles_diff = cycles - port->time_cpu_cycles; - double bytes_diff = ((double) cycles_diff) / port->cycles_per_byte; + uint64_t bytes_diff = (cycles_diff << RTE_SCHED_TIME_SHIFT) + / port->cycles_per_byte; /* Advance port time */ port->time_cpu_cycles = cycles; - port->time_cpu_bytes += (uint64_t) bytes_diff; + port->time_cpu_bytes += bytes_diff; if (port->time < port->time_cpu_bytes) { port->time = port->time_cpu_bytes; } -- 2.1.4