From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:48943) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YECjP-0000B1-Qt for qemu-devel@nongnu.org; Thu, 22 Jan 2015 03:03:25 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YECjC-0005sB-V9 for qemu-devel@nongnu.org; Thu, 22 Jan 2015 03:03:11 -0500 Received: from mail-pa0-x234.google.com ([2607:f8b0:400e:c03::234]:40270) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YECjC-0005p9-21 for qemu-devel@nongnu.org; Thu, 22 Jan 2015 03:02:58 -0500 Received: by mail-pa0-f52.google.com with SMTP id kx10so212922pab.11 for ; Thu, 22 Jan 2015 00:02:57 -0800 (PST) From: sfeldma@gmail.com Date: Thu, 22 Jan 2015 00:03:55 -0800 Message-Id: <1421913839-22448-7-git-send-email-sfeldma@gmail.com> In-Reply-To: <1421913839-22448-1-git-send-email-sfeldma@gmail.com> References: <1421913839-22448-1-git-send-email-sfeldma@gmail.com> Subject: [Qemu-devel] [PATCH v5 06/10] rocker: add new rocker switch device List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org, jiri@resnulli.us, roopa@cumulusnetworks.com, john.fastabend@gmail.com, eblake@redhat.com, pbonzini@redhat.com, stefanha@gmail.com, dsahern@gmail.com, jasowang@redhat.com From: Scott Feldman Rocker is a simulated ethernet switch device. The device supports up to 62 front-panel ports and supports L2 switching and L3 routing functions, as well as L2/L3/L4 ACLs. The device presents a single PCI device for each switch, with a memory-mapped register space for device driver access. Rocker device is invoked with -device, for example a 4-port switch: -device rocker,name=sw1,len-ports=4,ports[0]=dev0,ports[1]=dev1, \ ports[2]=dev2,ports[3]=dev3 Each port is a netdev and can be paired with using -netdev id=. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko --- default-configs/pci.mak | 1 + hw/net/Makefile.objs | 3 + hw/net/rocker/rocker.c | 1472 ++++++++++++++++++++++++++ hw/net/rocker/rocker.h | 76 ++ hw/net/rocker/rocker_desc.c | 379 +++++++ hw/net/rocker/rocker_desc.h | 57 + hw/net/rocker/rocker_fp.c | 232 +++++ hw/net/rocker/rocker_fp.h | 53 + hw/net/rocker/rocker_hw.h | 491 +++++++++ hw/net/rocker/rocker_of_dpa.c | 2314 +++++++++++++++++++++++++++++++++++++++++ hw/net/rocker/rocker_of_dpa.h | 25 + hw/net/rocker/rocker_tlv.h | 244 +++++ hw/net/rocker/rocker_world.c | 108 ++ hw/net/rocker/rocker_world.h | 63 ++ 14 files changed, 5518 insertions(+) create mode 100644 hw/net/rocker/rocker.c create mode 100644 hw/net/rocker/rocker.h create mode 100644 hw/net/rocker/rocker_desc.c create mode 100644 hw/net/rocker/rocker_desc.h create mode 100644 hw/net/rocker/rocker_fp.c create mode 100644 hw/net/rocker/rocker_fp.h create mode 100644 hw/net/rocker/rocker_hw.h create mode 100644 hw/net/rocker/rocker_of_dpa.c create mode 100644 hw/net/rocker/rocker_of_dpa.h create mode 100644 hw/net/rocker/rocker_tlv.h create mode 100644 hw/net/rocker/rocker_world.c create mode 100644 hw/net/rocker/rocker_world.h diff --git a/default-configs/pci.mak b/default-configs/pci.mak index a186c39..a7f3278 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -32,3 +32,4 @@ CONFIG_PCI_TESTDEV=y CONFIG_NVME_PCI=y CONFIG_SD=y CONFIG_SDHCI=y +CONFIG_ROCKER=y diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs index ea93293..4f8e826 100644 --- a/hw/net/Makefile.objs +++ b/hw/net/Makefile.objs @@ -35,3 +35,6 @@ obj-y += vhost_net.o obj-$(CONFIG_ETSEC) += fsl_etsec/etsec.o fsl_etsec/registers.o \ fsl_etsec/rings.o fsl_etsec/miim.o + +common-obj-y += rocker/rocker.o rocker/rocker_fp.o rocker/rocker_desc.o \ + rocker/rocker_world.o rocker/rocker_of_dpa.o diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c new file mode 100644 index 0000000..d8a1a8e --- /dev/null +++ b/hw/net/rocker/rocker.c @@ -0,0 +1,1472 @@ +/* + * QEMU rocker switch emulation - PCI device + * + * Copyright (c) 2014 Scott Feldman + * Copyright (c) 2014 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "hw/pci/msix.h" +#include "net/net.h" +#include "net/eth.h" +#include "qemu/iov.h" +#include "qemu/bitops.h" +#include "qmp-commands.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_fp.h" +#include "rocker_desc.h" +#include "rocker_tlv.h" +#include "rocker_world.h" +#include "rocker_of_dpa.h" + +typedef struct rocker { + /* private */ + PCIDevice parent_obj; + /* public */ + + MemoryRegion mmio; + MemoryRegion msix_bar; + + /* switch configuration */ + char *name; /* switch name */ + uint32_t fp_ports; /* front-panel port count */ + NICPeers *fp_ports_peers; + MACAddr fp_start_macaddr; /* front-panel port 0 mac addr */ + uint64_t switch_id; /* switch id */ + + /* front-panel ports */ + FpPort *fp_port[ROCKER_FP_PORTS_MAX]; + + /* register backings */ + uint32_t test_reg; + uint64_t test_reg64; + dma_addr_t test_dma_addr; + uint32_t test_dma_size; + uint64_t lower32; /* lower 32-bit val in 2-part 64-bit access */ + + /* desc rings */ + DescRing **rings; + + /* switch worlds */ + World *worlds[ROCKER_WORLD_TYPE_MAX]; + World *world_dflt; + + QLIST_ENTRY(rocker) next; +} Rocker; + +#define ROCKER "rocker" + +#define to_rocker(obj) \ + OBJECT_CHECK(Rocker, (obj), ROCKER) + +static QLIST_HEAD(, rocker) rockers; + +Rocker *rocker_find(const char *name) +{ + Rocker *r; + + QLIST_FOREACH(r, &rockers, next) + if (strcmp(r->name, name) == 0) { + return r; + } + + return NULL; +} + +World *rocker_get_world(Rocker *r, enum rocker_world_type type) +{ + if (type < ROCKER_WORLD_TYPE_MAX) { + return r->worlds[type]; + } + return NULL; +} + +uint32_t rocker_fp_ports(Rocker *r) +{ + return r->fp_ports; +} + +static uint32_t rocker_get_pport_by_tx_ring(Rocker *r, + DescRing *ring) +{ + return (desc_ring_index(ring) - 2) / 2 + 1; +} + +static int tx_consume(Rocker *r, DescInfo *info) +{ + PCIDevice *dev = PCI_DEVICE(r); + char *buf = desc_get_buf(info, true); + RockerTlv *tlv_frag; + RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1]; + struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, }; + uint32_t pport; + uint32_t port; + uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE; + uint16_t tx_l3_csum_off = 0; + uint16_t tx_tso_mss = 0; + uint16_t tx_tso_hdr_len = 0; + int iovcnt = 0; + int err = ROCKER_OK; + int rem; + int i; + + if (!buf) { + return -ROCKER_ENXIO; + } + + rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info)); + + if (!tlvs[ROCKER_TLV_TX_FRAGS]) { + return -ROCKER_EINVAL; + } + + pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info)); + if (!fp_port_from_pport(pport, &port)) { + return -ROCKER_EINVAL; + } + + if (tlvs[ROCKER_TLV_TX_OFFLOAD]) { + tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]); + } + + switch (tx_offload) { + case ROCKER_TX_OFFLOAD_L3_CSUM: + if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { + return -ROCKER_EINVAL; + } + case ROCKER_TX_OFFLOAD_TSO: + if (!tlvs[ROCKER_TLV_TX_TSO_MSS] || + !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { + return -ROCKER_EINVAL; + } + } + + if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { + tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]); + } + + if (tlvs[ROCKER_TLV_TX_TSO_MSS]) { + tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]); + } + + if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { + tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]); + } + + rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) { + hwaddr frag_addr; + uint16_t frag_len; + + if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) { + return -ROCKER_EINVAL; + } + + rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag); + + if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] || + !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) { + return -ROCKER_EINVAL; + } + + frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]); + frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]); + + iov[iovcnt].iov_len = frag_len; + iov[iovcnt].iov_base = g_malloc(frag_len); + if (!iov[iovcnt].iov_base) { + err = -ROCKER_ENOMEM; + goto err_no_mem; + } + + if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base, + iov[iovcnt].iov_len)) { + err = -ROCKER_ENXIO; + goto err_bad_io; + } + + if (++iovcnt > ROCKER_TX_FRAGS_MAX) { + goto err_too_many_frags; + } + } + + if (iovcnt) { + /* XXX perform Tx offloads */ + /* XXX silence compiler for now */ + tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0; + } + + err = fp_port_eg(r->fp_port[port], iov, iovcnt); + +err_no_mem: +err_bad_io: +err_too_many_frags: + for (i = 0; i < iovcnt; i++) { + if (iov[i].iov_base) { + g_free(iov[i].iov_base); + } + } + + return err; +} + +static int cmd_get_port_settings(Rocker *r, + DescInfo *info, char *buf, + RockerTlv *cmd_info_tlv) +{ + RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; + RockerTlv *nest; + FpPort *fp_port; + uint32_t pport; + uint32_t port; + uint32_t speed; + uint8_t duplex; + uint8_t autoneg; + uint8_t learning; + MACAddr macaddr; + enum rocker_world_type mode; + size_t tlv_size; + int pos; + int err; + + rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + cmd_info_tlv); + + if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) { + return -ROCKER_EINVAL; + } + + pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]); + if (!fp_port_from_pport(pport, &port)) { + return -ROCKER_EINVAL; + } + fp_port = r->fp_port[port]; + + err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg); + if (err) { + return err; + } + + fp_port_get_macaddr(fp_port, &macaddr); + mode = world_type(fp_port_get_world(fp_port)); + learning = fp_port_get_learning(fp_port); + + tlv_size = rocker_tlv_total_size(0) + /* nest */ + rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */ + rocker_tlv_total_size(sizeof(uint32_t)) + /* speed */ + rocker_tlv_total_size(sizeof(uint8_t)) + /* duplex */ + rocker_tlv_total_size(sizeof(uint8_t)) + /* autoneg */ + rocker_tlv_total_size(sizeof(macaddr.a)) + /* macaddr */ + rocker_tlv_total_size(sizeof(uint8_t)) + /* mode */ + rocker_tlv_total_size(sizeof(uint8_t)); /* learning */ + + if (tlv_size > desc_buf_size(info)) { + return -ROCKER_EMSGSIZE; + } + + pos = 0; + nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO); + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport); + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed); + rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex); + rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg); + rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, + sizeof(macaddr.a), macaddr.a); + rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode); + rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, + learning); + rocker_tlv_nest_end(buf, &pos, nest); + + return desc_set_buf(info, tlv_size); +} + +static int cmd_set_port_settings(Rocker *r, + RockerTlv *cmd_info_tlv) +{ + RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; + FpPort *fp_port; + uint32_t pport; + uint32_t port; + uint32_t speed; + uint8_t duplex; + uint8_t autoneg; + uint8_t learning; + MACAddr macaddr; + enum rocker_world_type mode; + int err; + + rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + cmd_info_tlv); + + if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) { + return -ROCKER_EINVAL; + } + + pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]); + if (!fp_port_from_pport(pport, &port)) { + return -ROCKER_EINVAL; + } + fp_port = r->fp_port[port]; + + if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] && + tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] && + tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) { + + speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]); + duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]); + autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]); + + err = fp_port_set_settings(fp_port, speed, duplex, autoneg); + if (err) { + return err; + } + } + + if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) { + if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) != + sizeof(macaddr.a)) { + return -ROCKER_EINVAL; + } + memcpy(macaddr.a, + rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]), + sizeof(macaddr.a)); + fp_port_set_macaddr(fp_port, &macaddr); + } + + if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) { + mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]); + fp_port_set_world(fp_port, r->worlds[mode]); + } + + if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) { + learning = + rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]); + fp_port_set_learning(fp_port, learning); + } + + return ROCKER_OK; +} + +static int cmd_consume(Rocker *r, DescInfo *info) +{ + char *buf = desc_get_buf(info, false); + RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1]; + RockerTlv *info_tlv; + World *world; + uint16_t cmd; + int err; + + if (!buf) { + return -ROCKER_ENXIO; + } + + rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info)); + + if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) { + return -ROCKER_EINVAL; + } + + cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]); + info_tlv = tlvs[ROCKER_TLV_CMD_INFO]; + + /* This might be reworked to something like this: + * Every world will have an array of command handlers from + * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is + * up to each world to implement whatever command it want. + * It can reference "generic" commands as cmd_set_port_settings or + * cmd_get_port_settings + */ + + switch (cmd) { + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS: + world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA]; + err = world_do_cmd(world, info, buf, cmd, info_tlv); + break; + case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS: + err = cmd_get_port_settings(r, info, buf, info_tlv); + break; + case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS: + err = cmd_set_port_settings(r, info_tlv); + break; + default: + err = -ROCKER_EINVAL; + break; + } + + return err; +} + +static void rocker_msix_irq(Rocker *r, unsigned vector) +{ + PCIDevice *dev = PCI_DEVICE(r); + + DPRINTF("MSI-X notify request for vector %d\n", vector); + if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) { + DPRINTF("incorrect vector %d\n", vector); + return; + } + msix_notify(dev, vector); +} + +int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up) +{ + DescRing *ring = r->rings[ROCKER_RING_EVENT]; + DescInfo *info = desc_ring_fetch_desc(ring); + RockerTlv *nest; + char *buf; + size_t tlv_size; + int pos; + int err; + + if (!info) { + return -ROCKER_ENOBUFS; + } + + tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* event type */ + rocker_tlv_total_size(0) + /* nest */ + rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */ + rocker_tlv_total_size(sizeof(uint8_t)); /* link up */ + + if (tlv_size > desc_buf_size(info)) { + err = -ROCKER_EMSGSIZE; + goto err_too_big; + } + + buf = desc_get_buf(info, false); + if (!buf) { + err = -ROCKER_ENOMEM; + goto err_no_mem; + } + + pos = 0; + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE, + ROCKER_TLV_EVENT_TYPE_LINK_CHANGED); + nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO); + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport); + rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, + link_up ? 1 : 0); + rocker_tlv_nest_end(buf, &pos, nest); + + err = desc_set_buf(info, tlv_size); + +err_too_big: +err_no_mem: + if (desc_ring_post_desc(ring, err)) { + rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT); + } + + return err; +} + +int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, + uint16_t vlan_id) +{ + DescRing *ring = r->rings[ROCKER_RING_EVENT]; + DescInfo *info; + FpPort *fp_port; + uint32_t port; + RockerTlv *nest; + char *buf; + size_t tlv_size; + int pos; + int err; + + if (!fp_port_from_pport(pport, &port)) { + return -ROCKER_EINVAL; + } + fp_port = r->fp_port[port]; + if (!fp_port_get_learning(fp_port)) { + return ROCKER_OK; + } + + info = desc_ring_fetch_desc(ring); + if (!info) { + return -ROCKER_ENOBUFS; + } + + tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* event type */ + rocker_tlv_total_size(0) + /* nest */ + rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */ + rocker_tlv_total_size(ETH_ALEN) + /* mac addr */ + rocker_tlv_total_size(sizeof(uint16_t)); /* vlan_id */ + + if (tlv_size > desc_buf_size(info)) { + err = -ROCKER_EMSGSIZE; + goto err_too_big; + } + + buf = desc_get_buf(info, false); + if (!buf) { + err = -ROCKER_ENOMEM; + goto err_no_mem; + } + + pos = 0; + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE, + ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN); + nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO); + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport); + rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr); + rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id); + rocker_tlv_nest_end(buf, &pos, nest); + + err = desc_set_buf(info, tlv_size); + +err_too_big: +err_no_mem: + if (desc_ring_post_desc(ring, err)) { + rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT); + } + + return err; +} + +static DescRing *rocker_get_rx_ring_by_pport(Rocker *r, + uint32_t pport) +{ + return r->rings[(pport - 1) * 2 + 3]; +} + +int rx_produce(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt) +{ + Rocker *r = world_rocker(world); + PCIDevice *dev = (PCIDevice *)r; + DescRing *ring = rocker_get_rx_ring_by_pport(r, pport); + DescInfo *info = desc_ring_fetch_desc(ring); + char *data; + size_t data_size = iov_size(iov, iovcnt); + char *buf; + uint16_t rx_flags = 0; + uint16_t rx_csum = 0; + size_t tlv_size; + RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1]; + hwaddr frag_addr; + uint16_t frag_max_len; + int pos; + int err; + + if (!info) { + return -ROCKER_ENOBUFS; + } + + buf = desc_get_buf(info, false); + if (!buf) { + err = -ROCKER_ENXIO; + goto out; + } + rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info)); + + if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] || + !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) { + err = -ROCKER_EINVAL; + goto out; + } + + frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]); + frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]); + + if (data_size > frag_max_len) { + err = -ROCKER_EMSGSIZE; + goto out; + } + + /* XXX calc rx flags/csum */ + + tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */ + rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */ + rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */ + rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */ + rocker_tlv_total_size(sizeof(uint16_t)); /* frag len */ + + if (tlv_size > desc_buf_size(info)) { + err = -ROCKER_EMSGSIZE; + goto out; + } + + /* TODO: + * iov dma write can be optimized in similar way e1000 does it in + * e1000_receive_iov. But maybe if would make sense to introduce + * generic helper iov_dma_write. + */ + + data = g_malloc(data_size); + if (!data) { + err = -ROCKER_ENOMEM; + goto out; + } + iov_to_buf(iov, iovcnt, 0, data, data_size); + pci_dma_write(dev, frag_addr, data, data_size); + g_free(data); + + pos = 0; + rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags); + rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum); + rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr); + rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len); + rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size); + + err = desc_set_buf(info, tlv_size); + +out: + if (desc_ring_post_desc(ring, err)) { + rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1)); + } + + return err; +} + +int rocker_port_eg(Rocker *r, uint32_t pport, + const struct iovec *iov, int iovcnt) +{ + FpPort *fp_port; + uint32_t port; + + if (!fp_port_from_pport(pport, &port)) { + return -ROCKER_EINVAL; + } + + fp_port = r->fp_port[port]; + + return fp_port_eg(fp_port, iov, iovcnt); +} + +static void rocker_test_dma_ctrl(Rocker *r, uint32_t val) +{ + PCIDevice *dev = PCI_DEVICE(r); + char *buf; + int i; + + buf = malloc(r->test_dma_size); + + if (!buf) { + DPRINTF("test dma buffer alloc failed"); + return; + } + + switch (val) { + case ROCKER_TEST_DMA_CTRL_CLEAR: + memset(buf, 0, r->test_dma_size); + break; + case ROCKER_TEST_DMA_CTRL_FILL: + memset(buf, 0x96, r->test_dma_size); + break; + case ROCKER_TEST_DMA_CTRL_INVERT: + pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size); + for (i = 0; i < r->test_dma_size; i++) { + buf[i] = ~buf[i]; + } + break; + default: + DPRINTF("not test dma control val=0x%08x\n", val); + return; + } + pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size); + + rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST); +} + +static void rocker_reset(DeviceState *dev); + +static void rocker_control(Rocker *r, uint32_t val) +{ + if (val & ROCKER_CONTROL_RESET) { + rocker_reset(DEVICE(r)); + } +} + +static int rocker_pci_ring_count(Rocker *r) +{ + /* There are: + * - command ring + * - event ring + * - tx and rx ring per each port + */ + return 2 + (2 * r->fp_ports); +} + +static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr) +{ + hwaddr start = ROCKER_DMA_DESC_BASE; + hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r)); + + return addr >= start && addr < end; +} + +static void rocker_port_phys_enable_write(Rocker *r, uint64_t new) +{ + int i; + bool old_enabled; + bool new_enabled; + FpPort *fp_port; + + for (i = 0; i < r->fp_ports; i++) { + fp_port = r->fp_port[i]; + old_enabled = fp_port_enabled(fp_port); + new_enabled = (new >> (i + 1)) & 0x1; + if (new_enabled == old_enabled) { + continue; + } + if (new_enabled) { + fp_port_enable(r->fp_port[i]); + } else { + fp_port_disable(r->fp_port[i]); + } + } +} + +static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val) +{ + Rocker *r = opaque; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + + switch (offset) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + r->lower32 = (uint64_t)val; + break; + case ROCKER_DMA_DESC_ADDR_OFFSET + 4: + desc_ring_set_base_addr(r->rings[index], + ((uint64_t)val) << 32 | r->lower32); + r->lower32 = 0; + break; + case ROCKER_DMA_DESC_SIZE_OFFSET: + desc_ring_set_size(r->rings[index], val); + break; + case ROCKER_DMA_DESC_HEAD_OFFSET: + if (desc_ring_set_head(r->rings[index], val)) { + rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index])); + } + break; + case ROCKER_DMA_DESC_CTRL_OFFSET: + desc_ring_set_ctrl(r->rings[index], val); + break; + case ROCKER_DMA_DESC_CREDITS_OFFSET: + if (desc_ring_ret_credits(r->rings[index], val)) { + rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index])); + } + break; + default: + DPRINTF("not implemented dma reg write(l) addr=0x%lx " + "val=0x%08x (ring %d, addr=0x%02x)\n", + addr, val, index, offset); + break; + } + return; + } + + switch (addr) { + case ROCKER_TEST_REG: + r->test_reg = val; + break; + case ROCKER_TEST_REG64: + case ROCKER_TEST_DMA_ADDR: + case ROCKER_PORT_PHYS_ENABLE: + r->lower32 = (uint64_t)val; + break; + case ROCKER_TEST_REG64 + 4: + r->test_reg64 = ((uint64_t)val) << 32 | r->lower32; + r->lower32 = 0; + break; + case ROCKER_TEST_IRQ: + rocker_msix_irq(r, val); + break; + case ROCKER_TEST_DMA_SIZE: + r->test_dma_size = val; + break; + case ROCKER_TEST_DMA_ADDR + 4: + r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32; + r->lower32 = 0; + break; + case ROCKER_TEST_DMA_CTRL: + rocker_test_dma_ctrl(r, val); + break; + case ROCKER_CONTROL: + rocker_control(r, val); + break; + case ROCKER_PORT_PHYS_ENABLE + 4: + rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32); + r->lower32 = 0; + break; + default: + DPRINTF("not implemented write(l) addr=0x%lx val=0x%08x\n", addr, val); + break; + } +} + +static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val) +{ + Rocker *r = opaque; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + + switch (offset) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + desc_ring_set_base_addr(r->rings[index], val); + break; + default: + DPRINTF("not implemented dma reg write(q) addr=0x%lx " + "val=0x%016lx (ring %d, offset=0x%02x)\n", + addr, val, index, offset); + break; + } + return; + } + + switch (addr) { + case ROCKER_TEST_REG64: + r->test_reg64 = val; + break; + case ROCKER_TEST_DMA_ADDR: + r->test_dma_addr = val; + break; + case ROCKER_PORT_PHYS_ENABLE: + rocker_port_phys_enable_write(r, val); + break; + default: + DPRINTF("not implemented write(q) addr=0x%lx val=0x%016lx\n", + addr, val); + break; + } +} + +#ifdef DEBUG_ROCKER +#define regname(reg) case (reg): return #reg +static const char *rocker_reg_name(void *opaque, hwaddr addr) +{ + Rocker *r = opaque; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + static char buf[100]; + char ring_name[10]; + + switch (index) { + case 0: + sprintf(ring_name, "cmd"); + break; + case 1: + sprintf(ring_name, "event"); + break; + default: + sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx", + (index - 2) / 2); + } + + switch (offset) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + sprintf(buf, "Ring[%s] ADDR", ring_name); + return buf; + case ROCKER_DMA_DESC_ADDR_OFFSET+4: + sprintf(buf, "Ring[%s] ADDR+4", ring_name); + return buf; + case ROCKER_DMA_DESC_SIZE_OFFSET: + sprintf(buf, "Ring[%s] SIZE", ring_name); + return buf; + case ROCKER_DMA_DESC_HEAD_OFFSET: + sprintf(buf, "Ring[%s] HEAD", ring_name); + return buf; + case ROCKER_DMA_DESC_TAIL_OFFSET: + sprintf(buf, "Ring[%s] TAIL", ring_name); + return buf; + case ROCKER_DMA_DESC_CTRL_OFFSET: + sprintf(buf, "Ring[%s] CTRL", ring_name); + return buf; + case ROCKER_DMA_DESC_CREDITS_OFFSET: + sprintf(buf, "Ring[%s] CREDITS", ring_name); + return buf; + default: + sprintf(buf, "Ring[%s] ???", ring_name); + return buf; + } + } else { + switch (addr) { + regname(ROCKER_BOGUS_REG0); + regname(ROCKER_BOGUS_REG1); + regname(ROCKER_BOGUS_REG2); + regname(ROCKER_BOGUS_REG3); + regname(ROCKER_TEST_REG); + regname(ROCKER_TEST_REG64); + regname(ROCKER_TEST_REG64+4); + regname(ROCKER_TEST_IRQ); + regname(ROCKER_TEST_DMA_ADDR); + regname(ROCKER_TEST_DMA_ADDR+4); + regname(ROCKER_TEST_DMA_SIZE); + regname(ROCKER_TEST_DMA_CTRL); + regname(ROCKER_CONTROL); + regname(ROCKER_PORT_PHYS_COUNT); + regname(ROCKER_PORT_PHYS_LINK_STATUS); + regname(ROCKER_PORT_PHYS_LINK_STATUS+4); + regname(ROCKER_PORT_PHYS_ENABLE); + regname(ROCKER_PORT_PHYS_ENABLE+4); + regname(ROCKER_SWITCH_ID); + regname(ROCKER_SWITCH_ID+4); + } + } + return "???"; +} +#else +static const char *rocker_reg_name(void *opaque, hwaddr addr) +{ + return NULL; +} +#endif + +static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + DPRINTF("Write %s addr %lx, size %u, val %lx\n", + rocker_reg_name(opaque, addr), addr, size, val); + + switch (size) { + case 4: + rocker_io_writel(opaque, addr, val); + break; + case 8: + rocker_io_writeq(opaque, addr, val); + break; + } +} + +static uint64_t rocker_port_phys_link_status(Rocker *r) +{ + int i; + uint64_t status = 0; + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = r->fp_port[i]; + + if (fp_port_get_link_up(port)) { + status |= 1 << (i + 1); + } + } + return status; +} + +static uint64_t rocker_port_phys_enable_read(Rocker *r) +{ + int i; + uint64_t ret = 0; + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = r->fp_port[i]; + + if (fp_port_enabled(port)) { + ret |= 1 << (i + 1); + } + } + return ret; +} + +static uint32_t rocker_io_readl(void *opaque, hwaddr addr) +{ + Rocker *r = opaque; + uint32_t ret; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + + switch (offset) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]); + break; + case ROCKER_DMA_DESC_ADDR_OFFSET + 4: + ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32); + break; + case ROCKER_DMA_DESC_SIZE_OFFSET: + ret = desc_ring_get_size(r->rings[index]); + break; + case ROCKER_DMA_DESC_HEAD_OFFSET: + ret = desc_ring_get_head(r->rings[index]); + break; + case ROCKER_DMA_DESC_TAIL_OFFSET: + ret = desc_ring_get_tail(r->rings[index]); + break; + case ROCKER_DMA_DESC_CREDITS_OFFSET: + ret = desc_ring_get_credits(r->rings[index]); + break; + default: + DPRINTF("not implemented dma reg read(l) addr=0x%lx " + "(ring %d, addr=0x%02x)\n", addr, index, offset); + ret = 0; + break; + } + return ret; + } + + switch (addr) { + case ROCKER_BOGUS_REG0: + case ROCKER_BOGUS_REG1: + case ROCKER_BOGUS_REG2: + case ROCKER_BOGUS_REG3: + ret = 0xDEADBABE; + break; + case ROCKER_TEST_REG: + ret = r->test_reg * 2; + break; + case ROCKER_TEST_REG64: + ret = (uint32_t)(r->test_reg64 * 2); + break; + case ROCKER_TEST_REG64 + 4: + ret = (uint32_t)((r->test_reg64 * 2) >> 32); + break; + case ROCKER_TEST_DMA_SIZE: + ret = r->test_dma_size; + break; + case ROCKER_TEST_DMA_ADDR: + ret = (uint32_t)r->test_dma_addr; + break; + case ROCKER_TEST_DMA_ADDR + 4: + ret = (uint32_t)(r->test_dma_addr >> 32); + break; + case ROCKER_PORT_PHYS_COUNT: + ret = r->fp_ports; + break; + case ROCKER_PORT_PHYS_LINK_STATUS: + ret = (uint32_t)rocker_port_phys_link_status(r); + break; + case ROCKER_PORT_PHYS_LINK_STATUS + 4: + ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32); + break; + case ROCKER_PORT_PHYS_ENABLE: + ret = (uint32_t)rocker_port_phys_enable_read(r); + break; + case ROCKER_PORT_PHYS_ENABLE + 4: + ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32); + break; + case ROCKER_SWITCH_ID: + ret = (uint32_t)r->switch_id; + break; + case ROCKER_SWITCH_ID + 4: + ret = (uint32_t)(r->switch_id >> 32); + break; + default: + DPRINTF("not implemented read(l) addr=0x%lx\n", addr); + ret = 0; + break; + } + return ret; +} + +static uint64_t rocker_io_readq(void *opaque, hwaddr addr) +{ + Rocker *r = opaque; + uint64_t ret; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + + switch (addr & ROCKER_DMA_DESC_MASK) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + ret = desc_ring_get_base_addr(r->rings[index]); + break; + default: + DPRINTF("not implemented dma reg read(q) addr=0x%lx " + "(ring %d, addr=0x%02x)\n", addr, index, offset); + ret = 0; + break; + } + return ret; + } + + switch (addr) { + case ROCKER_BOGUS_REG0: + case ROCKER_BOGUS_REG2: + ret = 0xDEADBABEDEADBABE; + break; + case ROCKER_TEST_REG64: + ret = r->test_reg64 * 2; + break; + case ROCKER_TEST_DMA_ADDR: + ret = r->test_dma_addr; + break; + case ROCKER_PORT_PHYS_LINK_STATUS: + ret = rocker_port_phys_link_status(r); + break; + case ROCKER_PORT_PHYS_ENABLE: + ret = rocker_port_phys_enable_read(r); + break; + case ROCKER_SWITCH_ID: + ret = r->switch_id; + break; + default: + DPRINTF("not implemented read(q) addr=0x%lx\n", addr); + ret = 0; + break; + } + return ret; +} + +static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + DPRINTF("Read %s addr %lx, size %u\n", + rocker_reg_name(opaque, addr), addr, size); + + switch (size) { + case 4: + return rocker_io_readl(opaque, addr); + case 8: + return rocker_io_readq(opaque, addr); + } + + return -1; +} + +static const MemoryRegionOps rocker_mmio_ops = { + .read = rocker_mmio_read, + .write = rocker_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +static void rocker_msix_vectors_unuse(Rocker *r, + unsigned int num_vectors) +{ + PCIDevice *dev = PCI_DEVICE(r); + int i; + + for (i = 0; i < num_vectors; i++) { + msix_vector_unuse(dev, i); + } +} + +static int rocker_msix_vectors_use(Rocker *r, + unsigned int num_vectors) +{ + PCIDevice *dev = PCI_DEVICE(r); + int err; + int i; + + for (i = 0; i < num_vectors; i++) { + err = msix_vector_use(dev, i); + if (err) { + goto rollback; + } + } + return 0; + +rollback: + rocker_msix_vectors_unuse(r, i); + return err; +} + +static int rocker_msix_init(Rocker *r) +{ + PCIDevice *dev = PCI_DEVICE(r); + int err; + + err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports), + &r->msix_bar, + ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET, + &r->msix_bar, + ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET, + 0); + if (err) { + return err; + } + + err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports)); + if (err) { + goto err_msix_vectors_use; + } + + return 0; + +err_msix_vectors_use: + msix_uninit(dev, &r->msix_bar, &r->msix_bar); + return err; +} + +static void rocker_msix_uninit(Rocker *r) +{ + PCIDevice *dev = PCI_DEVICE(r); + + msix_uninit(dev, &r->msix_bar, &r->msix_bar); + rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports)); +} + +static int pci_rocker_init(PCIDevice *dev) +{ + Rocker *r = to_rocker(dev); + const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; + const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } }; + static int sw_index; + int i, err = 0; + + /* allocate worlds */ + + r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r); + r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA]; + + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (!r->worlds[i]) { + goto err_world_alloc; + } + } + + /* set up memory-mapped region at BAR0 */ + + memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r, + "rocker-mmio", ROCKER_PCI_BAR0_SIZE); + pci_register_bar(dev, ROCKER_PCI_BAR0_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio); + + /* set up memory-mapped region for MSI-X */ + + memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar", + ROCKER_PCI_MSIX_BAR_SIZE); + pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar); + + /* MSI-X init */ + + err = rocker_msix_init(r); + if (err) { + goto err_msix_init; + } + + /* validate switch properties */ + + if (!r->name) { + r->name = g_strdup(ROCKER); + } + + if (rocker_find(r->name)) { + err = -EEXIST; + goto err_duplicate; + } + + if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) { + memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt)); + r->fp_start_macaddr.a[4] += (sw_index++); + } + + if (!r->switch_id) { + memcpy(&r->switch_id, &r->fp_start_macaddr, + sizeof(r->fp_start_macaddr)); + } + + if (r->fp_ports > ROCKER_FP_PORTS_MAX) { + r->fp_ports = ROCKER_FP_PORTS_MAX; + } + + r->rings = g_malloc(sizeof(DescRing *) * rocker_pci_ring_count(r)); + if (!r->rings) { + goto err_rings_alloc; + } + + /* Rings are ordered like this: + * - command ring + * - event ring + * - port0 tx ring + * - port0 rx ring + * - port1 tx ring + * - port1 rx ring + * ..... + */ + + err = -ENOMEM; + for (i = 0; i < rocker_pci_ring_count(r); i++) { + DescRing *ring = desc_ring_alloc(r, i); + + if (!ring) { + goto err_ring_alloc; + } + + if (i == ROCKER_RING_CMD) { + desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD); + } else if (i == ROCKER_RING_EVENT) { + desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT); + } else if (i % 2 == 0) { + desc_ring_set_consume(ring, tx_consume, + ROCKER_MSIX_VEC_TX((i - 2) / 2)); + } else if (i % 2 == 1) { + desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2)); + } + + r->rings[i] = ring; + } + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = + fp_port_alloc(r, r->name, &r->fp_start_macaddr, + i, &r->fp_ports_peers[i]); + + if (!port) { + goto err_port_alloc; + } + + r->fp_port[i] = port; + fp_port_set_world(port, r->world_dflt); + } + + QLIST_INSERT_HEAD(&rockers, r, next); + + return 0; + +err_port_alloc: + for (--i; i >= 0; i--) { + FpPort *port = r->fp_port[i]; + fp_port_free(port); + } + i = rocker_pci_ring_count(r); +err_ring_alloc: + for (--i; i >= 0; i--) { + desc_ring_free(r->rings[i]); + } + g_free(r->rings); +err_rings_alloc: +err_duplicate: + rocker_msix_uninit(r); +err_msix_init: + object_unparent(OBJECT(&r->msix_bar)); + object_unparent(OBJECT(&r->mmio)); +err_world_alloc: + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (r->worlds[i]) { + world_free(r->worlds[i]); + } + } + return err; +} + +static void pci_rocker_uninit(PCIDevice *dev) +{ + Rocker *r = to_rocker(dev); + int i; + + QLIST_REMOVE(r, next); + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = r->fp_port[i]; + + fp_port_free(port); + r->fp_port[i] = NULL; + } + + for (i = 0; i < rocker_pci_ring_count(r); i++) { + if (r->rings[i]) { + desc_ring_free(r->rings[i]); + } + } + g_free(r->rings); + + rocker_msix_uninit(r); + object_unparent(OBJECT(&r->msix_bar)); + object_unparent(OBJECT(&r->mmio)); + + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (r->worlds[i]) { + world_free(r->worlds[i]); + } + } + g_free(r->fp_ports_peers); +} + +static void rocker_reset(DeviceState *dev) +{ + Rocker *r = to_rocker(dev); + int i; + + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (r->worlds[i]) { + world_reset(r->worlds[i]); + } + } + for (i = 0; i < r->fp_ports; i++) { + fp_port_reset(r->fp_port[i]); + fp_port_set_world(r->fp_port[i], r->world_dflt); + } + + r->test_reg = 0; + r->test_reg64 = 0; + r->test_dma_addr = 0; + r->test_dma_size = 0; + + for (i = 0; i < rocker_pci_ring_count(r); i++) { + desc_ring_reset(r->rings[i]); + } + + DPRINTF("Reset done\n"); +} + +static Property rocker_properties[] = { + DEFINE_PROP_STRING("name", Rocker, name), + DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker, + fp_start_macaddr), + DEFINE_PROP_UINT64("switch_id", Rocker, + switch_id, 0), + DEFINE_PROP_ARRAY("ports", Rocker, fp_ports, + fp_ports_peers, qdev_prop_netdev, NICPeers), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription rocker_vmsd = { + .name = ROCKER, + .unmigratable = 1, +}; + +static void rocker_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->init = pci_rocker_init; + k->exit = pci_rocker_uninit; + k->vendor_id = PCI_VENDOR_ID_REDHAT; + k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER; + k->revision = ROCKER_PCI_REVISION; + k->class_id = PCI_CLASS_NETWORK_OTHER; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->desc = "Rocker Switch"; + dc->reset = rocker_reset; + dc->props = rocker_properties; + dc->vmsd = &rocker_vmsd; +} + +static const TypeInfo rocker_info = { + .name = ROCKER, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(Rocker), + .class_init = rocker_class_init, +}; + +static void rocker_register_types(void) +{ + type_register_static(&rocker_info); +} + +type_init(rocker_register_types) diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h new file mode 100644 index 0000000..ef77487 --- /dev/null +++ b/hw/net/rocker/rocker.h @@ -0,0 +1,76 @@ +/* + * QEMU rocker switch emulation + * + * Copyright (c) 2014 Scott Feldman + * Copyright (c) 2014 Jiri Pirko + * Copyright (c) 2014 Neil Horman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_H_ +#define _ROCKER_H_ + +#include + +#include "rocker_world.h" + +#if defined(DEBUG_ROCKER) +# define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "ROCKER: " fmt, ## __VA_ARGS__); } while (0) +#else +static inline GCC_FMT_ATTR(1, 2) int DPRINTF(const char *fmt, ...) +{ + return 0; +} +#endif + +#define __le16 uint16_t +#define __le32 uint32_t +#define __le64 uint64_t + +#define __be16 uint16_t +#define __be32 uint32_t +#define __be64 uint64_t + +static inline bool ipv4_addr_is_multicast(__be32 addr) +{ + return (addr & htonl(0xf0000000)) == htonl(0xe0000000); +} + +typedef struct ipv6_addr { + union { + uint8_t addr8[16]; + __be16 addr16[8]; + __be32 addr32[4]; + }; +} Ipv6Addr; + +static inline bool ipv6_addr_is_multicast(const Ipv6Addr *addr) +{ + return (addr->addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000); +} + +typedef struct world World; +typedef struct rocker Rocker; + +Rocker *rocker_find(const char *name); +World *rocker_get_world(Rocker *r, enum rocker_world_type type); +uint32_t rocker_fp_ports(Rocker *r); +int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up); +int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, + uint16_t vlan_id); +int rx_produce(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt); +int rocker_port_eg(Rocker *r, uint32_t pport, + const struct iovec *iov, int iovcnt); + +#endif /* _ROCKER_H_ */ diff --git a/hw/net/rocker/rocker_desc.c b/hw/net/rocker/rocker_desc.c new file mode 100644 index 0000000..83c2b98 --- /dev/null +++ b/hw/net/rocker/rocker_desc.c @@ -0,0 +1,379 @@ +/* + * QEMU rocker switch emulation - Descriptor ring support + * + * Copyright (c) 2014 Scott Feldman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "net/net.h" +#include "hw/hw.h" +#include "hw/pci/pci.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_desc.h" + +typedef struct desc_info DescInfo; + +typedef struct desc_ring { + hwaddr base_addr; + uint32_t size; + uint32_t head; + uint32_t tail; + uint32_t ctrl; + uint32_t credits; + Rocker *r; + DescInfo *info; + int index; + desc_ring_consume *consume; + unsigned msix_vector; +} DescRing; + +typedef struct desc_info { + DescRing *ring; + RockerDesc desc; + char *buf; + size_t buf_size; +} DescInfo; + +uint16_t desc_buf_size(DescInfo *info) +{ + return le16_to_cpu(info->desc.buf_size); +} + +uint16_t desc_tlv_size(DescInfo *info) +{ + return le16_to_cpu(info->desc.tlv_size); +} + +char *desc_get_buf(DescInfo *info, bool read_only) +{ + PCIDevice *dev = PCI_DEVICE(info->ring->r); + size_t size = read_only ? le16_to_cpu(info->desc.tlv_size) : + le16_to_cpu(info->desc.buf_size); + + if (size > info->buf_size) { + info->buf = g_realloc(info->buf, size); + info->buf_size = size; + } + + if (!info->buf) { + return NULL; + } + + if (pci_dma_read(dev, le64_to_cpu(info->desc.buf_addr), info->buf, size)) { + return NULL; + } + + return info->buf; +} + +int desc_set_buf(DescInfo *info, size_t tlv_size) +{ + PCIDevice *dev = PCI_DEVICE(info->ring->r); + + if (tlv_size > info->buf_size) { + DPRINTF("ERROR: trying to write more to desc buf than it " + "can hold buf_size %ld tlv_size %ld\n", + info->buf_size, tlv_size); + return -ROCKER_EMSGSIZE; + } + + info->desc.tlv_size = cpu_to_le16(tlv_size); + pci_dma_write(dev, le64_to_cpu(info->desc.buf_addr), info->buf, tlv_size); + + return ROCKER_OK; +} + +DescRing *desc_get_ring(DescInfo *info) +{ + return info->ring; +} + +int desc_ring_index(DescRing *ring) +{ + return ring->index; +} + +static bool desc_ring_empty(DescRing *ring) +{ + return ring->head == ring->tail; +} + +bool desc_ring_set_base_addr(DescRing *ring, uint64_t base_addr) +{ + if (base_addr & 0x7) { + DPRINTF("ERROR: ring[%d] desc base addr (0x%lx) not 8-byte aligned\n", + ring->index, base_addr); + return false; + } + + ring->base_addr = base_addr; + + return true; +} + +uint64_t desc_ring_get_base_addr(DescRing *ring) +{ + return ring->base_addr; +} + +bool desc_ring_set_size(DescRing *ring, uint32_t size) +{ + int i; + + if (size < 2 || size > 0x10000 || (size & (size - 1))) { + DPRINTF("ERROR: ring[%d] size (%d) not a power of 2 " + "or in range [2, 64K]\n", ring->index, size); + return false; + } + + for (i = 0; i < ring->size; i++) { + if (ring->info[i].buf) { + g_free(ring->info[i].buf); + } + } + + ring->size = size; + ring->head = ring->tail = 0; + + ring->info = g_realloc(ring->info, size * sizeof(DescInfo)); + if (!ring->info) { + return false; + } + + memset(ring->info, 0, size * sizeof(DescInfo)); + + for (i = 0; i < size; i++) { + ring->info[i].ring = ring; + } + + return true; +} + +uint32_t desc_ring_get_size(DescRing *ring) +{ + return ring->size; +} + +static DescInfo *desc_read(DescRing *ring, uint32_t index) +{ + PCIDevice *dev = PCI_DEVICE(ring->r); + DescInfo *info = &ring->info[index]; + hwaddr addr = ring->base_addr + (sizeof(RockerDesc) * index); + + pci_dma_read(dev, addr, &info->desc, sizeof(info->desc)); + + return info; +} + +static void desc_write(DescRing *ring, uint32_t index) +{ + PCIDevice *dev = PCI_DEVICE(ring->r); + DescInfo *info = &ring->info[index]; + hwaddr addr = ring->base_addr + (sizeof(RockerDesc) * index); + + pci_dma_write(dev, addr, &info->desc, sizeof(info->desc)); +} + +static bool desc_ring_base_addr_check(DescRing *ring) +{ + if (!ring->base_addr) { + DPRINTF("ERROR: ring[%d] not-initialized desc base address!\n", + ring->index); + return false; + } + return true; +} + +static DescInfo *__desc_ring_fetch_desc(DescRing *ring) +{ + return desc_read(ring, ring->tail); +} + +DescInfo *desc_ring_fetch_desc(DescRing *ring) +{ + if (desc_ring_empty(ring) || !desc_ring_base_addr_check(ring)) { + return NULL; + } + + return desc_read(ring, ring->tail); +} + +static bool __desc_ring_post_desc(DescRing *ring, int err) +{ + uint16_t comp_err = 0x8000 | (uint16_t)-err; + DescInfo *info = &ring->info[ring->tail]; + + info->desc.comp_err = cpu_to_le16(comp_err); + desc_write(ring, ring->tail); + ring->tail = (ring->tail + 1) % ring->size; + + /* return true if starting credit count */ + + return ring->credits++ == 0; +} + +bool desc_ring_post_desc(DescRing *ring, int err) +{ + if (desc_ring_empty(ring)) { + DPRINTF("ERROR: ring[%d] trying to post desc to empty ring\n", + ring->index); + return false; + } + + if (!desc_ring_base_addr_check(ring)) { + return false; + } + + return __desc_ring_post_desc(ring, err); +} + +static bool ring_pump(DescRing *ring) +{ + DescInfo *info; + bool primed = false; + int err; + + /* If the ring has a consumer, call consumer for each + * desc starting at tail and stopping when tail reaches + * head (the empty ring condition). + */ + + if (ring->consume) { + while (ring->head != ring->tail) { + info = __desc_ring_fetch_desc(ring); + err = ring->consume(ring->r, info); + if (__desc_ring_post_desc(ring, err)) { + primed = true; + } + } + } + + return primed; +} + +bool desc_ring_set_head(DescRing *ring, uint32_t new) +{ + uint32_t tail = ring->tail; + uint32_t head = ring->head; + + if (!desc_ring_base_addr_check(ring)) { + return false; + } + + if (new >= ring->size) { + DPRINTF("ERROR: trying to set head (%d) past ring[%d] size (%d)\n", + new, ring->index, ring->size); + return false; + } + + if (((head < tail) && ((new >= tail) || (new < head))) || + ((head > tail) && ((new >= tail) && (new < head)))) { + DPRINTF("ERROR: trying to wrap ring[%d] " + "(head %d, tail %d, new head %d)\n", + ring->index, head, tail, new); + return false; + } + + if (new == ring->head) { + DPRINTF("WARNING: setting head (%d) to current head position\n", new); + } + + ring->head = new; + + return ring_pump(ring); +} + +uint32_t desc_ring_get_head(DescRing *ring) +{ + return ring->head; +} + +uint32_t desc_ring_get_tail(DescRing *ring) +{ + return ring->tail; +} + +void desc_ring_set_ctrl(DescRing *ring, uint32_t val) +{ + if (val & ROCKER_DMA_DESC_CTRL_RESET) { + DPRINTF("ring[%d] resetting\n", ring->index); + desc_ring_reset(ring); + } +} + +bool desc_ring_ret_credits(DescRing *ring, uint32_t credits) +{ + if (credits > ring->credits) { + DPRINTF("ERROR: trying to return more credits (%d) " + "than are outstanding (%d)\n", credits, ring->credits); + ring->credits = 0; + return false; + } + + ring->credits -= credits; + + /* return true if credits are still outstanding */ + + return ring->credits > 0; +} + +uint32_t desc_ring_get_credits(DescRing *ring) +{ + return ring->credits; +} + +void desc_ring_set_consume(DescRing *ring, desc_ring_consume *consume, + unsigned vector) +{ + ring->consume = consume; + ring->msix_vector = vector; +} + +unsigned desc_ring_get_msix_vector(DescRing *ring) +{ + return ring->msix_vector; +} + +DescRing *desc_ring_alloc(Rocker *r, int index) +{ + DescRing *ring; + + ring = g_malloc0(sizeof(DescRing)); + if (!ring) { + return NULL; + } + + ring->r = r; + ring->index = index; + + return ring; +} + +void desc_ring_free(DescRing *ring) +{ + if (ring->info) { + g_free(ring->info); + } + g_free(ring); +} + +void desc_ring_reset(DescRing *ring) +{ + ring->base_addr = 0; + ring->size = 0; + ring->head = 0; + ring->tail = 0; + ring->ctrl = 0; + ring->credits = 0; +} diff --git a/hw/net/rocker/rocker_desc.h b/hw/net/rocker/rocker_desc.h new file mode 100644 index 0000000..0b57ba7 --- /dev/null +++ b/hw/net/rocker/rocker_desc.h @@ -0,0 +1,57 @@ +/* + * QEMU rocker switch emulation - Descriptor ring support + * + * Copyright (c) 2014 Scott Feldman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#ifndef _ROCKER_DESC_H_ +#define _ROCKER_DESC_H_ + +#include "rocker_hw.h" + +typedef struct rocker Rocker; +typedef struct desc_ring DescRing; +typedef struct desc_info DescInfo; + +typedef int (desc_ring_consume)(Rocker *r, DescInfo *info); + +uint16_t desc_buf_size(DescInfo *info); +uint16_t desc_tlv_size(DescInfo *info); +char *desc_get_buf(DescInfo *info, bool read_only); +int desc_set_buf(DescInfo *info, size_t tlv_size); +DescRing *desc_get_ring(DescInfo *info); + +int desc_ring_index(DescRing *ring); +bool desc_ring_set_base_addr(DescRing *ring, uint64_t base_addr); +uint64_t desc_ring_get_base_addr(DescRing *ring); +bool desc_ring_set_size(DescRing *ring, uint32_t size); +uint32_t desc_ring_get_size(DescRing *ring); +bool desc_ring_set_head(DescRing *ring, uint32_t new); +uint32_t desc_ring_get_head(DescRing *ring); +uint32_t desc_ring_get_tail(DescRing *ring); +void desc_ring_set_ctrl(DescRing *ring, uint32_t val); +bool desc_ring_ret_credits(DescRing *ring, uint32_t credits); +uint32_t desc_ring_get_credits(DescRing *ring); + +DescInfo *desc_ring_fetch_desc(DescRing *ring); +bool desc_ring_post_desc(DescRing *ring, int status); + +void desc_ring_set_consume(DescRing *ring, desc_ring_consume *consume, + unsigned vector); +unsigned desc_ring_get_msix_vector(DescRing *ring); +DescRing *desc_ring_alloc(Rocker *r, int index); +void desc_ring_free(DescRing *ring); +void desc_ring_reset(DescRing *ring); + +#endif diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c new file mode 100644 index 0000000..8d41190 --- /dev/null +++ b/hw/net/rocker/rocker_fp.c @@ -0,0 +1,232 @@ +/* + * QEMU rocker switch emulation - front-panel ports + * + * Copyright (c) 2014 Scott Feldman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "net/clients.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_fp.h" +#include "rocker_world.h" + +enum duplex { + DUPLEX_HALF = 0, + DUPLEX_FULL +}; + +typedef struct fp_port { + Rocker *r; + World *world; + uint index; + char *name; + uint32_t pport; + bool enabled; + uint32_t speed; + uint8_t duplex; + uint8_t autoneg; + uint8_t learning; + NICState *nic; + NICConf conf; +} FpPort; + +bool fp_port_get_link_up(FpPort *port) +{ + return !qemu_get_queue(port->nic)->link_down; +} + +void fp_port_get_macaddr(FpPort *port, MACAddr *macaddr) +{ + memcpy(macaddr->a, port->conf.macaddr.a, sizeof(macaddr->a)); +} + +void fp_port_set_macaddr(FpPort *port, MACAddr *macaddr) +{ +/*XXX memcpy(port->conf.macaddr.a, macaddr.a, sizeof(port->conf.macaddr.a)); */ +} + +uint8_t fp_port_get_learning(FpPort *port) +{ + return port->learning; +} + +void fp_port_set_learning(FpPort *port, uint8_t learning) +{ + port->learning = learning; +} + +int fp_port_get_settings(FpPort *port, uint32_t *speed, + uint8_t *duplex, uint8_t *autoneg) +{ + *speed = port->speed; + *duplex = port->duplex; + *autoneg = port->autoneg; + + return ROCKER_OK; +} + +int fp_port_set_settings(FpPort *port, uint32_t speed, + uint8_t duplex, uint8_t autoneg) +{ + /* XXX validate inputs */ + + port->speed = speed; + port->duplex = duplex; + port->autoneg = autoneg; + + return ROCKER_OK; +} + +bool fp_port_from_pport(uint32_t pport, uint32_t *port) +{ + if (pport < 1 || pport > ROCKER_FP_PORTS_MAX) { + return false; + } + *port = pport - 1; + return true; +} + +int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt) +{ + NetClientState *nc = qemu_get_queue(port->nic); + + if (port->enabled) { + qemu_sendv_packet(nc, iov, iovcnt); + } + + return ROCKER_OK; +} + +static int fp_port_can_receive(NetClientState *nc) +{ + FpPort *port = qemu_get_nic_opaque(nc); + + return port->enabled; +} + +static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov, + int iovcnt) +{ + FpPort *port = qemu_get_nic_opaque(nc); + + return world_ingress(port->world, port->pport, iov, iovcnt); +} + +static ssize_t fp_port_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + const struct iovec iov = { + .iov_base = (uint8_t *)buf, + .iov_len = size + }; + + return fp_port_receive_iov(nc, &iov, 1); +} + +static void fp_port_cleanup(NetClientState *nc) +{ +} + +static void fp_port_set_link_status(NetClientState *nc) +{ + FpPort *port = qemu_get_nic_opaque(nc); + + rocker_event_link_changed(port->r, port->pport, !nc->link_down); +} + +static NetClientInfo fp_port_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = fp_port_can_receive, + .receive = fp_port_receive, + .receive_iov = fp_port_receive_iov, + .cleanup = fp_port_cleanup, + .link_status_changed = fp_port_set_link_status, +}; + +World *fp_port_get_world(FpPort *port) +{ + return port->world; +} + +void fp_port_set_world(FpPort *port, World *world) +{ + DPRINTF("port %d setting world \"%s\"\n", port->index, world_name(world)); + port->world = world; +} + +bool fp_port_enabled(FpPort *port) +{ + return port->enabled; +} + +void fp_port_enable(FpPort *port) +{ + port->enabled = true; + DPRINTF("port %d enabled\n", port->index); +} + +void fp_port_disable(FpPort *port) +{ + port->enabled = false; + DPRINTF("port %d disabled\n", port->index); +} + +FpPort *fp_port_alloc(Rocker *r, char *sw_name, + MACAddr *start_mac, uint index, + NICPeers *peers) +{ + FpPort *port = g_malloc0(sizeof(FpPort)); + + if (!port) { + return NULL; + } + + port->r = r; + port->index = index; + port->pport = index + 1; + + /* front-panel switch port names are 1-based */ + + port->name = g_strdup_printf("%s.%d", sw_name, port->pport); + + memcpy(port->conf.macaddr.a, start_mac, sizeof(port->conf.macaddr.a)); + port->conf.macaddr.a[5] += index; + port->conf.bootindex = -1; + port->conf.peers = *peers; + + port->nic = qemu_new_nic(&fp_port_info, &port->conf, + sw_name, NULL, port); + qemu_format_nic_info_str(qemu_get_queue(port->nic), + port->conf.macaddr.a); + + fp_port_reset(port); + + return port; +} + +void fp_port_free(FpPort *port) +{ + qemu_del_nic(port->nic); + g_free(port->name); + g_free(port); +} + +void fp_port_reset(FpPort *port) +{ + fp_port_disable(port); + port->speed = 10000; /* 10Gbps */ + port->duplex = DUPLEX_FULL; + port->autoneg = 0; +} diff --git a/hw/net/rocker/rocker_fp.h b/hw/net/rocker/rocker_fp.h new file mode 100644 index 0000000..f827f8d --- /dev/null +++ b/hw/net/rocker/rocker_fp.h @@ -0,0 +1,53 @@ +/* + * QEMU rocker switch emulation - front-panel ports + * + * Copyright (c) 2014 Scott Feldman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_FP_H_ +#define _ROCKER_FP_H_ + +#include "net/net.h" +#include "qemu/iov.h" + +#define ROCKER_FP_PORTS_MAX 62 + +typedef struct rocker Rocker; +typedef struct fp_port FpPort; +typedef struct world World; + +int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt); + +bool fp_port_get_link_up(FpPort *port); +void fp_port_get_macaddr(FpPort *port, MACAddr *macaddr); +void fp_port_set_macaddr(FpPort *port, MACAddr *macaddr); +uint8_t fp_port_get_learning(FpPort *port); +void fp_port_set_learning(FpPort *port, uint8_t learning); +int fp_port_get_settings(FpPort *port, uint32_t *speed, + uint8_t *duplex, uint8_t *autoneg); +int fp_port_set_settings(FpPort *port, uint32_t speed, + uint8_t duplex, uint8_t autoneg); +bool fp_port_from_pport(uint32_t pport, uint32_t *port); +World *fp_port_get_world(FpPort *port); +void fp_port_set_world(FpPort *port, World *world); +bool fp_port_enabled(FpPort *port); +void fp_port_enable(FpPort *port); +void fp_port_disable(FpPort *port); + +FpPort *fp_port_alloc(Rocker *r, char *sw_name, + MACAddr *start_mac, uint index, + NICPeers *peers); +void fp_port_free(FpPort *port); +void fp_port_reset(FpPort *port); + +#endif /* _ROCKER_FP_H_ */ diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h new file mode 100644 index 0000000..c9c85a7 --- /dev/null +++ b/hw/net/rocker/rocker_hw.h @@ -0,0 +1,491 @@ +/* + * Rocker switch hardware register and descriptor definitions. + * + * Copyright (c) 2014 Scott Feldman + * Copyright (c) 2014 Jiri Pirko + * + */ + +#ifndef _ROCKER_HW_ +#define _ROCKER_HW_ + +#define __le16 uint16_t +#define __le32 uint32_t +#define __le64 uint64_t + +/* + * Return codes + */ + +enum { + ROCKER_OK = 0, + ROCKER_ENOENT = 2, + ROCKER_ENXIO = 6, + ROCKER_ENOMEM = 12, + ROCKER_EEXIST = 17, + ROCKER_EINVAL = 22, + ROCKER_EMSGSIZE = 90, + ROCKER_ENOTSUP = 95, + ROCKER_ENOBUFS = 105, +}; + +/* + * PCI configuration space + */ + +#define ROCKER_PCI_REVISION 0x1 +#define ROCKER_PCI_BAR0_IDX 0 +#define ROCKER_PCI_BAR0_SIZE 0x2000 +#define ROCKER_PCI_MSIX_BAR_IDX 1 +#define ROCKER_PCI_MSIX_BAR_SIZE 0x2000 +#define ROCKER_PCI_MSIX_TABLE_OFFSET 0x0000 +#define ROCKER_PCI_MSIX_PBA_OFFSET 0x1000 + +/* + * MSI-X vectors + */ + +enum { + ROCKER_MSIX_VEC_CMD, + ROCKER_MSIX_VEC_EVENT, + ROCKER_MSIX_VEC_TEST, + ROCKER_MSIX_VEC_RESERVED0, + __ROCKER_MSIX_VEC_TX, + __ROCKER_MSIX_VEC_RX, +#define ROCKER_MSIX_VEC_TX(port) \ + (__ROCKER_MSIX_VEC_TX + ((port) * 2)) +#define ROCKER_MSIX_VEC_RX(port) \ + (__ROCKER_MSIX_VEC_RX + ((port) * 2)) +#define ROCKER_MSIX_VEC_COUNT(portcnt) \ + (ROCKER_MSIX_VEC_RX((portcnt) - 1) + 1) +}; + +/* + * Rocker bogus registers + */ +#define ROCKER_BOGUS_REG0 0x0000 +#define ROCKER_BOGUS_REG1 0x0004 +#define ROCKER_BOGUS_REG2 0x0008 +#define ROCKER_BOGUS_REG3 0x000c + +/* + * Rocker test registers + */ +#define ROCKER_TEST_REG 0x0010 +#define ROCKER_TEST_REG64 0x0018 /* 8-byte */ +#define ROCKER_TEST_IRQ 0x0020 +#define ROCKER_TEST_DMA_ADDR 0x0028 /* 8-byte */ +#define ROCKER_TEST_DMA_SIZE 0x0030 +#define ROCKER_TEST_DMA_CTRL 0x0034 + +/* + * Rocker test register ctrl + */ +#define ROCKER_TEST_DMA_CTRL_CLEAR (1 << 0) +#define ROCKER_TEST_DMA_CTRL_FILL (1 << 1) +#define ROCKER_TEST_DMA_CTRL_INVERT (1 << 2) + +/* + * Rocker DMA ring register offsets + */ +#define ROCKER_DMA_DESC_BASE 0x1000 +#define ROCKER_DMA_DESC_SIZE 32 +#define ROCKER_DMA_DESC_MASK 0x1F +#define ROCKER_DMA_DESC_TOTAL_SIZE \ + (ROCKER_DMA_DESC_SIZE * 64) /* 62 ports + event + cmd */ +#define ROCKER_DMA_DESC_ADDR_OFFSET 0x00 /* 8-byte */ +#define ROCKER_DMA_DESC_SIZE_OFFSET 0x08 +#define ROCKER_DMA_DESC_HEAD_OFFSET 0x0c +#define ROCKER_DMA_DESC_TAIL_OFFSET 0x10 +#define ROCKER_DMA_DESC_CTRL_OFFSET 0x14 +#define ROCKER_DMA_DESC_CREDITS_OFFSET 0x18 +#define ROCKER_DMA_DESC_RSVD_OFFSET 0x1c + +/* + * Rocker dma ctrl register bits + */ +#define ROCKER_DMA_DESC_CTRL_RESET (1 << 0) + +/* + * Rocker ring indices + */ +#define ROCKER_RING_CMD 0 +#define ROCKER_RING_EVENT 1 + +/* + * Helper macro to do convert a dma ring register + * to its index. Based on the fact that the register + * group stride is 32 bytes. + */ +#define ROCKER_RING_INDEX(reg) ((reg >> 5) & 0x7F) + +/* + * Rocker DMA Descriptor + */ + +typedef struct rocker_desc { + __le64 buf_addr; + uint64_t cookie; + __le16 buf_size; + __le16 tlv_size; + __le16 rsvd[5]; /* pad to 32 bytes */ + __le16 comp_err; +} __attribute__((packed, aligned(8))) RockerDesc; + +/* + * Rocker TLV type fields + */ + +typedef struct rocker_tlv { + __le32 type; + __le16 len; + __le16 rsvd; +} __attribute__((packed, aligned(8))) RockerTlv; + +/* cmd msg */ +enum { + ROCKER_TLV_CMD_UNSPEC, + ROCKER_TLV_CMD_TYPE, /* u16 */ + ROCKER_TLV_CMD_INFO, /* nest */ + + __ROCKER_TLV_CMD_MAX, + ROCKER_TLV_CMD_MAX = __ROCKER_TLV_CMD_MAX - 1, +}; + +enum { + ROCKER_TLV_CMD_TYPE_UNSPEC, + ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS, + ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS, + + __ROCKER_TLV_CMD_TYPE_MAX, + ROCKER_TLV_CMD_TYPE_MAX = __ROCKER_TLV_CMD_TYPE_MAX - 1, +}; + +/* cmd info nested for set/get port settings */ +enum { + ROCKER_TLV_CMD_PORT_SETTINGS_UNSPEC, + ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, /* u32 */ + ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, /* u32 */ + ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, /* binary */ + ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, /* u8 */ + + __ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + ROCKER_TLV_CMD_PORT_SETTINGS_MAX = __ROCKER_TLV_CMD_PORT_SETTINGS_MAX - 1, +}; + +enum { + ROCKER_PORT_MODE_OF_DPA, +}; + +/* event msg */ +enum { + ROCKER_TLV_EVENT_UNSPEC, + ROCKER_TLV_EVENT_TYPE, /* u16 */ + ROCKER_TLV_EVENT_INFO, /* nest */ + + __ROCKER_TLV_EVENT_MAX, + ROCKER_TLV_EVENT_MAX = __ROCKER_TLV_EVENT_MAX - 1, +}; + +enum { + ROCKER_TLV_EVENT_TYPE_UNSPEC, + ROCKER_TLV_EVENT_TYPE_LINK_CHANGED, + ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN, + + __ROCKER_TLV_EVENT_TYPE_MAX, + ROCKER_TLV_EVENT_TYPE_MAX = __ROCKER_TLV_EVENT_TYPE_MAX - 1, +}; + +/* event info nested for link changed */ +enum { + ROCKER_TLV_EVENT_LINK_CHANGED_UNSPEC, + ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, /* u32 */ + ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, /* u8 */ + + __ROCKER_TLV_EVENT_LINK_CHANGED_MAX, + ROCKER_TLV_EVENT_LINK_CHANGED_MAX = __ROCKER_TLV_EVENT_LINK_CHANGED_MAX - 1, +}; + +/* event info nested for MAC/VLAN */ +enum { + ROCKER_TLV_EVENT_MAC_VLAN_UNSPEC, + ROCKER_TLV_EVENT_MAC_VLAN_PPORT, /* u32 */ + ROCKER_TLV_EVENT_MAC_VLAN_MAC, /* binary */ + ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, /* __be16 */ + + __ROCKER_TLV_EVENT_MAC_VLAN_MAX, + ROCKER_TLV_EVENT_MAC_VLAN_MAX = __ROCKER_TLV_EVENT_MAC_VLAN_MAX - 1, +}; + +/* Rx msg */ +enum { + ROCKER_TLV_RX_UNSPEC, + ROCKER_TLV_RX_FLAGS, /* u16, see RX_FLAGS_ */ + ROCKER_TLV_RX_CSUM, /* u16 */ + ROCKER_TLV_RX_FRAG_ADDR, /* u64 */ + ROCKER_TLV_RX_FRAG_MAX_LEN, /* u16 */ + ROCKER_TLV_RX_FRAG_LEN, /* u16 */ + + __ROCKER_TLV_RX_MAX, + ROCKER_TLV_RX_MAX = __ROCKER_TLV_RX_MAX - 1, +}; + +#define ROCKER_RX_FLAGS_IPV4 (1 << 0) +#define ROCKER_RX_FLAGS_IPV6 (1 << 1) +#define ROCKER_RX_FLAGS_CSUM_CALC (1 << 2) +#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD (1 << 3) +#define ROCKER_RX_FLAGS_IP_FRAG (1 << 4) +#define ROCKER_RX_FLAGS_TCP (1 << 5) +#define ROCKER_RX_FLAGS_UDP (1 << 6) +#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7) + +/* Tx msg */ +enum { + ROCKER_TLV_TX_UNSPEC, + ROCKER_TLV_TX_OFFLOAD, /* u8, see TX_OFFLOAD_ */ + ROCKER_TLV_TX_L3_CSUM_OFF, /* u16 */ + ROCKER_TLV_TX_TSO_MSS, /* u16 */ + ROCKER_TLV_TX_TSO_HDR_LEN, /* u16 */ + ROCKER_TLV_TX_FRAGS, /* array */ + + __ROCKER_TLV_TX_MAX, + ROCKER_TLV_TX_MAX = __ROCKER_TLV_TX_MAX - 1, +}; + +#define ROCKER_TX_OFFLOAD_NONE 0 +#define ROCKER_TX_OFFLOAD_IP_CSUM 1 +#define ROCKER_TX_OFFLOAD_TCP_UDP_CSUM 2 +#define ROCKER_TX_OFFLOAD_L3_CSUM 3 +#define ROCKER_TX_OFFLOAD_TSO 4 + +#define ROCKER_TX_FRAGS_MAX 16 + +enum { + ROCKER_TLV_TX_FRAG_UNSPEC, + ROCKER_TLV_TX_FRAG, /* nest */ + + __ROCKER_TLV_TX_FRAG_MAX, + ROCKER_TLV_TX_FRAG_MAX = __ROCKER_TLV_TX_FRAG_MAX - 1, +}; + +enum { + ROCKER_TLV_TX_FRAG_ATTR_UNSPEC, + ROCKER_TLV_TX_FRAG_ATTR_ADDR, /* u64 */ + ROCKER_TLV_TX_FRAG_ATTR_LEN, /* u16 */ + + __ROCKER_TLV_TX_FRAG_ATTR_MAX, + ROCKER_TLV_TX_FRAG_ATTR_MAX = __ROCKER_TLV_TX_FRAG_ATTR_MAX - 1, +}; + +/* + * cmd info nested for OF-DPA msgs + */ + +enum { + ROCKER_TLV_OF_DPA_UNSPEC, + ROCKER_TLV_OF_DPA_TABLE_ID, /* u16 */ + ROCKER_TLV_OF_DPA_PRIORITY, /* u32 */ + ROCKER_TLV_OF_DPA_HARDTIME, /* u32 */ + ROCKER_TLV_OF_DPA_IDLETIME, /* u32 */ + ROCKER_TLV_OF_DPA_COOKIE, /* u64 */ + ROCKER_TLV_OF_DPA_IN_PPORT, /* u32 */ + ROCKER_TLV_OF_DPA_IN_PPORT_MASK, /* u32 */ + ROCKER_TLV_OF_DPA_OUT_PPORT, /* u32 */ + ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, /* u16 */ + ROCKER_TLV_OF_DPA_GROUP_ID, /* u32 */ + ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, /* u32 */ + ROCKER_TLV_OF_DPA_GROUP_COUNT, /* u16 */ + ROCKER_TLV_OF_DPA_GROUP_IDS, /* u32 array */ + ROCKER_TLV_OF_DPA_VLAN_ID, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_ID_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_VLAN_ID, /* __be16 */ + ROCKER_TLV_OF_DPA_NEW_VLAN_PCP, /* u8 */ + ROCKER_TLV_OF_DPA_TUNNEL_ID, /* u32 */ + ROCKER_TLV_OF_DPA_TUNNEL_LPORT, /* u32 */ + ROCKER_TLV_OF_DPA_ETHERTYPE, /* __be16 */ + ROCKER_TLV_OF_DPA_DST_MAC, /* binary */ + ROCKER_TLV_OF_DPA_DST_MAC_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_MAC, /* binary */ + ROCKER_TLV_OF_DPA_SRC_MAC_MASK, /* binary */ + ROCKER_TLV_OF_DPA_IP_PROTO, /* u8 */ + ROCKER_TLV_OF_DPA_IP_PROTO_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_IP_DSCP, /* u8 */ + ROCKER_TLV_OF_DPA_IP_ECN, /* u8 */ + ROCKER_TLV_OF_DPA_IP_ECN_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_DST_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_DST_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_DST_IPV6, /* binary */ + ROCKER_TLV_OF_DPA_DST_IPV6_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_IPV6, /* binary */ + ROCKER_TLV_OF_DPA_SRC_IPV6_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_ARP_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_ARP_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_L4_DST_PORT, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_DST_PORT_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_SRC_PORT, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_SRC_PORT_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_ICMP_TYPE, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_TYPE_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_CODE, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_CODE_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IPV6_LABEL, /* __be32 */ + ROCKER_TLV_OF_DPA_IPV6_LABEL_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_QUEUE_ID_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_QUEUE_ID, /* u8 */ + ROCKER_TLV_OF_DPA_CLEAR_ACTIONS, /* u32 */ + ROCKER_TLV_OF_DPA_POP_VLAN, /* u8 */ + ROCKER_TLV_OF_DPA_TTL_CHECK, /* u8 */ + ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, /* u8 */ + + __ROCKER_TLV_OF_DPA_MAX, + ROCKER_TLV_OF_DPA_MAX = __ROCKER_TLV_OF_DPA_MAX - 1, +}; + +/* + * OF-DPA table IDs + */ + +enum rocker_of_dpa_table_id { + ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT = 0, + ROCKER_OF_DPA_TABLE_ID_VLAN = 10, + ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC = 20, + ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING = 30, + ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING = 40, + ROCKER_OF_DPA_TABLE_ID_BRIDGING = 50, + ROCKER_OF_DPA_TABLE_ID_ACL_POLICY = 60, +}; + +/* + * OF-DPA flow stats + */ + +enum { + ROCKER_TLV_OF_DPA_FLOW_STAT_UNSPEC, + ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION, /* u32 */ + ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS, /* u64 */ + ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS, /* u64 */ + + __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX, + ROCKER_TLV_OF_DPA_FLOW_STAT_MAX = __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX - 1, +}; + +/* + * OF-DPA group types + */ + +enum rocker_of_dpa_group_type { + ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE = 0, + ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE, + ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST, + ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST, + ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD, + ROCKER_OF_DPA_GROUP_TYPE_L3_INTERFACE, + ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST, + ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP, + ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY, +}; + +/* + * OF-DPA group L2 overlay types + */ + +enum rocker_of_dpa_overlay_type { + ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_UCAST = 0, + ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_MCAST, + ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_UCAST, + ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_MCAST, +}; + +/* + * OF-DPA group ID encoding + */ + +#define ROCKER_GROUP_TYPE_SHIFT 28 +#define ROCKER_GROUP_TYPE_MASK 0xf0000000 +#define ROCKER_GROUP_VLAN_ID_SHIFT 16 +#define ROCKER_GROUP_VLAN_ID_MASK 0x0fff0000 +#define ROCKER_GROUP_PORT_SHIFT 0 +#define ROCKER_GROUP_PORT_MASK 0x0000ffff +#define ROCKER_GROUP_TUNNEL_ID_SHIFT 12 +#define ROCKER_GROUP_TUNNEL_ID_MASK 0x0ffff000 +#define ROCKER_GROUP_SUBTYPE_SHIFT 10 +#define ROCKER_GROUP_SUBTYPE_MASK 0x00000c00 +#define ROCKER_GROUP_INDEX_SHIFT 0 +#define ROCKER_GROUP_INDEX_MASK 0x0000ffff +#define ROCKER_GROUP_INDEX_LONG_SHIFT 0 +#define ROCKER_GROUP_INDEX_LONG_MASK 0x0fffffff + +#define ROCKER_GROUP_TYPE_GET(group_id) \ + (((group_id) & ROCKER_GROUP_TYPE_MASK) >> ROCKER_GROUP_TYPE_SHIFT) +#define ROCKER_GROUP_TYPE_SET(type) \ + (((type) << ROCKER_GROUP_TYPE_SHIFT) & ROCKER_GROUP_TYPE_MASK) +#define ROCKER_GROUP_VLAN_GET(group_id) \ + (((group_id) & ROCKER_GROUP_VLAN_ID_MASK) >> ROCKER_GROUP_VLAN_ID_SHIFT) +#define ROCKER_GROUP_VLAN_SET(vlan_id) \ + (((vlan_id) << ROCKER_GROUP_VLAN_ID_SHIFT) & ROCKER_GROUP_VLAN_ID_MASK) +#define ROCKER_GROUP_PORT_GET(group_id) \ + (((group_id) & ROCKER_GROUP_PORT_MASK) >> ROCKER_GROUP_PORT_SHIFT) +#define ROCKER_GROUP_PORT_SET(port) \ + (((port) << ROCKER_GROUP_PORT_SHIFT) & ROCKER_GROUP_PORT_MASK) +#define ROCKER_GROUP_INDEX_GET(group_id) \ + (((group_id) & ROCKER_GROUP_INDEX_MASK) >> ROCKER_GROUP_INDEX_SHIFT) +#define ROCKER_GROUP_INDEX_SET(index) \ + (((index) << ROCKER_GROUP_INDEX_SHIFT) & ROCKER_GROUP_INDEX_MASK) +#define ROCKER_GROUP_INDEX_LONG_GET(group_id) \ + (((group_id) & ROCKER_GROUP_INDEX_LONG_MASK) >> \ + ROCKER_GROUP_INDEX_LONG_SHIFT) +#define ROCKER_GROUP_INDEX_LONG_SET(index) \ + (((index) << ROCKER_GROUP_INDEX_LONG_SHIFT) & \ + ROCKER_GROUP_INDEX_LONG_MASK) + +#define ROCKER_GROUP_NONE 0 +#define ROCKER_GROUP_L2_INTERFACE(vlan_id, port) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_PORT_SET(port)) +#define ROCKER_GROUP_L2_REWRITE(index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE) |\ + ROCKER_GROUP_INDEX_LONG_SET(index)) +#define ROCKER_GROUP_L2_MCAST(vlan_id, index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index)) +#define ROCKER_GROUP_L2_FLOOD(vlan_id, index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index)) +#define ROCKER_GROUP_L3_UNICAST(index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST) |\ + ROCKER_GROUP_INDEX_LONG_SET(index)) + +/* + * Rocker general purpose registers + */ +#define ROCKER_CONTROL 0x0300 +#define ROCKER_PORT_PHYS_COUNT 0x0304 +#define ROCKER_PORT_PHYS_LINK_STATUS 0x0310 /* 8-byte */ +#define ROCKER_PORT_PHYS_ENABLE 0x0318 /* 8-byte */ +#define ROCKER_SWITCH_ID 0x0320 /* 8-byte */ + +/* + * Rocker control bits + */ +#define ROCKER_CONTROL_RESET (1 << 0) + +#endif /* _ROCKER_HW_ */ diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c new file mode 100644 index 0000000..1cf7e4e --- /dev/null +++ b/hw/net/rocker/rocker_of_dpa.c @@ -0,0 +1,2314 @@ +/* + * QEMU rocker switch emulation - OF-DPA flow processing support + * + * Copyright (c) 2014 Scott Feldman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "net/eth.h" +#include "qemu/iov.h" +#include "qemu/timer.h" +#include "qmp-commands.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_fp.h" +#include "rocker_tlv.h" +#include "rocker_world.h" +#include "rocker_desc.h" +#include "rocker_of_dpa.h" + +static const MACAddr zero_mac = { .a = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }; +static const MACAddr ff_mac = { .a = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } }; + +typedef struct of_dpa { + World *world; + GHashTable *flow_tbl; + GHashTable *group_tbl; + unsigned int flow_tbl_max_size; + unsigned int group_tbl_max_size; +} OfDpa; + +/* flow_key stolen mostly from OVS + * + * Note: fields that compare with network packet header fields + * are stored in network order (BE) to avoid per-packet field + * byte-swaps. + */ + +typedef struct of_dpa_flow_key { + uint32_t in_pport; /* ingress port */ + uint32_t tunnel_id; /* overlay tunnel id */ + uint32_t tbl_id; /* table id */ + struct { + __be16 vlan_id; /* 0 if no VLAN */ + MACAddr src; /* ethernet source address */ + MACAddr dst; /* ethernet destination address */ + __be16 type; /* ethernet frame type */ + } eth; + struct { + uint8_t proto; /* IP protocol or ARP opcode */ + uint8_t tos; /* IP ToS */ + uint8_t ttl; /* IP TTL/hop limit */ + uint8_t frag; /* one of FRAG_TYPE_* */ + } ip; + union { + struct { + struct { + __be32 src; /* IP source address */ + __be32 dst; /* IP destination address */ + } addr; + union { + struct { + __be16 src; /* TCP/UDP/SCTP source port */ + __be16 dst; /* TCP/UDP/SCTP destination port */ + __be16 flags; /* TCP flags */ + } tp; + struct { + MACAddr sha; /* ARP source hardware address */ + MACAddr tha; /* ARP target hardware address */ + } arp; + }; + } ipv4; + struct { + struct { + Ipv6Addr src; /* IPv6 source address */ + Ipv6Addr dst; /* IPv6 destination address */ + } addr; + __be32 label; /* IPv6 flow label */ + struct { + __be16 src; /* TCP/UDP/SCTP source port */ + __be16 dst; /* TCP/UDP/SCTP destination port */ + __be16 flags; /* TCP flags */ + } tp; + struct { + Ipv6Addr target; /* ND target address */ + MACAddr sll; /* ND source link layer address */ + MACAddr tll; /* ND target link layer address */ + } nd; + } ipv6; + }; + int width; /* how many uint64_t's in key? */ +} OfDpaFlowKey; + +/* Width of key which includes field 'f' in u64s, rounded up */ +#define FLOW_KEY_WIDTH(f) \ + ((offsetof(OfDpaFlowKey, f) + \ + sizeof(((OfDpaFlowKey *)0)->f) + \ + sizeof(uint64_t) - 1) / sizeof(uint64_t)) + +typedef struct of_dpa_flow_action { + uint32_t goto_tbl; + struct { + uint32_t group_id; + uint32_t tun_log_lport; + __be16 vlan_id; + } write; + struct { + __be16 new_vlan_id; + uint32_t out_pport; + uint8_t copy_to_cpu; + __be16 vlan_id; + } apply; +} OfDpaFlowAction; + +typedef struct of_dpa_flow { + uint32_t lpm; + uint32_t priority; + uint32_t hardtime; + uint32_t idletime; + uint64_t cookie; + OfDpaFlowKey key; + OfDpaFlowKey mask; + OfDpaFlowAction action; + struct { + uint64_t hits; + int64_t install_time; + int64_t refresh_time; + uint64_t rx_pkts; + uint64_t tx_pkts; + } stats; +} OfDpaFlow; + +typedef struct of_dpa_flow_pkt_fields { + uint32_t tunnel_id; + struct eth_header *ethhdr; + __be16 *h_proto; + struct vlan_header *vlanhdr; + struct ip_header *ipv4hdr; + struct ip6_header *ipv6hdr; + Ipv6Addr *ipv6_src_addr; + Ipv6Addr *ipv6_dst_addr; +} OfDpaFlowPktFields; + +typedef struct of_dpa_flow_context { + uint32_t in_pport; + uint32_t tunnel_id; + struct iovec *iov; + int iovcnt; + struct eth_header ethhdr_rewrite; + struct vlan_header vlanhdr_rewrite; + struct vlan_header vlanhdr; + OfDpa *of_dpa; + OfDpaFlowPktFields fields; + OfDpaFlowAction action_set; +} OfDpaFlowContext; + +typedef struct of_dpa_flow_match { + OfDpaFlowKey value; + OfDpaFlow *best; +} OfDpaFlowMatch; + +typedef struct of_dpa_group { + uint32_t id; + union { + struct { + uint32_t out_pport; + uint8_t pop_vlan; + } l2_interface; + struct { + uint32_t group_id; + MACAddr src_mac; + MACAddr dst_mac; + __be16 vlan_id; + } l2_rewrite; + struct { + uint16_t group_count; + uint32_t *group_ids; + } l2_flood; + struct { + uint32_t group_id; + MACAddr src_mac; + MACAddr dst_mac; + __be16 vlan_id; + uint8_t ttl_check; + } l3_unicast; + }; +} OfDpaGroup; + +static int of_dpa_mask2prefix(__be32 mask) +{ + int i; + int count = 32; + + for (i = 0; i < 32; i++) { + if (!(ntohl(mask) & ((2 << i) - 1))) { + count--; + } + } + + return count; +} + +#if defined(DEBUG_ROCKER) +static void of_dpa_flow_key_dump(OfDpaFlowKey *key, OfDpaFlowKey *mask) +{ + char buf[512], *b = buf, *mac; + + b += sprintf(b, " tbl %2d", key->tbl_id); + + if (key->in_pport || (mask && mask->in_pport)) { + b += sprintf(b, " in_pport %2d", key->in_pport); + if (mask && mask->in_pport != 0xffffffff) { + b += sprintf(b, "/0x%08x", key->in_pport); + } + } + + if (key->tunnel_id || (mask && mask->tunnel_id)) { + b += sprintf(b, " tun %8d", key->tunnel_id); + if (mask && mask->tunnel_id != 0xffffffff) { + b += sprintf(b, "/0x%08x", key->tunnel_id); + } + } + + if (key->eth.vlan_id || (mask && mask->eth.vlan_id)) { + b += sprintf(b, " vlan %4d", ntohs(key->eth.vlan_id)); + if (mask && mask->eth.vlan_id != 0xffff) { + b += sprintf(b, "/0x%04x", ntohs(key->eth.vlan_id)); + } + } + + if (memcmp(key->eth.src.a, zero_mac.a, ETH_ALEN) || + (mask && memcmp(mask->eth.src.a, zero_mac.a, ETH_ALEN))) { + mac = qemu_mac_strdup_printf(key->eth.src.a); + b += sprintf(b, " src %s", mac); + g_free(mac); + if (mask && memcmp(mask->eth.src.a, ff_mac.a, ETH_ALEN)) { + mac = qemu_mac_strdup_printf(mask->eth.src.a); + b += sprintf(b, "/%s", mac); + g_free(mac); + } + } + + if (memcmp(key->eth.dst.a, zero_mac.a, ETH_ALEN) || + (mask && memcmp(mask->eth.dst.a, zero_mac.a, ETH_ALEN))) { + mac = qemu_mac_strdup_printf(key->eth.dst.a); + b += sprintf(b, " dst %s", mac); + g_free(mac); + if (mask && memcmp(mask->eth.dst.a, ff_mac.a, ETH_ALEN)) { + mac = qemu_mac_strdup_printf(mask->eth.dst.a); + b += sprintf(b, "/%s", mac); + g_free(mac); + } + } + + if (key->eth.type || (mask && mask->eth.type)) { + b += sprintf(b, " type 0x%04x", ntohs(key->eth.type)); + if (mask && mask->eth.type != 0xffff) { + b += sprintf(b, "/0x%04x", ntohs(mask->eth.type)); + } + switch (ntohs(key->eth.type)) { + case 0x0800: + case 0x86dd: + if (key->ip.proto || (mask && mask->ip.proto)) { + b += sprintf(b, " ip proto %2d", key->ip.proto); + if (mask && mask->ip.proto != 0xff) { + b += sprintf(b, "/0x%02x", mask->ip.proto); + } + } + if (key->ip.tos || (mask && mask->ip.tos)) { + b += sprintf(b, " ip tos %2d", key->ip.tos); + if (mask && mask->ip.tos != 0xff) { + b += sprintf(b, "/0x%02x", mask->ip.tos); + } + } + break; + } + switch (ntohs(key->eth.type)) { + case 0x0800: + if (key->ipv4.addr.dst || (mask && mask->ipv4.addr.dst)) { + b += sprintf(b, " dst %s", + inet_ntoa(*(struct in_addr *)&key->ipv4.addr.dst)); + if (mask) { + b += sprintf(b, "/%d", + of_dpa_mask2prefix(mask->ipv4.addr.dst)); + } + } + break; + } + } + + DPRINTF("%s\n", buf); +} +#else +#define of_dpa_flow_key_dump(k, m) +#endif + +static void _of_dpa_flow_match(void *key, void *value, void *user_data) +{ + OfDpaFlow *flow = value; + OfDpaFlowMatch *match = user_data; + uint64_t *k = (uint64_t *)&flow->key; + uint64_t *m = (uint64_t *)&flow->mask; + uint64_t *v = (uint64_t *)&match->value; + int i; + + if (flow->key.tbl_id == match->value.tbl_id) { + of_dpa_flow_key_dump(&flow->key, &flow->mask); + } + + if (flow->key.width > match->value.width) { + return; + } + + for (i = 0; i < flow->key.width; i++, k++, m++, v++) { + if ((~*k & *m & *v) | (*k & *m & ~*v)) { + return; + } + } + + DPRINTF("match\n"); + + if (!match->best || + flow->priority > match->best->priority || + flow->lpm > match->best->lpm) { + match->best = flow; + } +} + +static OfDpaFlow *of_dpa_flow_match(OfDpa *of_dpa, OfDpaFlowMatch *match) +{ + DPRINTF("\nnew search\n"); + of_dpa_flow_key_dump(&match->value, NULL); + + g_hash_table_foreach(of_dpa->flow_tbl, _of_dpa_flow_match, match); + + return match->best; +} + +static OfDpaFlow *of_dpa_flow_find(OfDpa *of_dpa, uint64_t cookie) +{ + return g_hash_table_lookup(of_dpa->flow_tbl, &cookie); +} + +static int of_dpa_flow_add(OfDpa *of_dpa, OfDpaFlow *flow) +{ + g_hash_table_insert(of_dpa->flow_tbl, &flow->cookie, flow); + + return 0; +} + +static int of_dpa_flow_mod(OfDpaFlow *flow) +{ + return 0; +} + +static void of_dpa_flow_del(OfDpa *of_dpa, OfDpaFlow *flow) +{ + g_hash_table_remove(of_dpa->flow_tbl, &flow->cookie); +} + +static OfDpaFlow *of_dpa_flow_alloc(uint64_t cookie, uint32_t priority, + uint32_t hardtime, uint32_t idletime) +{ + OfDpaFlow *flow; + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000; + + flow = g_malloc0(sizeof(OfDpaFlow)); + if (!flow) { + return NULL; + } + + flow->cookie = cookie; + flow->priority = priority; + flow->hardtime = hardtime; + flow->idletime = idletime; + flow->mask.tbl_id = 0xffffffff; + + flow->stats.install_time = flow->stats.refresh_time = now; + + return flow; +} + +static void of_dpa_flow_pkt_hdr_reset(OfDpaFlowContext *fc) +{ + OfDpaFlowPktFields *fields = &fc->fields; + + fc->iov[0].iov_base = fields->ethhdr; + fc->iov[0].iov_len = sizeof(struct eth_header); + fc->iov[1].iov_base = fields->vlanhdr; + fc->iov[1].iov_len = fields->vlanhdr ? sizeof(struct vlan_header) : 0; +} + +static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc, + const struct iovec *iov, int iovcnt) +{ + OfDpaFlowPktFields *fields = &fc->fields; + size_t sofar = 0; + int i; + + sofar += sizeof(struct eth_header); + if (iov->iov_len < sofar) { + DPRINTF("flow_pkt_parse underrun on eth_header\n"); + return; + } + + fields->ethhdr = iov->iov_base; + fields->h_proto = &fields->ethhdr->h_proto; + + if (ntohs(*fields->h_proto) == ETH_P_VLAN) { + sofar += sizeof(struct vlan_header); + if (iov->iov_len < sofar) { + DPRINTF("flow_pkt_parse underrun on vlan_header\n"); + return; + } + fields->vlanhdr = (struct vlan_header *)(fields->ethhdr + 1); + fields->h_proto = &fields->vlanhdr->h_proto; + } + + switch (ntohs(*fields->h_proto)) { + case ETH_P_IP: + sofar += sizeof(struct ip_header); + if (iov->iov_len < sofar) { + DPRINTF("flow_pkt_parse underrun on ip_header\n"); + return; + } + fields->ipv4hdr = (struct ip_header *)(fields->h_proto + 1); + break; + case ETH_P_IPV6: + sofar += sizeof(struct ip6_header); + if (iov->iov_len < sofar) { + DPRINTF("flow_pkt_parse underrun on ip6_header\n"); + return; + } + fields->ipv6hdr = (struct ip6_header *)(fields->h_proto + 1); + break; + } + + /* To facilitate (potential) VLAN tag insertion, Make a + * copy of the iov and insert two new vectors at the + * beginning for eth hdr and vlan hdr. No data is copied, + * just the vectors. + */ + + of_dpa_flow_pkt_hdr_reset(fc); + + fc->iov[2].iov_base = fields->h_proto + 1; + fc->iov[2].iov_len = iov->iov_len - fc->iov[0].iov_len - fc->iov[1].iov_len; + + for (i = 1; i < iovcnt; i++) { + fc->iov[i+2] = iov[i]; + } + + fc->iovcnt = iovcnt + 2; +} + +static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id) +{ + OfDpaFlowPktFields *fields = &fc->fields; + uint16_t h_proto = fields->ethhdr->h_proto; + + if (fields->vlanhdr) { + DPRINTF("flow_pkt_insert_vlan packet already has vlan\n"); + return; + } + + fields->ethhdr->h_proto = htons(ETH_P_VLAN); + fields->vlanhdr = &fc->vlanhdr; + fields->vlanhdr->h_tci = vlan_id; + fields->vlanhdr->h_proto = h_proto; + fields->h_proto = &fields->vlanhdr->h_proto; + + fc->iov[1].iov_base = fields->vlanhdr; + fc->iov[1].iov_len = sizeof(struct vlan_header); +} + +static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc) +{ + OfDpaFlowPktFields *fields = &fc->fields; + + if (!fields->vlanhdr) { + return; + } + + fc->iov[0].iov_len -= sizeof(fields->ethhdr->h_proto); + fc->iov[1].iov_base = fields->h_proto; + fc->iov[1].iov_len = sizeof(fields->ethhdr->h_proto); +} + +static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc, + uint8_t *src_mac, uint8_t *dst_mac, + __be16 vlan_id) +{ + OfDpaFlowPktFields *fields = &fc->fields; + + if (src_mac || dst_mac) { + memcpy(&fc->ethhdr_rewrite, fields->ethhdr, sizeof(struct eth_header)); + if (src_mac && memcmp(src_mac, zero_mac.a, ETH_ALEN)) { + memcpy(fc->ethhdr_rewrite.h_source, src_mac, ETH_ALEN); + } + if (dst_mac && memcmp(dst_mac, zero_mac.a, ETH_ALEN)) { + memcpy(fc->ethhdr_rewrite.h_dest, dst_mac, ETH_ALEN); + } + fc->iov[0].iov_base = &fc->ethhdr_rewrite; + } + + if (vlan_id && fields->vlanhdr) { + fc->vlanhdr_rewrite = fc->vlanhdr; + fc->vlanhdr_rewrite.h_tci = vlan_id; + fc->iov[1].iov_base = &fc->vlanhdr_rewrite; + } +} + +static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id); + +static void of_dpa_ig_port_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT; + match->value.in_pport = fc->in_pport; + match->value.width = FLOW_KEY_WIDTH(tbl_id); +} + +static void of_dpa_ig_port_miss(OfDpaFlowContext *fc) +{ + uint32_t port; + + /* The default on miss is for packets from physical ports + * to go to the VLAN Flow Table. There is no default rule + * for packets from logical ports, which are dropped on miss. + */ + + if (fp_port_from_pport(fc->in_pport, &port)) { + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_VLAN); + } +} + +static void of_dpa_vlan_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN; + match->value.in_pport = fc->in_pport; + if (fc->fields.vlanhdr) { + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + } + match->value.width = FLOW_KEY_WIDTH(eth.vlan_id); +} + +static void of_dpa_vlan_insert(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.apply.new_vlan_id) { + of_dpa_flow_pkt_insert_vlan(fc, flow->action.apply.new_vlan_id); + } +} + +static void of_dpa_term_mac_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; + match->value.in_pport = fc->in_pport; + match->value.eth.type = *fc->fields.h_proto; + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest, + sizeof(match->value.eth.dst.a)); + match->value.width = FLOW_KEY_WIDTH(eth.type); +} + +static void of_dpa_term_mac_miss(OfDpaFlowContext *fc) +{ + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_BRIDGING); +} + +static void of_dpa_apply_actions(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + fc->action_set.apply.copy_to_cpu = flow->action.apply.copy_to_cpu; + fc->action_set.apply.vlan_id = flow->key.eth.vlan_id; +} + +static void of_dpa_bridging_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING; + if (fc->fields.vlanhdr) { + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + } else if (fc->tunnel_id) { + match->value.tunnel_id = fc->tunnel_id; + } + memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest, + sizeof(match->value.eth.dst.a)); + match->value.width = FLOW_KEY_WIDTH(eth.dst); +} + +static void of_dpa_bridging_learn(OfDpaFlowContext *fc, + OfDpaFlow *dst_flow) +{ + OfDpaFlowMatch match = { { 0, }, }; + OfDpaFlow *flow; + uint8_t *addr; + uint16_t vlan_id; + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000; + int64_t refresh_delay = 1; + + /* Do a lookup in bridge table by src_mac/vlan */ + + addr = fc->fields.ethhdr->h_source; + vlan_id = fc->fields.vlanhdr->h_tci; + + match.value.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING; + match.value.eth.vlan_id = vlan_id; + memcpy(match.value.eth.dst.a, addr, sizeof(match.value.eth.dst.a)); + match.value.width = FLOW_KEY_WIDTH(eth.dst); + + flow = of_dpa_flow_match(fc->of_dpa, &match); + if (flow) { + if (!memcmp(flow->mask.eth.dst.a, ff_mac.a, + sizeof(flow->mask.eth.dst.a))) { + /* src_mac/vlan already learned; if in_port and out_port + * don't match, the end station has moved and the port + * needs updating */ + /* XXX implement the in_port/out_port check */ + if (now - flow->stats.refresh_time < refresh_delay) { + return; + } + flow->stats.refresh_time = now; + } + } + + /* Let driver know about mac/vlan. This may be a new mac/vlan + * or a refresh of existing mac/vlan that's been hit after the + * refresh_delay. + */ + + rocker_event_mac_vlan_seen(world_rocker(fc->of_dpa->world), + fc->in_pport, addr, vlan_id); +} + +static void of_dpa_bridging_miss(OfDpaFlowContext *fc) +{ + of_dpa_bridging_learn(fc, NULL); + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY); +} + +static void of_dpa_bridging_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } + fc->action_set.write.tun_log_lport = flow->action.write.tun_log_lport; +} + +static void of_dpa_unicast_routing_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; + match->value.eth.type = *fc->fields.h_proto; + if (fc->fields.ipv4hdr) { + match->value.ipv4.addr.dst = fc->fields.ipv4hdr->ip_dst; + } + if (fc->fields.ipv6_dst_addr) { + memcpy(&match->value.ipv6.addr.dst, fc->fields.ipv6_dst_addr, + sizeof(match->value.ipv6.addr.dst)); + } + match->value.width = FLOW_KEY_WIDTH(ipv6.addr.dst); +} + +static void of_dpa_unicast_routing_miss(OfDpaFlowContext *fc) +{ + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY); +} + +static void of_dpa_unicast_routing_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } +} + +static void +of_dpa_multicast_routing_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING; + match->value.eth.type = *fc->fields.h_proto; + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + if (fc->fields.ipv4hdr) { + match->value.ipv4.addr.src = fc->fields.ipv4hdr->ip_src; + match->value.ipv4.addr.dst = fc->fields.ipv4hdr->ip_dst; + } + if (fc->fields.ipv6_src_addr) { + memcpy(&match->value.ipv6.addr.src, fc->fields.ipv6_src_addr, + sizeof(match->value.ipv6.addr.src)); + } + if (fc->fields.ipv6_dst_addr) { + memcpy(&match->value.ipv6.addr.dst, fc->fields.ipv6_dst_addr, + sizeof(match->value.ipv6.addr.dst)); + } + match->value.width = FLOW_KEY_WIDTH(ipv6.addr.dst); +} + +static void of_dpa_multicast_routing_miss(OfDpaFlowContext *fc) +{ + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY); +} + +static void +of_dpa_multicast_routing_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } + fc->action_set.write.vlan_id = flow->action.write.vlan_id; +} + +static void of_dpa_acl_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + match->value.in_pport = fc->in_pport; + memcpy(match->value.eth.src.a, fc->fields.ethhdr->h_source, + sizeof(match->value.eth.src.a)); + memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest, + sizeof(match->value.eth.dst.a)); + match->value.eth.type = *fc->fields.h_proto; + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + match->value.width = FLOW_KEY_WIDTH(eth.type); + if (fc->fields.ipv4hdr) { + match->value.ip.proto = fc->fields.ipv4hdr->ip_p; + match->value.ip.tos = fc->fields.ipv4hdr->ip_tos; + match->value.width = FLOW_KEY_WIDTH(ip.tos); + } else if (fc->fields.ipv6hdr) { + match->value.ip.proto = + fc->fields.ipv6hdr->ip6_ctlun.ip6_un1.ip6_un1_nxt; + match->value.ip.tos = 0; /* XXX what goes here? */ + match->value.width = FLOW_KEY_WIDTH(ip.tos); + } +} + +static void of_dpa_eg(OfDpaFlowContext *fc); +static void of_dpa_acl_hit(OfDpaFlowContext *fc, + OfDpaFlow *dst_flow) +{ + of_dpa_eg(fc); +} + +static void of_dpa_acl_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } +} + +static void of_dpa_drop(OfDpaFlowContext *fc) +{ + /* drop packet */ +} + +static OfDpaGroup *of_dpa_group_find(OfDpa *of_dpa, + uint32_t group_id) +{ + return g_hash_table_lookup(of_dpa->group_tbl, &group_id); +} + +static int of_dpa_group_add(OfDpa *of_dpa, OfDpaGroup *group) +{ + g_hash_table_insert(of_dpa->group_tbl, &group->id, group); + + return 0; +} + +#if 0 +static int of_dpa_group_mod(OfDpa *of_dpa, OfDpaGroup *group) +{ + OfDpaGroup *old_group = of_dpa_group_find(of_dpa, group->id); + + if (!old_group) { + return -ENOENT; + } + + /* XXX */ + + return 0; +} +#endif + +static int of_dpa_group_del(OfDpa *of_dpa, OfDpaGroup *group) +{ + g_hash_table_remove(of_dpa->group_tbl, &group->id); + + return 0; +} + +#if 0 +static int of_dpa_group_get_stats(OfDpa *of_dpa, uint32_t id) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, id); + + if (!group) { + return -ENOENT; + } + + /* XXX get/return stats */ + + return 0; +} +#endif + +static OfDpaGroup *of_dpa_group_alloc(uint32_t id) +{ + OfDpaGroup *group = g_malloc0(sizeof(OfDpaGroup)); + + if (!group) { + return NULL; + } + + group->id = id; + + return group; +} + +static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, + OfDpaGroup *group) +{ + if (group->l2_interface.pop_vlan) { + of_dpa_flow_pkt_strip_vlan(fc); + } + + /* Note: By default, and as per the OpenFlow 1.3.1 + * specification, a packet cannot be forwarded back + * to the IN_PORT from which it came in. An action + * bucket that specifies the particular packet's + * egress port is not evaluated. + */ + + if (group->l2_interface.out_pport == 0) { + rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt); + } else if (group->l2_interface.out_pport != fc->in_pport) { + rocker_port_eg(world_rocker(fc->of_dpa->world), + group->l2_interface.out_pport, + fc->iov, fc->iovcnt); + } +} + +static void of_dpa_output_l2_rewrite(OfDpaFlowContext *fc, + OfDpaGroup *group) +{ + OfDpaGroup *l2_group = + of_dpa_group_find(fc->of_dpa, group->l2_rewrite.group_id); + + if (!l2_group) { + return; + } + + of_dpa_flow_pkt_hdr_rewrite(fc, group->l2_rewrite.src_mac.a, + group->l2_rewrite.dst_mac.a, + group->l2_rewrite.vlan_id); + of_dpa_output_l2_interface(fc, l2_group); +} + +static void of_dpa_output_l2_flood(OfDpaFlowContext *fc, + OfDpaGroup *group) +{ + OfDpaGroup *l2_group; + int i; + + for (i = 0; i < group->l2_flood.group_count; i++) { + of_dpa_flow_pkt_hdr_reset(fc); + l2_group = of_dpa_group_find(fc->of_dpa, group->l2_flood.group_ids[i]); + if (!l2_group) { + continue; + } + switch (ROCKER_GROUP_TYPE_GET(l2_group->id)) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: + of_dpa_output_l2_interface(fc, l2_group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: + of_dpa_output_l2_rewrite(fc, l2_group); + break; + } + } +} + +static void of_dpa_output_l3_unicast(OfDpaFlowContext *fc, OfDpaGroup *group) +{ + OfDpaGroup *l2_group = + of_dpa_group_find(fc->of_dpa, group->l3_unicast.group_id); + + if (!l2_group) { + return; + } + + of_dpa_flow_pkt_hdr_rewrite(fc, group->l3_unicast.src_mac.a, + group->l3_unicast.dst_mac.a, + group->l3_unicast.vlan_id); + /* XXX need ttl_check */ + of_dpa_output_l2_interface(fc, l2_group); +} + +static void of_dpa_eg(OfDpaFlowContext *fc) +{ + OfDpaFlowAction *set = &fc->action_set; + OfDpaGroup *group; + uint32_t group_id; + + /* send a copy of pkt to CPU (controller)? */ + + if (set->apply.copy_to_cpu) { + group_id = ROCKER_GROUP_L2_INTERFACE(set->apply.vlan_id, 0); + group = of_dpa_group_find(fc->of_dpa, group_id); + if (group) { + of_dpa_output_l2_interface(fc, group); + of_dpa_flow_pkt_hdr_reset(fc); + } + } + + /* process group write actions */ + + if (!set->write.group_id) { + return; + } + + group = of_dpa_group_find(fc->of_dpa, set->write.group_id); + if (!group) { + return; + } + + switch (ROCKER_GROUP_TYPE_GET(group->id)) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: + of_dpa_output_l2_interface(fc, group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: + of_dpa_output_l2_rewrite(fc, group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: + case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: + of_dpa_output_l2_flood(fc, group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST: + of_dpa_output_l3_unicast(fc, group); + break; + } +} + +typedef struct of_dpa_flow_tbl_ops { + void (*build_match)(OfDpaFlowContext *fc, OfDpaFlowMatch *match); + void (*hit)(OfDpaFlowContext *fc, OfDpaFlow *flow); + void (*miss)(OfDpaFlowContext *fc); + void (*hit_no_goto)(OfDpaFlowContext *fc); + void (*action_apply)(OfDpaFlowContext *fc, OfDpaFlow *flow); + void (*action_write)(OfDpaFlowContext *fc, OfDpaFlow *flow); +} OfDpaFlowTblOps; + +static OfDpaFlowTblOps of_dpa_tbl_ops[] = { + [ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT] = { + .build_match = of_dpa_ig_port_build_match, + .miss = of_dpa_ig_port_miss, + .hit_no_goto = of_dpa_drop, + }, + [ROCKER_OF_DPA_TABLE_ID_VLAN] = { + .build_match = of_dpa_vlan_build_match, + .hit_no_goto = of_dpa_drop, + .action_apply = of_dpa_vlan_insert, + }, + [ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC] = { + .build_match = of_dpa_term_mac_build_match, + .miss = of_dpa_term_mac_miss, + .hit_no_goto = of_dpa_drop, + .action_apply = of_dpa_apply_actions, + }, + [ROCKER_OF_DPA_TABLE_ID_BRIDGING] = { + .build_match = of_dpa_bridging_build_match, + .hit = of_dpa_bridging_learn, + .miss = of_dpa_bridging_miss, + .hit_no_goto = of_dpa_drop, + .action_apply = of_dpa_apply_actions, + .action_write = of_dpa_bridging_action_write, + }, + [ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING] = { + .build_match = of_dpa_unicast_routing_build_match, + .miss = of_dpa_unicast_routing_miss, + .hit_no_goto = of_dpa_drop, + .action_write = of_dpa_unicast_routing_action_write, + }, + [ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING] = { + .build_match = of_dpa_multicast_routing_build_match, + .miss = of_dpa_multicast_routing_miss, + .hit_no_goto = of_dpa_drop, + .action_write = of_dpa_multicast_routing_action_write, + }, + [ROCKER_OF_DPA_TABLE_ID_ACL_POLICY] = { + .build_match = of_dpa_acl_build_match, + .hit = of_dpa_acl_hit, + .miss = of_dpa_eg, + .action_apply = of_dpa_apply_actions, + .action_write = of_dpa_acl_action_write, + }, +}; + +static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id) +{ + OfDpaFlowTblOps *ops = &of_dpa_tbl_ops[tbl_id]; + OfDpaFlowMatch match = { { 0, }, }; + OfDpaFlow *flow; + + if (ops->build_match) { + ops->build_match(fc, &match); + } else { + return; + } + + flow = of_dpa_flow_match(fc->of_dpa, &match); + if (!flow) { + if (ops->miss) { + ops->miss(fc); + } + return; + } + + flow->stats.hits++; + + if (ops->action_apply) { + ops->action_apply(fc, flow); + } + + if (ops->action_write) { + ops->action_write(fc, flow); + } + + if (ops->hit) { + ops->hit(fc, flow); + } + + if (flow->action.goto_tbl) { + of_dpa_flow_ig_tbl(fc, flow->action.goto_tbl); + } else if (ops->hit_no_goto) { + ops->hit_no_goto(fc); + } + + /* drop packet */ +} + +static ssize_t of_dpa_ig(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt) +{ + struct iovec iov_copy[iovcnt + 2]; + OfDpaFlowContext fc = { + .of_dpa = world_private(world), + .in_pport = pport, + .iov = iov_copy, + .iovcnt = iovcnt + 2, + }; + + of_dpa_flow_pkt_parse(&fc, iov, iovcnt); + of_dpa_flow_ig_tbl(&fc, ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT); + + return iov_size(iov, iovcnt); +} + +#define ROCKER_TUNNEL_LPORT 0x00010000 + +static int of_dpa_cmd_add_ig_port(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + bool overlay_tunnel; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT; + key->width = FLOW_KEY_WIDTH(tbl_id); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]) { + mask->in_pport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]); + } + + overlay_tunnel = !!(key->in_pport & ROCKER_TUNNEL_LPORT); + + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + + if (!overlay_tunnel && action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_VLAN) { + return -ROCKER_EINVAL; + } + + if (overlay_tunnel && action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_BRIDGING) { + return -ROCKER_EINVAL; + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_vlan(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + uint32_t port; + bool untagged; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + DPRINTF("Must give in_pport and vlan_id to install VLAN tbl entry\n"); + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN; + key->width = FLOW_KEY_WIDTH(eth.vlan_id); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (!fp_port_from_pport(key->in_pport, &port)) { + DPRINTF("in_pport (%d) not a front-panel port\n", key->in_pport); + return -ROCKER_EINVAL; + } + mask->in_pport = 0xffffffff; + + key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) { + mask->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]); + } + + if (key->eth.vlan_id) { + untagged = false; /* filtering */ + } else { + untagged = true; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC) { + DPRINTF("Goto tbl (%d) must be TERM_MAC\n", action->goto_tbl); + return -ROCKER_EINVAL; + } + } + + if (untagged) { + if (!flow_tlvs[ROCKER_TLV_OF_DPA_NEW_VLAN_ID]) { + DPRINTF("Must specify new vlan_id if untagged\n"); + return -ROCKER_EINVAL; + } + action->apply.new_vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_NEW_VLAN_ID]); + if (1 > ntohs(action->apply.new_vlan_id) || + ntohs(action->apply.new_vlan_id) > 4095) { + DPRINTF("New vlan_id (%d) must be between 1 and 4095\n", + ntohs(action->apply.new_vlan_id)); + return -ROCKER_EINVAL; + } + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_term_mac(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + const MACAddr ipv4_mcast = { .a = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 } }; + const MACAddr ipv4_mask = { .a = { 0xff, 0xff, 0xff, 0x80, 0x00, 0x00 } }; + const MACAddr ipv6_mcast = { .a = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 } }; + const MACAddr ipv6_mask = { .a = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 } }; + uint32_t port; + bool unicast = false; + bool multicast = false; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK] || + !flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE] || + !flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC] || + !flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; + key->width = FLOW_KEY_WIDTH(eth.type); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (!fp_port_from_pport(key->in_pport, &port)) { + return -ROCKER_EINVAL; + } + mask->in_pport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]); + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + if (key->eth.type != htons(0x0800) && key->eth.type != htons(0x86dd)) { + return -ROCKER_EINVAL; + } + mask->eth.type = htons(0xffff); + + memcpy(key->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(key->eth.dst.a)); + memcpy(mask->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]), + sizeof(mask->eth.dst.a)); + + if ((key->eth.dst.a[0] & 0x01) == 0x00) { + unicast = true; + } + + /* only two wildcard rules are acceptable for IPv4 and IPv6 multicast */ + if (memcmp(key->eth.dst.a, ipv4_mcast.a, sizeof(key->eth.dst.a)) == 0 && + memcmp(mask->eth.dst.a, ipv4_mask.a, sizeof(mask->eth.dst.a)) == 0) { + multicast = true; + } + if (memcmp(key->eth.dst.a, ipv6_mcast.a, sizeof(key->eth.dst.a)) == 0 && + memcmp(mask->eth.dst.a, ipv6_mask.a, sizeof(mask->eth.dst.a)) == 0) { + multicast = true; + } + + if (!unicast && !multicast) { + return -ROCKER_EINVAL; + } + + key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + mask->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]); + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING && + action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING) { + return -ROCKER_EINVAL; + } + + if (unicast && + action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING) { + return -ROCKER_EINVAL; + } + + if (multicast && + action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING) { + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) { + action->apply.copy_to_cpu = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_bridging(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + bool unicast = false; + bool dst_mac = false; + bool dst_mac_mask = false; + enum { + BRIDGING_MODE_UNKNOWN, + BRIDGING_MODE_VLAN_UCAST, + BRIDGING_MODE_VLAN_MCAST, + BRIDGING_MODE_VLAN_DFLT, + BRIDGING_MODE_TUNNEL_UCAST, + BRIDGING_MODE_TUNNEL_MCAST, + BRIDGING_MODE_TUNNEL_DFLT, + } mode = BRIDGING_MODE_UNKNOWN; + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING; + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + key->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + mask->eth.vlan_id = 0xffff; + key->width = FLOW_KEY_WIDTH(eth.vlan_id); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]) { + key->tunnel_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]); + mask->tunnel_id = 0xffffffff; + key->width = FLOW_KEY_WIDTH(tunnel_id); + } + + /* can't do VLAN bridging and tunnel bridging at same time */ + if (key->eth.vlan_id && key->tunnel_id) { + DPRINTF("can't do VLAN bridging and tunnel bridging at same time\n"); + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(key->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(key->eth.dst.a)); + key->width = FLOW_KEY_WIDTH(eth.dst); + dst_mac = true; + unicast = (key->eth.dst.a[0] & 0x01) == 0x00; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]) { + memcpy(mask->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]), + sizeof(mask->eth.dst.a)); + key->width = FLOW_KEY_WIDTH(eth.dst); + dst_mac_mask = true; + } else if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(mask->eth.dst.a, ff_mac.a, sizeof(mask->eth.dst.a)); + } + + if (key->eth.vlan_id) { + if (dst_mac && !dst_mac_mask) { + mode = unicast ? BRIDGING_MODE_VLAN_UCAST : + BRIDGING_MODE_VLAN_MCAST; + } else if ((dst_mac && dst_mac_mask) || !dst_mac) { + mode = BRIDGING_MODE_VLAN_DFLT; + } + } else if (key->tunnel_id) { + if (dst_mac && !dst_mac_mask) { + mode = unicast ? BRIDGING_MODE_TUNNEL_UCAST : + BRIDGING_MODE_TUNNEL_MCAST; + } else if ((dst_mac && dst_mac_mask) || !dst_mac) { + mode = BRIDGING_MODE_TUNNEL_DFLT; + } + } + + if (mode == BRIDGING_MODE_UNKNOWN) { + DPRINTF("Unknown bridging mode\n"); + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) { + DPRINTF("Briding goto tbl must be ACL policy\n"); + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + action->write.group_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + switch (mode) { + case BRIDGING_MODE_VLAN_UCAST: + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) { + DPRINTF("Bridging mode vlan ucast needs L2 " + "interface group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + case BRIDGING_MODE_VLAN_MCAST: + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) { + DPRINTF("Bridging mode vlan mcast needs L2 " + "mcast group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + case BRIDGING_MODE_VLAN_DFLT: + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) { + DPRINTF("Bridging mode vlan dflt needs L2 " + "flood group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + case BRIDGING_MODE_TUNNEL_MCAST: + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY) { + DPRINTF("Bridging mode tunnel mcast needs L2 " + "overlay group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + case BRIDGING_MODE_TUNNEL_DFLT: + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY) { + DPRINTF("Bridging mode tunnel dflt needs L2 " + "overlay group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + default: + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_LPORT]) { + action->write.tun_log_lport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_LPORT]); + if (mode != BRIDGING_MODE_TUNNEL_UCAST) { + DPRINTF("Have tunnel logical port but not " + "in bridging tunnel mode\n"); + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) { + action->apply.copy_to_cpu = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_unicast_routing(OfDpaFlow *flow, + RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + enum { + UNICAST_ROUTING_MODE_UNKNOWN, + UNICAST_ROUTING_MODE_IPV4, + UNICAST_ROUTING_MODE_IPV6, + } mode = UNICAST_ROUTING_MODE_UNKNOWN; + uint8_t type; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; + key->width = FLOW_KEY_WIDTH(ipv6.addr.dst); + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + switch (ntohs(key->eth.type)) { + case 0x0800: + mode = UNICAST_ROUTING_MODE_IPV4; + break; + case 0x86dd: + mode = UNICAST_ROUTING_MODE_IPV6; + break; + default: + return -ROCKER_EINVAL; + } + mask->eth.type = htons(0xffff); + + switch (mode) { + case UNICAST_ROUTING_MODE_IPV4: + if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]) { + return -ROCKER_EINVAL; + } + key->ipv4.addr.dst = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]); + if (ipv4_addr_is_multicast(key->ipv4.addr.dst)) { + return -ROCKER_EINVAL; + } + flow->lpm = of_dpa_mask2prefix(htonl(0xffffffff)); + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP_MASK]) { + mask->ipv4.addr.dst = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP_MASK]); + flow->lpm = of_dpa_mask2prefix(mask->ipv4.addr.dst); + } + break; + case UNICAST_ROUTING_MODE_IPV6: + if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]) { + return -ROCKER_EINVAL; + } + memcpy(&key->ipv6.addr.dst, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]), + sizeof(key->ipv6.addr.dst)); + if (ipv6_addr_is_multicast(&key->ipv6.addr.dst)) { + return -ROCKER_EINVAL; + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6_MASK]) { + memcpy(&mask->ipv6.addr.dst, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6_MASK]), + sizeof(mask->ipv6.addr.dst)); + } + break; + default: + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) { + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + action->write.group_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + type = ROCKER_GROUP_TYPE_GET(action->write.group_id); + if (type != ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE && + type != ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST && + type != ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP) { + return -ROCKER_EINVAL; + } + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_multicast_routing(OfDpaFlow *flow, + RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + enum { + MULTICAST_ROUTING_MODE_UNKNOWN, + MULTICAST_ROUTING_MODE_IPV4, + MULTICAST_ROUTING_MODE_IPV6, + } mode = MULTICAST_ROUTING_MODE_UNKNOWN; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING; + key->width = FLOW_KEY_WIDTH(ipv6.addr.dst); + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + switch (ntohs(key->eth.type)) { + case 0x0800: + mode = MULTICAST_ROUTING_MODE_IPV4; + break; + case 0x86dd: + mode = MULTICAST_ROUTING_MODE_IPV6; + break; + default: + return -ROCKER_EINVAL; + } + + key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + + switch (mode) { + case MULTICAST_ROUTING_MODE_IPV4: + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]) { + key->ipv4.addr.src = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP_MASK]) { + mask->ipv4.addr.src = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP_MASK]); + } + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]) { + if (mask->ipv4.addr.src != 0) { + return -ROCKER_EINVAL; + } + } + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]) { + return -ROCKER_EINVAL; + } + + key->ipv4.addr.dst = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]); + if (!ipv4_addr_is_multicast(key->ipv4.addr.dst)) { + return -ROCKER_EINVAL; + } + + break; + + case MULTICAST_ROUTING_MODE_IPV6: + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]) { + memcpy(&key->ipv6.addr.src, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]), + sizeof(key->ipv6.addr.src)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6_MASK]) { + memcpy(&mask->ipv6.addr.src, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6_MASK]), + sizeof(mask->ipv6.addr.src)); + } + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]) { + if (mask->ipv6.addr.src.addr32[0] != 0 && + mask->ipv6.addr.src.addr32[1] != 0 && + mask->ipv6.addr.src.addr32[2] != 0 && + mask->ipv6.addr.src.addr32[3] != 0) { + return -ROCKER_EINVAL; + } + } + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]) { + return -ROCKER_EINVAL; + } + + memcpy(&key->ipv6.addr.dst, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]), + sizeof(key->ipv6.addr.dst)); + if (!ipv6_addr_is_multicast(&key->ipv6.addr.dst)) { + return -ROCKER_EINVAL; + } + + break; + + default: + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) { + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + action->write.group_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + if (ROCKER_GROUP_TYPE_GET(action->write.group_id) != + ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST) { + return -ROCKER_EINVAL; + } + action->write.vlan_id = key->eth.vlan_id; + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_acl_ip(OfDpaFlowKey *key, OfDpaFlowKey *mask, + RockerTlv **flow_tlvs) +{ + key->width = FLOW_KEY_WIDTH(ip.tos); + + key->ip.proto = 0; + key->ip.tos = 0; + mask->ip.proto = 0; + mask->ip.tos = 0; + + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO]) { + key->ip.proto = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO]); + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO_MASK]) { + mask->ip.proto = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO_MASK]); + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP]) { + key->ip.tos = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP]); + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP_MASK]) { + mask->ip.tos = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP_MASK]); + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN]) { + key->ip.tos |= + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN]) << 6; + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN_MASK]) { + mask->ip.tos |= + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN_MASK]) << 6; + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_acl(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + enum { + ACL_MODE_UNKNOWN, + ACL_MODE_IPV4_VLAN, + ACL_MODE_IPV6_VLAN, + ACL_MODE_IPV4_TENANT, + ACL_MODE_IPV6_TENANT, + ACL_MODE_NON_IP_VLAN, + ACL_MODE_NON_IP_TENANT, + ACL_MODE_ANY_VLAN, + ACL_MODE_ANY_TENANT, + } mode = ACL_MODE_UNKNOWN; + int err = ROCKER_OK; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]) { + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID] && + flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + key->width = FLOW_KEY_WIDTH(eth.type); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]) { + mask->in_pport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) { + memcpy(key->eth.src.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]), + sizeof(key->eth.src.a)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC_MASK]) { + memcpy(mask->eth.src.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC_MASK]), + sizeof(mask->eth.src.a)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(key->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(key->eth.dst.a)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]) { + memcpy(mask->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]), + sizeof(mask->eth.dst.a)); + } + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + if (key->eth.type) { + mask->eth.type = 0xffff; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + key->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) { + mask->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]); + } + + switch (ntohs(key->eth.type)) { + case 0x0000: + mode = (key->eth.vlan_id) ? ACL_MODE_ANY_VLAN : ACL_MODE_ANY_TENANT; + break; + case 0x0800: + mode = (key->eth.vlan_id) ? ACL_MODE_IPV4_VLAN : ACL_MODE_IPV4_TENANT; + break; + case 0x86dd: + mode = (key->eth.vlan_id) ? ACL_MODE_IPV6_VLAN : ACL_MODE_IPV6_TENANT; + break; + default: + mode = (key->eth.vlan_id) ? ACL_MODE_NON_IP_VLAN : + ACL_MODE_NON_IP_TENANT; + break; + } + + /* XXX only supporting VLAN modes for now */ + if (mode != ACL_MODE_IPV4_VLAN && + mode != ACL_MODE_IPV6_VLAN && + mode != ACL_MODE_NON_IP_VLAN && + mode != ACL_MODE_ANY_VLAN) { + return -ROCKER_EINVAL; + } + + switch (ntohs(key->eth.type)) { + case 0x0800: + case 0x86dd: + err = of_dpa_cmd_add_acl_ip(key, mask, flow_tlvs); + break; + } + + if (err) { + return err; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + action->write.group_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) { + action->apply.copy_to_cpu = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_flow_add(OfDpa *of_dpa, uint64_t cookie, + RockerTlv **flow_tlvs) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + enum rocker_of_dpa_table_id tbl; + uint32_t priority; + uint32_t hardtime; + uint32_t idletime = 0; + int err = ROCKER_OK; + + if (flow) { + return -ROCKER_EEXIST; + } + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_TABLE_ID] || + !flow_tlvs[ROCKER_TLV_OF_DPA_PRIORITY] || + !flow_tlvs[ROCKER_TLV_OF_DPA_HARDTIME]) { + return -ROCKER_EINVAL; + } + + tbl = rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_TABLE_ID]); + priority = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_PRIORITY]); + hardtime = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_HARDTIME]); + + if (flow_tlvs[ROCKER_TLV_OF_DPA_IDLETIME]) { + if (tbl == ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT || + tbl == ROCKER_OF_DPA_TABLE_ID_VLAN || + tbl == ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC) { + return -ROCKER_EINVAL; + } + idletime = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IDLETIME]); + } + + flow = of_dpa_flow_alloc(cookie, priority, hardtime, idletime); + if (!flow) { + return -ROCKER_ENOMEM; + } + + switch (tbl) { + case ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT: + err = of_dpa_cmd_add_ig_port(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_VLAN: + err = of_dpa_cmd_add_vlan(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC: + err = of_dpa_cmd_add_term_mac(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_BRIDGING: + err = of_dpa_cmd_add_bridging(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING: + err = of_dpa_cmd_add_unicast_routing(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING: + err = of_dpa_cmd_add_multicast_routing(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_ACL_POLICY: + err = of_dpa_cmd_add_acl(flow, flow_tlvs); + break; + } + + if (err) { + goto err_cmd_add; + } + + err = of_dpa_flow_add(of_dpa, flow); + if (err) { + goto err_cmd_add; + } + + return ROCKER_OK; + +err_cmd_add: + g_free(flow); + return err; +} + +static int of_dpa_cmd_flow_mod(OfDpa *of_dpa, uint64_t cookie, + RockerTlv **flow_tlvs) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + + if (!flow) { + return -ROCKER_ENOENT; + } + + return of_dpa_flow_mod(flow); +} + +static int of_dpa_cmd_flow_del(OfDpa *of_dpa, uint64_t cookie) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + + if (!flow) { + return -ROCKER_ENOENT; + } + + of_dpa_flow_del(of_dpa, flow); + + return ROCKER_OK; +} + +static int of_dpa_cmd_flow_get_stats(OfDpa *of_dpa, uint64_t cookie, + struct desc_info *info, char *buf) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + size_t tlv_size; + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000; + int pos; + + if (!flow) { + return -ROCKER_ENOENT; + } + + tlv_size = rocker_tlv_total_size(sizeof(uint32_t)) + /* duration */ + rocker_tlv_total_size(sizeof(uint64_t)) + /* rx_pkts */ + rocker_tlv_total_size(sizeof(uint64_t)); /* tx_ptks */ + + if (tlv_size > desc_buf_size(info)) { + return -ROCKER_EMSGSIZE; + } + + pos = 0; + rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION, + (int32_t)(now - flow->stats.install_time)); + rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS, + flow->stats.rx_pkts); + rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS, + flow->stats.tx_pkts); + + return desc_set_buf(info, tlv_size); +} + +static int of_dpa_flow_cmd(OfDpa *of_dpa, struct desc_info *info, + char *buf, uint16_t cmd, + RockerTlv **flow_tlvs) +{ + uint64_t cookie; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_COOKIE]) { + return -ROCKER_EINVAL; + } + + cookie = rocker_tlv_get_le64(flow_tlvs[ROCKER_TLV_OF_DPA_COOKIE]); + + switch (cmd) { + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD: + return of_dpa_cmd_flow_add(of_dpa, cookie, flow_tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD: + return of_dpa_cmd_flow_mod(of_dpa, cookie, flow_tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL: + return of_dpa_cmd_flow_del(of_dpa, cookie); + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS: + return of_dpa_cmd_flow_get_stats(of_dpa, cookie, info, buf); + } + + return -ROCKER_ENOTSUP; +} + +static int of_dpa_cmd_add_l2_interface(OfDpaGroup *group, + RockerTlv **group_tlvs) +{ + if (!group_tlvs[ROCKER_TLV_OF_DPA_OUT_PPORT] || + !group_tlvs[ROCKER_TLV_OF_DPA_POP_VLAN]) { + return -ROCKER_EINVAL; + } + + group->l2_interface.out_pport = + rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_OUT_PPORT]); + group->l2_interface.pop_vlan = + rocker_tlv_get_u8(group_tlvs[ROCKER_TLV_OF_DPA_POP_VLAN]); + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_l2_rewrite(OfDpa *of_dpa, OfDpaGroup *group, + RockerTlv **group_tlvs) +{ + OfDpaGroup *l2_interface_group; + + if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]) { + return -ROCKER_EINVAL; + } + + group->l2_rewrite.group_id = + rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]); + + l2_interface_group = of_dpa_group_find(of_dpa, group->l2_rewrite.group_id); + if (!l2_interface_group || + ROCKER_GROUP_TYPE_GET(l2_interface_group->id) != + ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) { + DPRINTF("l2 rewrite group needs a valid l2 interface group\n"); + return -ROCKER_EINVAL; + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) { + memcpy(group->l2_rewrite.src_mac.a, + rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]), + sizeof(group->l2_rewrite.src_mac.a)); + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(group->l2_rewrite.dst_mac.a, + rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(group->l2_rewrite.dst_mac.a)); + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + group->l2_rewrite.vlan_id = + rocker_tlv_get_u16(group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + if (ROCKER_GROUP_VLAN_GET(l2_interface_group->id) != + (ntohs(group->l2_rewrite.vlan_id) & VLAN_VID_MASK)) { + DPRINTF("Set VLAN ID must be same as L2 interface group\n"); + return -ROCKER_EINVAL; + } + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_l2_flood(OfDpa *of_dpa, OfDpaGroup *group, + RockerTlv **group_tlvs) +{ + OfDpaGroup *l2_group; + RockerTlv **tlvs; + int err; + int i; + + if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_COUNT] || + !group_tlvs[ROCKER_TLV_OF_DPA_GROUP_IDS]) { + return -ROCKER_EINVAL; + } + + group->l2_flood.group_count = + rocker_tlv_get_le16(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_COUNT]); + + tlvs = g_malloc0((group->l2_flood.group_count + 1) * + sizeof(RockerTlv *)); + if (!tlvs) { + return -ROCKER_ENOMEM; + } + + g_free(group->l2_flood.group_ids); + group->l2_flood.group_ids = + g_malloc0(group->l2_flood.group_count * sizeof(uint32_t)); + if (!group->l2_flood.group_ids) { + err = -ROCKER_ENOMEM; + goto err_out; + } + + rocker_tlv_parse_nested(tlvs, group->l2_flood.group_count, + group_tlvs[ROCKER_TLV_OF_DPA_GROUP_IDS]); + + for (i = 0; i < group->l2_flood.group_count; i++) { + group->l2_flood.group_ids[i] = rocker_tlv_get_le32(tlvs[i + 1]); + } + + /* All of the L2 interface groups referenced by the L2 flood + * must have same VLAN + */ + + for (i = 0; i < group->l2_flood.group_count; i++) { + l2_group = of_dpa_group_find(of_dpa, group->l2_flood.group_ids[i]); + if (!l2_group) { + continue; + } + if ((ROCKER_GROUP_TYPE_GET(l2_group->id) == + ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) && + (ROCKER_GROUP_VLAN_GET(l2_group->id) != + ROCKER_GROUP_VLAN_GET(group->id))) { + DPRINTF("l2 interface group 0x%08x VLAN doesn't match l2 " + "flood group 0x%08x\n", + group->l2_flood.group_ids[i], group->id); + err = -ROCKER_EINVAL; + goto err_out; + } + } + + g_free(tlvs); + return ROCKER_OK; + +err_out: + group->l2_flood.group_count = 0; + g_free(group->l2_flood.group_ids); + g_free(tlvs); + + return err; +} + +static int of_dpa_cmd_add_l3_unicast(OfDpaGroup *group, RockerTlv **group_tlvs) +{ + if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]) { + return -ROCKER_EINVAL; + } + + group->l3_unicast.group_id = + rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]); + + if (group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) { + memcpy(group->l3_unicast.src_mac.a, + rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]), + sizeof(group->l3_unicast.src_mac.a)); + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(group->l3_unicast.dst_mac.a, + rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(group->l3_unicast.dst_mac.a)); + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + group->l3_unicast.vlan_id = + rocker_tlv_get_u16(group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + } + + if (group_tlvs[ROCKER_TLV_OF_DPA_TTL_CHECK]) { + group->l3_unicast.ttl_check = + rocker_tlv_get_u8(group_tlvs[ROCKER_TLV_OF_DPA_TTL_CHECK]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_group_do(OfDpa *of_dpa, uint32_t group_id, + OfDpaGroup *group, RockerTlv **group_tlvs) +{ + uint8_t type = ROCKER_GROUP_TYPE_GET(group_id); + + switch (type) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: + return of_dpa_cmd_add_l2_interface(group, group_tlvs); + case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: + return of_dpa_cmd_add_l2_rewrite(of_dpa, group, group_tlvs); + case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: + /* Treat L2 multicast group same as a L2 flood group */ + case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: + return of_dpa_cmd_add_l2_flood(of_dpa, group, group_tlvs); + case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST: + return of_dpa_cmd_add_l3_unicast(group, group_tlvs); + } + + return -ROCKER_ENOTSUP; +} + +static int of_dpa_cmd_group_add(OfDpa *of_dpa, uint32_t group_id, + RockerTlv **group_tlvs) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id); + int err; + + if (group) { + return -ROCKER_EEXIST; + } + + group = of_dpa_group_alloc(group_id); + if (!group) { + return -ROCKER_ENOMEM; + } + + err = of_dpa_cmd_group_do(of_dpa, group_id, group, group_tlvs); + if (err) { + goto err_cmd_add; + } + + err = of_dpa_group_add(of_dpa, group); + if (err) { + goto err_cmd_add; + } + + return ROCKER_OK; + +err_cmd_add: + g_free(group); + return err; +} + +static int of_dpa_cmd_group_mod(OfDpa *of_dpa, uint32_t group_id, + RockerTlv **group_tlvs) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id); + + if (!group) { + return -ROCKER_ENOENT; + } + + return of_dpa_cmd_group_do(of_dpa, group_id, group, group_tlvs); +} + +static int of_dpa_cmd_group_del(OfDpa *of_dpa, uint32_t group_id) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id); + + if (!group) { + return -ROCKER_ENOENT; + } + + return of_dpa_group_del(of_dpa, group); +} + +static int of_dpa_cmd_group_get_stats(OfDpa *of_dpa, uint32_t group_id, + struct desc_info *info, char *buf) +{ + return -ROCKER_ENOTSUP; +} + +static int of_dpa_group_cmd(OfDpa *of_dpa, struct desc_info *info, + char *buf, uint16_t cmd, RockerTlv **group_tlvs) +{ + uint32_t group_id; + + if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + return -ROCKER_EINVAL; + } + + group_id = rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + + switch (cmd) { + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD: + return of_dpa_cmd_group_add(of_dpa, group_id, group_tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD: + return of_dpa_cmd_group_mod(of_dpa, group_id, group_tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL: + return of_dpa_cmd_group_del(of_dpa, group_id); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS: + return of_dpa_cmd_group_get_stats(of_dpa, group_id, info, buf); + } + + return -ROCKER_ENOTSUP; +} + +static int of_dpa_cmd(World *world, struct desc_info *info, + char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv) +{ + OfDpa *of_dpa = world_private(world); + RockerTlv *tlvs[ROCKER_TLV_OF_DPA_MAX + 1]; + + rocker_tlv_parse_nested(tlvs, ROCKER_TLV_OF_DPA_MAX, cmd_info_tlv); + + switch (cmd) { + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS: + return of_dpa_flow_cmd(of_dpa, info, buf, cmd, tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS: + return of_dpa_group_cmd(of_dpa, info, buf, cmd, tlvs); + } + + return -ROCKER_ENOTSUP; +} + +static int of_dpa_init(World *world) +{ + OfDpa *of_dpa = world_private(world); + + of_dpa->world = world; + + of_dpa->flow_tbl = g_hash_table_new_full(g_int64_hash, g_int64_equal, + NULL, g_free); + if (!of_dpa->flow_tbl) { + return -ENOMEM; + } + + of_dpa->group_tbl = g_hash_table_new_full(g_int_hash, g_int_equal, + NULL, g_free); + if (!of_dpa->group_tbl) { + goto err_group_tbl; + } + + /* XXX hardcode some artificial table max values */ + of_dpa->flow_tbl_max_size = 100; + of_dpa->group_tbl_max_size = 100; + + return 0; + +err_group_tbl: + g_hash_table_destroy(of_dpa->flow_tbl); + return -ENOMEM; +} + +static void of_dpa_uninit(World *world) +{ + OfDpa *of_dpa = world_private(world); + + g_hash_table_destroy(of_dpa->group_tbl); + g_hash_table_destroy(of_dpa->flow_tbl); +} + +static WorldOps of_dpa_ops = { + .init = of_dpa_init, + .uninit = of_dpa_uninit, + .ig = of_dpa_ig, + .cmd = of_dpa_cmd, +}; + +World *of_dpa_world_alloc(Rocker *r) +{ + return world_alloc(r, sizeof(OfDpa), ROCKER_WORLD_TYPE_OF_DPA, &of_dpa_ops); +} diff --git a/hw/net/rocker/rocker_of_dpa.h b/hw/net/rocker/rocker_of_dpa.h new file mode 100644 index 0000000..0af4790 --- /dev/null +++ b/hw/net/rocker/rocker_of_dpa.h @@ -0,0 +1,25 @@ +/* + * QEMU rocker switch emulation - OF-DPA flow processing support + * + * Copyright (c) 2014 Scott Feldman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_OF_DPA_H_ +#define _ROCKER_OF_DPA_H_ + +typedef struct rocker Rocker; +typedef struct world World; + +World *of_dpa_world_alloc(Rocker *r); + +#endif /* _ROCKER_OF_DPA_H_ */ diff --git a/hw/net/rocker/rocker_tlv.h b/hw/net/rocker/rocker_tlv.h new file mode 100644 index 0000000..e3c4ab6 --- /dev/null +++ b/hw/net/rocker/rocker_tlv.h @@ -0,0 +1,244 @@ +/* + * QEMU rocker switch emulation - TLV parsing and composing + * + * Copyright (c) 2014 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_TLV_H_ +#define _ROCKER_TLV_H_ + +#define ROCKER_TLV_ALIGNTO 8U +#define ROCKER_TLV_ALIGN(len) \ + (((len) + ROCKER_TLV_ALIGNTO - 1) & ~(ROCKER_TLV_ALIGNTO - 1)) +#define ROCKER_TLV_HDRLEN ROCKER_TLV_ALIGN(sizeof(RockerTlv)) + +/* + * <------- ROCKER_TLV_HDRLEN -------> <--- ROCKER_TLV_ALIGN(payload) ---> + * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (RockerTlv) | ing | | ing | + * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+ + * <--------------------------- tlv->len --------------------------> + */ + +static inline RockerTlv *rocker_tlv_next(const RockerTlv *tlv, int *remaining) +{ + int totlen = ROCKER_TLV_ALIGN(le16_to_cpu(tlv->len)); + + *remaining -= totlen; + return (RockerTlv *) ((char *) tlv + totlen); +} + +static inline int rocker_tlv_ok(const RockerTlv *tlv, int remaining) +{ + return remaining >= (int) ROCKER_TLV_HDRLEN && + le16_to_cpu(tlv->len) >= ROCKER_TLV_HDRLEN && + le16_to_cpu(tlv->len) <= remaining; +} + +#define rocker_tlv_for_each(pos, head, len, rem) \ + for (pos = head, rem = len; \ + rocker_tlv_ok(pos, rem); \ + pos = rocker_tlv_next(pos, &(rem))) + +#define rocker_tlv_for_each_nested(pos, tlv, rem) \ + rocker_tlv_for_each(pos, rocker_tlv_data(tlv), rocker_tlv_len(tlv), rem) + +static inline int rocker_tlv_size(int payload) +{ + return ROCKER_TLV_HDRLEN + payload; +} + +static inline int rocker_tlv_total_size(int payload) +{ + return ROCKER_TLV_ALIGN(rocker_tlv_size(payload)); +} + +static inline int rocker_tlv_padlen(int payload) +{ + return rocker_tlv_total_size(payload) - rocker_tlv_size(payload); +} + +static inline int rocker_tlv_type(const RockerTlv *tlv) +{ + return le32_to_cpu(tlv->type); +} + +static inline void *rocker_tlv_data(const RockerTlv *tlv) +{ + return (char *) tlv + ROCKER_TLV_HDRLEN; +} + +static inline int rocker_tlv_len(const RockerTlv *tlv) +{ + return le16_to_cpu(tlv->len) - ROCKER_TLV_HDRLEN; +} + +static inline uint8_t rocker_tlv_get_u8(const RockerTlv *tlv) +{ + return *(uint8_t *) rocker_tlv_data(tlv); +} + +static inline uint16_t rocker_tlv_get_u16(const RockerTlv *tlv) +{ + return *(uint16_t *) rocker_tlv_data(tlv); +} + +static inline uint32_t rocker_tlv_get_u32(const RockerTlv *tlv) +{ + return *(uint32_t *) rocker_tlv_data(tlv); +} + +static inline uint64_t rocker_tlv_get_u64(const RockerTlv *tlv) +{ + return *(uint64_t *) rocker_tlv_data(tlv); +} + +static inline uint16_t rocker_tlv_get_le16(const RockerTlv *tlv) +{ + return le16_to_cpup((uint16_t *) rocker_tlv_data(tlv)); +} + +static inline uint32_t rocker_tlv_get_le32(const RockerTlv *tlv) +{ + return le32_to_cpup((uint32_t *) rocker_tlv_data(tlv)); +} + +static inline uint64_t rocker_tlv_get_le64(const RockerTlv *tlv) +{ + return le64_to_cpup((uint64_t *) rocker_tlv_data(tlv)); +} + +static inline void rocker_tlv_parse(RockerTlv **tb, int maxtype, + const char *buf, int buf_len) +{ + const RockerTlv *tlv; + const RockerTlv *head = (const RockerTlv *) buf; + int rem; + + memset(tb, 0, sizeof(RockerTlv *) * (maxtype + 1)); + + rocker_tlv_for_each(tlv, head, buf_len, rem) { + uint32_t type = rocker_tlv_type(tlv); + + if (type > 0 && type <= maxtype) { + tb[type] = (RockerTlv *) tlv; + } + } +} + +static inline void rocker_tlv_parse_nested(RockerTlv **tb, int maxtype, + const RockerTlv *tlv) +{ + rocker_tlv_parse(tb, maxtype, rocker_tlv_data(tlv), rocker_tlv_len(tlv)); +} + +static inline RockerTlv *rocker_tlv_start(char *buf, int buf_pos) +{ + return (RockerTlv *) (buf + buf_pos); +} + +static inline void rocker_tlv_put_iov(char *buf, int *buf_pos, + int type, const struct iovec *iov, + const unsigned int iovcnt) +{ + size_t len = iov_size(iov, iovcnt); + int total_size = rocker_tlv_total_size(len); + RockerTlv *tlv; + + tlv = rocker_tlv_start(buf, *buf_pos); + *buf_pos += total_size; + tlv->type = cpu_to_le32(type); + tlv->len = cpu_to_le16(rocker_tlv_size(len)); + iov_to_buf(iov, iovcnt, 0, rocker_tlv_data(tlv), len); + memset((char *) tlv + le16_to_cpu(tlv->len), 0, rocker_tlv_padlen(len)); +} + +static inline void rocker_tlv_put(char *buf, int *buf_pos, + int type, int len, void *data) +{ + struct iovec iov = { + .iov_base = data, + .iov_len = len, + }; + + rocker_tlv_put_iov(buf, buf_pos, type, &iov, 1); +} + +static inline void rocker_tlv_put_u8(char *buf, int *buf_pos, + int type, uint8_t value) +{ + rocker_tlv_put(buf, buf_pos, type, sizeof(uint8_t), &value); +} + +static inline void rocker_tlv_put_u16(char *buf, int *buf_pos, + int type, uint16_t value) +{ + rocker_tlv_put(buf, buf_pos, type, sizeof(uint16_t), &value); +} + +static inline void rocker_tlv_put_u32(char *buf, int *buf_pos, + int type, uint32_t value) +{ + rocker_tlv_put(buf, buf_pos, type, sizeof(uint32_t), &value); +} + +static inline void rocker_tlv_put_u64(char *buf, int *buf_pos, + int type, uint64_t value) +{ + rocker_tlv_put(buf, buf_pos, type, sizeof(uint64_t), &value); +} + +static inline void rocker_tlv_put_le16(char *buf, int *buf_pos, + int type, uint16_t value) +{ + value = cpu_to_le16(value); + rocker_tlv_put(buf, buf_pos, type, sizeof(uint16_t), &value); +} + +static inline void rocker_tlv_put_le32(char *buf, int *buf_pos, + int type, uint32_t value) +{ + value = cpu_to_le32(value); + rocker_tlv_put(buf, buf_pos, type, sizeof(uint32_t), &value); +} + +static inline void rocker_tlv_put_le64(char *buf, int *buf_pos, + int type, uint64_t value) +{ + value = cpu_to_le64(value); + rocker_tlv_put(buf, buf_pos, type, sizeof(uint64_t), &value); +} + +static inline RockerTlv *rocker_tlv_nest_start(char *buf, int *buf_pos, + int type) +{ + RockerTlv *start = rocker_tlv_start(buf, *buf_pos); + + rocker_tlv_put(buf, buf_pos, type, 0, NULL); + return start; +} + +static inline void rocker_tlv_nest_end(char *buf, int *buf_pos, + RockerTlv *start) +{ + start->len = (char *) rocker_tlv_start(buf, *buf_pos) - (char *) start; +} + +static inline void rocker_tlv_nest_cancel(char *buf, int *buf_pos, + RockerTlv *start) +{ + *buf_pos = (char *) start - buf; +} + +#endif diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c new file mode 100644 index 0000000..a218290 --- /dev/null +++ b/hw/net/rocker/rocker_world.c @@ -0,0 +1,108 @@ +/* + * QEMU rocker switch emulation - switch worlds + * + * Copyright (c) 2014 Scott Feldman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "qemu/iov.h" + +#include "rocker.h" +#include "rocker_world.h" + +typedef struct rocker Rocker; + +typedef struct world { + Rocker *r; + enum rocker_world_type type; + WorldOps *ops; +} World; + +ssize_t world_ingress(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt) +{ + if (world->ops->ig) { + return world->ops->ig(world, pport, iov, iovcnt); + } + + return iov_size(iov, iovcnt); +} + +int world_do_cmd(World *world, DescInfo *info, + char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv) +{ + if (world->ops->cmd) { + return world->ops->cmd(world, info, buf, cmd, cmd_info_tlv); + } + + return -ROCKER_ENOTSUP; +} + +World *world_alloc(Rocker *r, size_t sizeof_private, + enum rocker_world_type type, WorldOps *ops) +{ + World *w = g_malloc0(sizeof(World) + sizeof_private); + + if (w) { + w->r = r; + w->type = type; + w->ops = ops; + if (w->ops->init) { + w->ops->init(w); + } + } + + return w; +} + +void world_free(World *world) +{ + if (world->ops->uninit) { + world->ops->uninit(world); + } + g_free(world); +} + +void world_reset(World *world) +{ + if (world->ops->uninit) { + world->ops->uninit(world); + } + if (world->ops->init) { + world->ops->init(world); + } +} + +void *world_private(World *world) +{ + return world + 1; +} + +Rocker *world_rocker(World *world) +{ + return world->r; +} + +enum rocker_world_type world_type(World *world) +{ + return world->type; +} + +const char *world_name(World *world) +{ + switch (world->type) { + case ROCKER_WORLD_TYPE_OF_DPA: + return "OF_DPA"; + default: + return "unknown"; + } +} diff --git a/hw/net/rocker/rocker_world.h b/hw/net/rocker/rocker_world.h new file mode 100644 index 0000000..a844198 --- /dev/null +++ b/hw/net/rocker/rocker_world.h @@ -0,0 +1,63 @@ +/* + * QEMU rocker switch emulation - switch worlds + * + * Copyright (c) 2014 Scott Feldman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_WORLD_H_ +#define _ROCKER_WORLD_H_ + +#include "rocker_hw.h" + +typedef struct world World; +typedef struct rocker Rocker; +typedef struct rocker_tlv RockerTlv; +typedef struct desc_info DescInfo; + +enum rocker_world_type { + ROCKER_WORLD_TYPE_OF_DPA = ROCKER_PORT_MODE_OF_DPA, + ROCKER_WORLD_TYPE_MAX, +}; + +typedef int (world_init)(World *world); +typedef void (world_uninit)(World *world); +typedef ssize_t (world_ig)(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt); +typedef int (world_cmd)(World *world, DescInfo *info, + char *buf, uint16_t cmd, + RockerTlv *cmd_info_tlv); + +typedef struct world_ops { + world_init *init; + world_uninit *uninit; + world_ig *ig; + world_cmd *cmd; +} WorldOps; + +ssize_t world_ingress(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt); +int world_do_cmd(World *world, DescInfo *info, + char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv); + +World *world_alloc(Rocker *r, size_t sizeof_private, + enum rocker_world_type type, WorldOps *ops); +void world_free(World *world); +void world_reset(World *world); + +void *world_private(World *world); +Rocker *world_rocker(World *world); + +enum rocker_world_type world_type(World *world); +const char *world_name(World *world); + +#endif /* _ROCKER_WORLD_H_ */ -- 1.7.10.4