* [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling
@ 2015-06-18 13:14 Jan Beulich
  2015-06-18 13:41 ` Andrew Cooper
  2015-07-21 16:18 ` Stefano Stabellini
From: Jan Beulich @ 2015-06-18 13:14 UTC
  To: xen-devel
  Cc: Keir Fraser, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	Tim Deegan, Ian Campbell, Wei Liu

The number of slots per page being 511 (i.e. not a power of two) means
that the (32-bit) read and write indexes wrapping at 2^32 will likely
disturb operation. Extend I/O req server creation so the caller can
indicate that it is using suitable atomic accesses where needed (not
all accesses to the two pointers really need to be atomic), allowing
the hypervisor to atomically canonicalize both pointers when both have
gone through at least one cycle.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
---
v2: Limit canonicalization loop to IOREQ_BUFFER_SLOT_NUM iterations.
    Adjust xc_hvm_create_ioreq_server() documentation.
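
As a standalone illustration of the problem (a sketch, not part of the
patch): since 2^32 mod 511 == 32, the slot index derived from a
free-running 32-bit pointer jumps discontinuously when the pointer
wraps, so producer and consumer lose sync on the ring:

    #include <stdint.h>
    #include <stdio.h>

    #define IOREQ_BUFFER_SLOT_NUM 511   /* not a power of two */

    int main(void)
    {
        uint32_t wp = UINT32_MAX;       /* pointer just before the wrap */

        /* The slot sequence should advance by one per increment ... */
        printf("slot before wrap: %u\n", wp % IOREQ_BUFFER_SLOT_NUM); /* 31 */
        wp++;                           /* ... but the pointer wraps to 0 */
        printf("slot after wrap:  %u\n", wp % IOREQ_BUFFER_SLOT_NUM); /* 0, not 32 */
        return 0;
    }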

--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1933,7 +1933,8 @@ int xc_get_hvm_param(xc_interface *handl
  *
  * @parm xch a handle to an open hypervisor interface.
  * @parm domid the domain id to be serviced
- * @parm handle_bufioreq should the IOREQ Server handle buffered requests?
+ * @parm handle_bufioreq how should the IOREQ Server handle buffered requests
+ *                       (HVM_IOREQSRV_BUFIOREQ_*)?
  * @parm id pointer to an ioservid_t to receive the IOREQ Server id.
  * @return 0 on success, -1 on failure.
  */
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1411,7 +1411,7 @@ int xc_hvm_create_ioreq_server(xc_interf
     hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
 
     arg->domid = domid;
-    arg->handle_bufioreq = !!handle_bufioreq;
+    arg->handle_bufioreq = handle_bufioreq;
 
     rc = do_xen_hypercall(xch, &hypercall);
 
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
 
 static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
                                  domid_t domid, bool_t is_default,
-                                 bool_t handle_bufioreq, ioservid_t id)
+                                 int bufioreq_handling, ioservid_t id)
 {
     struct vcpu *v;
     int rc;
@@ -938,7 +938,11 @@ static int hvm_ioreq_server_init(struct 
     if ( rc )
         return rc;
 
-    rc = hvm_ioreq_server_setup_pages(s, is_default, handle_bufioreq);
+    if ( bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_ATOMIC )
+        s->bufioreq_atomic = 1;
+
+    rc = hvm_ioreq_server_setup_pages(
+             s, is_default, bufioreq_handling != HVM_IOREQSRV_BUFIOREQ_OFF);
     if ( rc )
         goto fail_map;
 
@@ -997,12 +1001,15 @@ static ioservid_t next_ioservid(struct d
 }
 
 static int hvm_create_ioreq_server(struct domain *d, domid_t domid,
-                                   bool_t is_default, bool_t handle_bufioreq,
+                                   bool_t is_default, int bufioreq_handling,
                                    ioservid_t *id)
 {
     struct hvm_ioreq_server *s;
     int rc;
 
+    if ( bufioreq_handling > HVM_IOREQSRV_BUFIOREQ_ATOMIC )
+        return -EINVAL;
+
     rc = -ENOMEM;
     s = xzalloc(struct hvm_ioreq_server);
     if ( !s )
@@ -1015,7 +1022,7 @@ static int hvm_create_ioreq_server(struc
     if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL )
         goto fail2;
 
-    rc = hvm_ioreq_server_init(s, d, domid, is_default, handle_bufioreq,
+    rc = hvm_ioreq_server_init(s, d, domid, is_default, bufioreq_handling,
                                next_ioservid(d));
     if ( rc )
         goto fail3;
@@ -2560,7 +2567,7 @@ int hvm_buffered_io_send(ioreq_t *p)
 
     spin_lock(&s->bufioreq_lock);
 
-    if ( (pg->write_pointer - pg->read_pointer) >=
+    if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >=
          (IOREQ_BUFFER_SLOT_NUM - qw) )
     {
         /* The queue is full: send the iopacket through the normal path. */
@@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
         return 0;
     }
 
-    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
+    pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
 
     if ( qw )
     {
         bp.data = p->data >> 32;
-        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
+        pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
     }
 
     /* Make the ioreq_t visible /before/ write_pointer. */
     wmb();
-    pg->write_pointer += qw ? 2 : 1;
+    pg->ptrs.write_pointer += qw ? 2 : 1;
+
+    /* Canonicalize read/write pointers to prevent their overflow. */
+    while ( s->bufioreq_atomic && qw++ < IOREQ_BUFFER_SLOT_NUM &&
+            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
+    {
+        union bufioreq_pointers old = pg->ptrs, new;
+        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
+
+        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
+        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
+        cmpxchg(&pg->ptrs.full, old.full, new.full);
+    }
 
     notify_via_xen_event_channel(d, s->bufioreq_evtchn);
     spin_unlock(&s->bufioreq_lock);
@@ -5446,7 +5465,7 @@ static int hvmop_create_ioreq_server(
         goto out;
 
     rc = hvm_create_ioreq_server(d, curr_d->domain_id, 0,
-                                 !!op.handle_bufioreq, &op.id);
+                                 op.handle_bufioreq, &op.id);
     if ( rc != 0 )
         goto out;
 
@@ -5928,7 +5947,8 @@ static int hvmop_get_param(
 
         /* May need to create server. */
         domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
-        rc = hvm_create_ioreq_server(d, domid, 1, 1, NULL);
+        rc = hvm_create_ioreq_server(d, domid, 1,
+                                     HVM_IOREQSRV_BUFIOREQ_LEGACY, NULL);
         if ( rc != 0 && rc != -EEXIST )
             goto out;
     }
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -70,6 +70,7 @@ struct hvm_ioreq_server {
     evtchn_port_t          bufioreq_evtchn;
     struct rangeset        *range[NR_IO_RANGE_TYPES];
     bool_t                 enabled;
+    bool_t                 bufioreq_atomic;
 };
 
 struct hvm_domain {
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -266,6 +266,13 @@ typedef uint16_t ioservid_t;
 #define HVMOP_create_ioreq_server 17
 struct xen_hvm_create_ioreq_server {
     domid_t domid;           /* IN - domain to be serviced */
+#define HVM_IOREQSRV_BUFIOREQ_OFF    0
+#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1
+/*
+ * Use this when read_pointer gets updated atomically and
+ * the pointer pair gets read atomically:
+ */
+#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2
     uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */
     ioservid_t id;           /* OUT - server id */
 };
--- a/xen/include/public/hvm/ioreq.h
+++ b/xen/include/public/hvm/ioreq.h
@@ -83,8 +83,17 @@ typedef struct buf_ioreq buf_ioreq_t;
 
 #define IOREQ_BUFFER_SLOT_NUM     511 /* 8 bytes each, plus 2 4-byte indexes */
 struct buffered_iopage {
-    unsigned int read_pointer;
-    unsigned int write_pointer;
+#ifdef __XEN__
+    union bufioreq_pointers {
+        struct {
+#endif
+            uint32_t read_pointer;
+            uint32_t write_pointer;
+#ifdef __XEN__
+        };
+        uint64_t full;
+    } ptrs;
+#endif
     buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM];
 }; /* NB. Size of this structure must be no greater than one page. */
 typedef struct buffered_iopage buffered_iopage_t;
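
To make the canonicalization arithmetic concrete, here is a minimal
standalone sketch (illustration only, with made-up pointer values; not
part of the patch). Subtracting the same multiple of
IOREQ_BUFFER_SLOT_NUM from both pointers preserves the queue length as
well as both slot indexes:

    #include <stdint.h>
    #include <stdio.h>

    #define IOREQ_BUFFER_SLOT_NUM 511

    int main(void)
    {
        /* Example: both pointers have completed two full cycles. */
        uint32_t read_pointer = 1022, write_pointer = 1530;
        unsigned int n = read_pointer / IOREQ_BUFFER_SLOT_NUM;  /* n == 2 */

        read_pointer  -= n * IOREQ_BUFFER_SLOT_NUM;  /* 1022 -> 0   */
        write_pointer -= n * IOREQ_BUFFER_SLOT_NUM;  /* 1530 -> 508 */

        printf("used slots: %u\n", write_pointer - read_pointer);          /* 508 */
        printf("write slot: %u\n", write_pointer % IOREQ_BUFFER_SLOT_NUM); /* 508 */
        return 0;
    }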




* Re: [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-18 13:14 [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling Jan Beulich
@ 2015-06-18 13:41 ` Andrew Cooper
  2015-07-21 16:18 ` Stefano Stabellini
From: Andrew Cooper @ 2015-06-18 13:41 UTC
  To: Jan Beulich, xen-devel
  Cc: Keir Fraser, Stefano Stabellini, Tim Deegan, Ian Jackson,
	Ian Campbell, Wei Liu


On 18/06/15 14:14, Jan Beulich wrote:
> The number of slots per page being 511 (i.e. not a power of two) means
> that the (32-bit) read and write indexes wrapping at 2^32 will likely
> disturb operation. Extend I/O req server creation so the caller can
> indicate that it is using suitable atomic accesses where needed (not
> all accesses to the two pointers really need to be atomic), allowing
> the hypervisor to atomically canonicalize both pointers when both have
> gone through at least one cycle.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Acked-by: Ian Campbell <ian.campbell@citrix.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-06-18 13:14 [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling Jan Beulich
  2015-06-18 13:41 ` Andrew Cooper
@ 2015-07-21 16:18 ` Stefano Stabellini
  2015-07-22 13:38   ` Jan Beulich
From: Stefano Stabellini @ 2015-07-21 16:18 UTC
  To: Jan Beulich
  Cc: Wei Liu, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	Tim Deegan, Ian Campbell, xen-devel, Keir Fraser

On Thu, 18 Jun 2015, Jan Beulich wrote:
> The number of slots per page being 511 (i.e. not a power of two) means
> that the (32-bit) read and write indexes wrapping at 2^32 will likely
> disturb operation. Extend I/O req server creation so the caller can
> indicate that it is using suitable atomic accesses where needed (not
> all accesses to the two pointers really need to be atomic), allowing
> the hypervisor to atomically canonicalize both pointers when both have
> gone through at least one cycle.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Acked-by: Ian Campbell <ian.campbell@citrix.com>
> ---
> v2: Limit canonicalization loop to IOREQ_BUFFER_SLOT_NUM iterations.
>     Adjust xc_hvm_create_ioreq_server() documentation.
> 
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -1933,7 +1933,8 @@ int xc_get_hvm_param(xc_interface *handl
>   *
>   * @parm xch a handle to an open hypervisor interface.
>   * @parm domid the domain id to be serviced
> - * @parm handle_bufioreq should the IOREQ Server handle buffered requests?
> + * @parm handle_bufioreq how should the IOREQ Server handle buffered requests
> + *                       (HVM_IOREQSRV_BUFIOREQ_*)?
>   * @parm id pointer to an ioservid_t to receive the IOREQ Server id.
>   * @return 0 on success, -1 on failure.
>   */
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -1411,7 +1411,7 @@ int xc_hvm_create_ioreq_server(xc_interf
>      hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
>  
>      arg->domid = domid;
> -    arg->handle_bufioreq = !!handle_bufioreq;
> +    arg->handle_bufioreq = handle_bufioreq;
>  
>      rc = do_xen_hypercall(xch, &hypercall);
>  
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
>  
>  static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
>                                   domid_t domid, bool_t is_default,
> -                                 bool_t handle_bufioreq, ioservid_t id)
> +                                 int bufioreq_handling, ioservid_t id)

uint8_t?


>  {
>      struct vcpu *v;
>      int rc;
> @@ -938,7 +938,11 @@ static int hvm_ioreq_server_init(struct 
>      if ( rc )
>          return rc;
>  
> -    rc = hvm_ioreq_server_setup_pages(s, is_default, handle_bufioreq);
> +    if ( bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_ATOMIC )
> +        s->bufioreq_atomic = 1;
> +
> +    rc = hvm_ioreq_server_setup_pages(
> +             s, is_default, bufioreq_handling != HVM_IOREQSRV_BUFIOREQ_OFF);
>      if ( rc )
>          goto fail_map;
>  
> @@ -997,12 +1001,15 @@ static ioservid_t next_ioservid(struct d
>  }
>  
>  static int hvm_create_ioreq_server(struct domain *d, domid_t domid,
> -                                   bool_t is_default, bool_t handle_bufioreq,
> +                                   bool_t is_default, int bufioreq_handling,

uint8_t?


>                                     ioservid_t *id)
>  {
>      struct hvm_ioreq_server *s;
>      int rc;
>  
> +    if ( bufioreq_handling > HVM_IOREQSRV_BUFIOREQ_ATOMIC )
> +        return -EINVAL;
> +
>      rc = -ENOMEM;
>      s = xzalloc(struct hvm_ioreq_server);
>      if ( !s )
> @@ -1015,7 +1022,7 @@ static int hvm_create_ioreq_server(struc
>      if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL )
>          goto fail2;
>  
> -    rc = hvm_ioreq_server_init(s, d, domid, is_default, handle_bufioreq,
> +    rc = hvm_ioreq_server_init(s, d, domid, is_default, bufioreq_handling,
>                                 next_ioservid(d));
>      if ( rc )
>          goto fail3;
> @@ -2560,7 +2567,7 @@ int hvm_buffered_io_send(ioreq_t *p)
>      spin_lock(&s->bufioreq_lock);
>  
> -    if ( (pg->write_pointer - pg->read_pointer) >=
> +    if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >=
>           (IOREQ_BUFFER_SLOT_NUM - qw) )
>      {
>          /* The queue is full: send the iopacket through the normal path. */
> @@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
>          return 0;
>      }
>  
> -    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
> +    pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
>  
>      if ( qw )
>      {
>          bp.data = p->data >> 32;
> -        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> +        pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
>      }
>  
>      /* Make the ioreq_t visible /before/ write_pointer. */
>      wmb();
> -    pg->write_pointer += qw ? 2 : 1;
> +    pg->ptrs.write_pointer += qw ? 2 : 1;
> +
> +    /* Canonicalize read/write pointers to prevent their overflow. */
> +    while ( s->bufioreq_atomic && qw++ < IOREQ_BUFFER_SLOT_NUM &&
> +            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
> +    {
> +        union bufioreq_pointers old = pg->ptrs, new;
> +        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
> +
> +        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> +        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> +        cmpxchg(&pg->ptrs.full, old.full, new.full);
> +    }

This is not safe: if the reader increments read_pointer (atomically)
after you read old.read_pointer and before cmpxchg, the increment is
lost.

I think you need to remove the cmpxchg and subtract a multiple of
IOREQ_BUFFER_SLOT_NUM from read_pointer atomically here. Unfortunately
if we do that, we cannot update both write_pointer and read_pointer
together anymore. But if we decrement write_pointer atomically before
read_pointer, I think we should be fine if we appropriately fix the
reader side too:

QEMU:
  atomic_read(&read_pointer);
  read write_pointer;
  xen_rmb();
  while (read_pointer < write_pointer) {

      [work]

      atomic_add(&read_pointer, stuff);
      read write_pointer;
      xen_rmb();
  }


Xen:
  [work]
  increment write_pointer;
  xen_wmb();

  if (read_pointer >= IOREQ_BUFFER_SLOT_NUM) {
      write_pointer -= IOREQ_BUFFER_SLOT_NUM;
      xen_wmb();
      atomic_sub(read_pointer, IOREQ_BUFFER_SLOT_NUM);
  }


* Re: [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-07-21 16:18 ` Stefano Stabellini
@ 2015-07-22 13:38   ` Jan Beulich
  2015-07-22 14:49     ` Stefano Stabellini
From: Jan Beulich @ 2015-07-22 13:38 UTC
  To: Stefano Stabellini
  Cc: Keir Fraser, Andrew Cooper, Ian Jackson, Tim Deegan,
	Ian Campbell, xen-devel, Wei Liu

>>> On 21.07.15 at 18:18, <stefano.stabellini@eu.citrix.com> wrote:
> On Thu, 18 Jun 2015, Jan Beulich wrote:
>> --- a/xen/arch/x86/hvm/hvm.c
>> +++ b/xen/arch/x86/hvm/hvm.c
>> @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
>>  
>>  static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
>>                                   domid_t domid, bool_t is_default,
>> -                                 bool_t handle_bufioreq, ioservid_t id)
>> +                                 int bufioreq_handling, ioservid_t id)
> 
> uint8_t?

Why? I'm generally against using fixed width types when you don't
really need them. And using uint_least8_t or uint_fast8_t is neither
an option, nor would it make the code look reasonable. Plain int is
just fine here.

>> @@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
>>          return 0;
>>      }
>>  
>> -    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
>> +    pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
>>  
>>      if ( qw )
>>      {
>>          bp.data = p->data >> 32;
>> -        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
>> +        pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
>>      }
>>  
>>      /* Make the ioreq_t visible /before/ write_pointer. */
>>      wmb();
>> -    pg->write_pointer += qw ? 2 : 1;
>> +    pg->ptrs.write_pointer += qw ? 2 : 1;
>> +
>> +    /* Canonicalize read/write pointers to prevent their overflow. */
>> +    while ( s->bufioreq_atomic && qw++ < IOREQ_BUFFER_SLOT_NUM &&
>> +            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
>> +    {
>> +        union bufioreq_pointers old = pg->ptrs, new;
>> +        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
>> +
>> +        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
>> +        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
>> +        cmpxchg(&pg->ptrs.full, old.full, new.full);
>> +    }
> 
> This is not safe: if the reader increments read_pointer (atomically)
> after you read old.read_pointer and before cmpxchg, the increment is
> lost.

But that's what I use cmpxchg() for - if the value changes between
the read and the cmpxchg, the latter will indicate so (for the purpose
here simply by not updating the memory location), and the loop will go
through another iteration.
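
To illustrate the retry semantics, a minimal user-space sketch of the
same pattern (assumptions: GCC's __atomic builtins stand in for Xen's
cmpxchg(), and read_pointer occupies the low half of the 64-bit word,
matching the union layout on little-endian x86):

    #include <stdint.h>

    #define IOREQ_BUFFER_SLOT_NUM 511

    /* read_pointer in bits 0-31, write_pointer in bits 32-63. */
    static uint64_t ptrs_full;

    static void canonicalize(void)
    {
        uint64_t old = __atomic_load_n(&ptrs_full, __ATOMIC_RELAXED);

        for ( ; ; )
        {
            uint32_t rp = (uint32_t)old, wp = (uint32_t)(old >> 32);
            unsigned int n = rp / IOREQ_BUFFER_SLOT_NUM;
            uint64_t new;

            if ( !n )
                return;                 /* already canonical */

            new = ((uint64_t)(wp - n * IOREQ_BUFFER_SLOT_NUM) << 32) |
                  (rp - n * IOREQ_BUFFER_SLOT_NUM);

            /*
             * On failure the builtin refreshes 'old' with the current
             * contents, so a concurrent update of read_pointer is never
             * lost, merely re-examined on the next iteration.
             */
            if ( __atomic_compare_exchange_n(&ptrs_full, &old, new, 0,
                                             __ATOMIC_SEQ_CST,
                                             __ATOMIC_SEQ_CST) )
                return;
        }
    }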

> I think you need to remove the cmpxchg and subtract a multiple of
> IOREQ_BUFFER_SLOT_NUM from read_pointer atomically here. Unfortunately
> if we do that, we cannot update both write_pointer and read_pointer
> together anymore. But if we decrement write_pointer atomically before
> read_pointer, I think we should be fine if we appropriately fix the
> reader side too:
> 
> QEMU:
>   atomic_read(&read_pointer);
>   read write_pointer;
>   xen_rmb();
>   while (read_pointer < write_pointer) {

That would cause this comparison to possibly evaluate to "false", and
I don't think leaving this loop early can bring any good.

Jan

> 
>       [work]
> 
>       atomic_add(&read_pointer, stuff);
>       read write_pointer;
>       xen_rmb();
>   }
> 
> 
> Xen:
>   [work]
>   increment write_pointer;
>   xen_wmb();
> 
>   if (read_pointer >= IOREQ_BUFFER_SLOT_NUM) {
>       write_pointer -= IOREQ_BUFFER_SLOT_NUM;
>       xen_wmb();
>       atomic_sub(read_pointer, IOREQ_BUFFER_SLOT_NUM);
>   }


* Re: [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-07-22 13:38   ` Jan Beulich
@ 2015-07-22 14:49     ` Stefano Stabellini
  2015-07-22 15:21       ` Jan Beulich
From: Stefano Stabellini @ 2015-07-22 14:49 UTC
  To: Jan Beulich
  Cc: Wei Liu, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	Tim Deegan, Ian Campbell, xen-devel, Keir Fraser

On Wed, 22 Jul 2015, Jan Beulich wrote:
> >>> On 21.07.15 at 18:18, <stefano.stabellini@eu.citrix.com> wrote:
> > On Thu, 18 Jun 2015, Jan Beulich wrote:
> >> --- a/xen/arch/x86/hvm/hvm.c
> >> +++ b/xen/arch/x86/hvm/hvm.c
> >> @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
> >>  
> >>  static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
> >>                                   domid_t domid, bool_t is_default,
> >> -                                 bool_t handle_bufioreq, ioservid_t id)
> >> +                                 int bufioreq_handling, ioservid_t id)
> > 
> > uint8_t?
> 
> Why? I'm generally against using fixed width types when you don't
> really need them. And using uint_least8_t or uint_fast8_t is neither
> an option, nor would it make the code look reasonable. Plain int is
> just fine here.

You are not just changing integer size but also switching from unsigned
to signed implicitly. I think it is not good coding practice.
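
As a small sketch of the conversion in question (hypothetical names, not
the actual call chain): a uint8_t argument is widened to signed int by
the integer promotions, and the compiler emits no diagnostic:

    #include <stdint.h>
    #include <stdio.h>

    static void takes_int(int bufioreq_handling)
    {
        printf("got %d\n", bufioreq_handling);
    }

    int main(void)
    {
        uint8_t handle_bufioreq = 2;  /* e.g. HVM_IOREQSRV_BUFIOREQ_ATOMIC */

        /* Implicitly promoted from unsigned 8-bit to signed int. */
        takes_int(handle_bufioreq);
        return 0;
    }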


> >> @@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
> >>          return 0;
> >>      }
> >>  
> >> -    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
> >> +    pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
> >>  
> >>      if ( qw )
> >>      {
> >>          bp.data = p->data >> 32;
> >> -        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> >> +        pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> >>      }
> >>  
> >>      /* Make the ioreq_t visible /before/ write_pointer. */
> >>      wmb();
> >> -    pg->write_pointer += qw ? 2 : 1;
> >> +    pg->ptrs.write_pointer += qw ? 2 : 1;
> >> +
> >> +    /* Canonicalize read/write pointers to prevent their overflow. */
> >> +    while ( s->bufioreq_atomic && qw++ < IOREQ_BUFFER_SLOT_NUM &&
> >> +            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
> >> +    {
> >> +        union bufioreq_pointers old = pg->ptrs, new;
> >> +        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
> >> +
> >> +        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> >> +        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> >> +        cmpxchg(&pg->ptrs.full, old.full, new.full);
> >> +    }
> > 
> > This is not safe: if the reader increments read_pointer (atomically)
> > after you read old.read_pointer and before cmpxchg, the increment is
> > lost.
> 
> But that's what I use cmpxchg() for - if the value changes between
> the read and the cmpxchg, the latter will indicate so (for the purpose
> here simply by not updating the memory location), and the loop will go
> through another iteration.

Right, sorry, I read cmpxchg as xchg. I really don't know why. I think
that the code above works then.



> > I think you need to remove the cmpxchg and subtract a multiple of
> > IOREQ_BUFFER_SLOT_NUM from read_pointer atomically here. Unfortunately
> > if we do that, we cannot update both write_pointer and read_pointer
> > together anymore. But if we decrement write_pointer atomically before
> > read_pointer, I think we should be fine if we appropriately fix the
> > reader side too:
> > 
> > QEMU:
> >   atomic_read(&read_pointer);
> >   read write_pointer;
> >   xen_rmb();
> >   while (read_pointer < write_pointer) {
> 
> That would cause this comparison to possibly evaluate to "false", and
> I don't think leaving this loop early can bring any good.

Actually it would be fine because right after the loop we have:

notify_via_xen_event_channel(d, s->bufioreq_evtchn);



> > 
> >       [work]
> > 
> >       atomic_add(&read_pointer, stuff);
> >       read write_pointer;
> >       xen_rmb();
> >   }
> > 
> > 
> > Xen:
> >   [work]
> >   increment write_pointer;
> >   xen_wmb();
> > 
> >   if (read_pointer >= IOREQ_BUFFER_SLOT_NUM) {
> >       write_pointer -= IOREQ_BUFFER_SLOT_NUM;
> >       xen_wmb();
> >       atomic_sub(read_pointer, IOREQ_BUFFER_SLOT_NUM);
> >   }


* Re: [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-07-22 14:49     ` Stefano Stabellini
@ 2015-07-22 15:21       ` Jan Beulich
  2015-07-22 15:45         ` Stefano Stabellini
From: Jan Beulich @ 2015-07-22 15:21 UTC
  To: Stefano Stabellini
  Cc: Keir Fraser, Andrew Cooper, Ian Jackson, Tim Deegan, Ian Campbell,
	xen-devel, Wei Liu

>>> On 22.07.15 at 16:49, <stefano.stabellini@eu.citrix.com> wrote:
> On Wed, 22 Jul 2015, Jan Beulich wrote:
>> >>> On 21.07.15 at 18:18, <stefano.stabellini@eu.citrix.com> wrote:
>> > On Thu, 18 Jun 2015, Jan Beulich wrote:
>> >> --- a/xen/arch/x86/hvm/hvm.c
>> >> +++ b/xen/arch/x86/hvm/hvm.c
>> >> @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
>> >>  
>> >>  static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
>> >>                                   domid_t domid, bool_t is_default,
>> >> -                                 bool_t handle_bufioreq, ioservid_t id)
>> >> +                                 int bufioreq_handling, ioservid_t id)
>> > 
>> > uint8_t?
>> 
>> Why? I'm generally against using fixed width types when you don't
>> really need them. And using uint_least8_t or uint_fast8_t is neither
>> an option, nor would it make the code look reasonable. Plain int is
>> just fine here.
> 
> You are not just changing integer size but also switching from unsigned
> to signed implicitly. I think it is not good coding practice.

To me bool (and by implication bool_t) is neither a signed nor
an unsigned type.

Jan


* Re: [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling
  2015-07-22 15:21       ` Jan Beulich
@ 2015-07-22 15:45         ` Stefano Stabellini
From: Stefano Stabellini @ 2015-07-22 15:45 UTC
  To: Jan Beulich
  Cc: Wei Liu, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	Tim Deegan, Ian Campbell, xen-devel, Keir Fraser

On Wed, 22 Jul 2015, Jan Beulich wrote:
> >>> On 22.07.15 at 16:49, <stefano.stabellini@eu.citrix.com> wrote:
> > On Wed, 22 Jul 2015, Jan Beulich wrote:
> >> >>> On 21.07.15 at 18:18, <stefano.stabellini@eu.citrix.com> wrote:
> >> > On Thu, 18 Jun 2015, Jan Beulich wrote:
> >> >> --- a/xen/arch/x86/hvm/hvm.c
> >> >> +++ b/xen/arch/x86/hvm/hvm.c
> >> >> @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
> >> >>  
> >> >>  static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
> >> >>                                   domid_t domid, bool_t is_default,
> >> >> -                                 bool_t handle_bufioreq, ioservid_t id)
> >> >> +                                 int bufioreq_handling, ioservid_t id)
> >> > 
> >> > uint8_t?
> >> 
> >> Why? I'm generally against using fixed width types when you don't
> >> really need them. And using uint_least8_t or uint_fast8_t is neither
> >> an option, nor would it make the code look reasonable. Plain int is
> >> just fine here.
> > 
> > You are not just changing integer size but also switching from unsigned
> > to signed implicitly. I think it is not good coding practice.
> 
> To me bool (and by implication bool_t) is neither a signed nor
> an unsigned type.

I meant that handle_bufioreq, a uint8_t, is actually passed
transparently to hvm_create_ioreq_server, which takes an int argument.
I think that should be avoided, or the value cast explicitly, to avoid
confusion in the future.

