From: Stefano Stabellini
Subject: Re: [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling
Date: Tue, 21 Jul 2015 17:18:24 +0100
In-Reply-To: <5582E0660200007800086A27@mail.emea.novell.com>
To: Jan Beulich
Cc: Wei Liu, Stefano Stabellini, Andrew Cooper, Ian Jackson, Tim Deegan,
    Ian Campbell, xen-devel, Keir Fraser
List-Id: xen-devel@lists.xenproject.org

On Thu, 18 Jun 2015, Jan Beulich wrote:
> The number of slots per page being 511 (i.e. not a power of two) means
> that the (32-bit) read and write indexes going beyond 2^32 will likely
> disturb operation. Extend I/O req server creation so the caller can
> indicate that it is using suitable atomic accesses where needed (not
> all accesses to the two pointers really need to be atomic), allowing
> the hypervisor to atomically canonicalize both pointers when both have
> gone through at least one cycle.
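To make the failure mode concrete (my arithmetic, not something from
the patch): 512 mod 511 = 1, hence 2^32 mod 511 = (2^9)^3 * 2^5 mod 511
= 32. So when a 32-bit pointer wraps from 0xffffffff to 0, the implied
slot index jumps from 31 back to 0 instead of advancing to 32, and
producer and consumer disagree about which slot comes next. A
standalone check:

    #include <stdint.h>
    #include <stdio.h>

    #define IOREQ_BUFFER_SLOT_NUM 511

    int main(void)
    {
        uint32_t ptr = UINT32_MAX;   /* last pointer value before the wrap */

        printf("slot before wrap: %u\n", ptr % IOREQ_BUFFER_SLOT_NUM); /* 31 */
        ptr++;                       /* 32-bit wraparound to 0 */
        printf("slot after wrap:  %u\n", ptr % IOREQ_BUFFER_SLOT_NUM); /* 0, not 32 */
        return 0;
    }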
>
> Signed-off-by: Jan Beulich
> Acked-by: Ian Campbell
> ---
> v2: Limit canonicalization loop to IOREQ_BUFFER_SLOT_NUM iterations.
>     Adjust xc_hvm_create_ioreq_server() documentation.
>
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -1933,7 +1933,8 @@ int xc_get_hvm_param(xc_interface *handl
>   *
>   * @parm xch a handle to an open hypervisor interface.
>   * @parm domid the domain id to be serviced
> - * @parm handle_bufioreq should the IOREQ Server handle buffered requests?
> + * @parm handle_bufioreq how should the IOREQ Server handle buffered requests
> + *                       (HVM_IOREQSRV_BUFIOREQ_*)?
>   * @parm id pointer to an ioservid_t to receive the IOREQ Server id.
>   * @return 0 on success, -1 on failure.
>   */
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -1411,7 +1411,7 @@ int xc_hvm_create_ioreq_server(xc_interf
>      hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
>
>      arg->domid = domid;
> -    arg->handle_bufioreq = !!handle_bufioreq;
> +    arg->handle_bufioreq = handle_bufioreq;
>
>      rc = do_xen_hypercall(xch, &hypercall);
>
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
>
>  static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
>                                   domid_t domid, bool_t is_default,
> -                                 bool_t handle_bufioreq, ioservid_t id)
> +                                 int bufioreq_handling, ioservid_t id)

uint8_t?

>  {
>      struct vcpu *v;
>      int rc;
> @@ -938,7 +938,11 @@ static int hvm_ioreq_server_init(struct
>      if ( rc )
>          return rc;
>
> -    rc = hvm_ioreq_server_setup_pages(s, is_default, handle_bufioreq);
> +    if ( bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_ATOMIC )
> +        s->bufioreq_atomic = 1;
> +
> +    rc = hvm_ioreq_server_setup_pages(
> +             s, is_default, bufioreq_handling != HVM_IOREQSRV_BUFIOREQ_OFF);
>      if ( rc )
>          goto fail_map;
>
> @@ -997,12 +1001,15 @@ static ioservid_t next_ioservid(struct d
>  }
>
>  static int hvm_create_ioreq_server(struct domain *d, domid_t domid,
> -                                   bool_t is_default, bool_t handle_bufioreq,
> +                                   bool_t is_default, int bufioreq_handling,

uint8_t?

>                                     ioservid_t *id)
>  {
>      struct hvm_ioreq_server *s;
>      int rc;
>
> +    if ( bufioreq_handling > HVM_IOREQSRV_BUFIOREQ_ATOMIC )
> +        return -EINVAL;
> +
>      rc = -ENOMEM;
>      s = xzalloc(struct hvm_ioreq_server);
>      if ( !s )
> @@ -1015,7 +1022,7 @@ static int hvm_create_ioreq_server(struc
>      if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL )
>          goto fail2;
>
> -    rc = hvm_ioreq_server_init(s, d, domid, is_default, handle_bufioreq,
> +    rc = hvm_ioreq_server_init(s, d, domid, is_default, bufioreq_handling,
>                                 next_ioservid(d));
>      if ( rc )
>          goto fail3;
> @@ -2560,7 +2567,7 @@ int hvm_buffered_io_send(ioreq_t *p)
>      spin_lock(&s->bufioreq_lock);
>
> -    if ( (pg->write_pointer - pg->read_pointer) >=
> +    if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >=
>           (IOREQ_BUFFER_SLOT_NUM - qw) )
>      {
>          /* The queue is full: send the iopacket through the normal path. */
> @@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
>          return 0;
>      }
>
> -    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
> +    pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
>
>      if ( qw )
>      {
>          bp.data = p->data >> 32;
> -        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> +        pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
>      }
>
>      /* Make the ioreq_t visible /before/ write_pointer. */
>      wmb();
> -    pg->write_pointer += qw ? 2 : 1;
> +    pg->ptrs.write_pointer += qw ? 2 : 1;
> +
> +    /* Canonicalize read/write pointers to prevent their overflow. */
> +    while ( s->bufioreq_atomic && qw++ < IOREQ_BUFFER_SLOT_NUM &&
> +            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
> +    {
> +        union bufioreq_pointers old = pg->ptrs, new;
> +        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
> +
> +        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> +        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> +        cmpxchg(&pg->ptrs.full, old.full, new.full);
> +    }

This is not safe: if the reader increments read_pointer (atomically)
after you read old.read_pointer and before the cmpxchg, the increment
is lost.

I think you need to remove the cmpxchg and subtract a multiple of
IOREQ_BUFFER_SLOT_NUM from read_pointer atomically here. Unfortunately,
if we do that, we can no longer update write_pointer and read_pointer
together. But if we decrement write_pointer atomically before
read_pointer, I think we should be fine, provided we fix the reader
side appropriately too:

QEMU:

    atomic_read(&read_pointer);
    read write_pointer;
    xen_rmb();
    while (read_pointer < write_pointer) {
        [work]
        atomic_add(&read_pointer, stuff);
        read write_pointer;
        xen_rmb();
    }

Xen:

    [work]
    increment write_pointer;
    xen_wmb();
    if (read_pointer >= IOREQ_BUFFER_SLOT_NUM) {
        write_pointer -= IOREQ_BUFFER_SLOT_NUM;
        xen_wmb();
        atomic_sub(&read_pointer, IOREQ_BUFFER_SLOT_NUM);
    }
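To spell out the proposal, here is a minimal compilable sketch (my
approximation, not the real QEMU/Xen code: struct bufioreq_ring,
bufioreq_consume() and bufioreq_publish() are hypothetical names, and
GCC __atomic builtins stand in for the QEMU/Xen atomic and barrier
primitives):

    #include <stdint.h>

    #define IOREQ_BUFFER_SLOT_NUM 511

    struct bufioreq_ring {
        uint32_t read_pointer;   /* added to only by QEMU, subtracted
                                    from only by Xen */
        uint32_t write_pointer;  /* written only by Xen */
    };

    /* Reader side (QEMU): process entries, advancing read_pointer with
     * an atomic add so that a concurrent atomic subtract from Xen
     * cannot be lost. */
    static void bufioreq_consume(struct bufioreq_ring *pg)
    {
        uint32_t rp = __atomic_load_n(&pg->read_pointer, __ATOMIC_RELAXED);
        uint32_t wp = __atomic_load_n(&pg->write_pointer, __ATOMIC_ACQUIRE);

        while (rp < wp) {
            /* [work]: handle slot rp % IOREQ_BUFFER_SLOT_NUM. */

            __atomic_add_fetch(&pg->read_pointer, 1, __ATOMIC_RELEASE);
            /* Re-read both pointers: Xen may have rebased them by
             * IOREQ_BUFFER_SLOT_NUM in the meantime. */
            rp = __atomic_load_n(&pg->read_pointer, __ATOMIC_RELAXED);
            wp = __atomic_load_n(&pg->write_pointer, __ATOMIC_ACQUIRE);
        }
    }

    /* Writer side (Xen, under bufioreq_lock in the real code): publish
     * an entry, then canonicalize once the reader has gone through a
     * full cycle.  write_pointer has a single writer, so plain stores
     * with release ordering (the xen_wmb()s above) suffice for it;
     * only read_pointer needs the atomic subtract. */
    static void bufioreq_publish(struct bufioreq_ring *pg)
    {
        /* [work]: fill slot write_pointer % IOREQ_BUFFER_SLOT_NUM. */

        /* xen_wmb(): make the entry visible before the pointer. */
        __atomic_store_n(&pg->write_pointer, pg->write_pointer + 1,
                         __ATOMIC_RELEASE);

        if (__atomic_load_n(&pg->read_pointer, __ATOMIC_RELAXED) >=
            IOREQ_BUFFER_SLOT_NUM) {
            /* Rebase write_pointer first... */
            __atomic_store_n(&pg->write_pointer,
                             pg->write_pointer - IOREQ_BUFFER_SLOT_NUM,
                             __ATOMIC_RELEASE);
            /* ...then read_pointer, atomically. */
            __atomic_sub_fetch(&pg->read_pointer, IOREQ_BUFFER_SLOT_NUM,
                               __ATOMIC_RELEASE);
        }
    }

The point of ordering the two stores this way is that the transient
state (write_pointer already rebased, read_pointer not yet) can only
make the ring look emptier than it is, so the reader harmlessly stops
and retries later; rebasing read_pointer first could make the ring look
fuller and let the reader consume slots that were never written.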