xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/1] qemu-qdisk: indirect descriptors
@ 2016-06-15 17:01 Paulina Szubarczyk
  2016-06-15 17:01 ` [PATCH 1/1] " Paulina Szubarczyk
  0 siblings, 1 reply; 2+ messages in thread
From: Paulina Szubarczyk @ 2016-06-15 17:01 UTC (permalink / raw)
  To: xen-devel, roger.pau
  Cc: anthony.perard, sstabellini, Paulina Szubarczyk, P.Gawkowski

In the meantime I tried an implementation of indirect descriptors for qemu.
It is described further in the next mail and is based on the current staging
branch of qemu.

In my tests I did not observe an improvement. As the block size increases,
the decrease in bandwidth starts earlier than for the staging branch,
especially for higher values of iodepth[1].
I ran it under gprof and all the results are available on my github[2],
but below is a part of the flat profile for staging and for indirect
descriptors when fio is run with iodepth=256 and bs=256 for 300 sec.

In the indirect descriptors implementation more time is spent in the
ioreq_unmap function, with a smaller number of calls. I also tried to check
whether it cooperates better with grant copy, running vmstat at the same
time, but then memory is rapidly exhausted and swapping out/in starts (part
of the listings is below), which is not the case for the plain grant copy
implementation.
I also tried different values of MAX_INDIRECT_SEGMENTS from the set
{256, 128, 64, 32, 16}, without any significant difference.

I would appreciate any suggestions on how to approach the problem.

flat profiles:
indirect descriptors
 Each sample counts as 0.01 seconds.
  %   cumulative   self              self     total           
 time   seconds   seconds    calls   s/call   s/call  name    
13.19      1.12     1.12   653798     0.00     0.00  get_clock_realtime
 10.13      1.98     0.86    83570     0.00     0.00  ioreq_unmap
  4.77      2.38     0.41 31245461     0.00     0.00  rcu_read_unlock
  4.12      2.73     0.35    83423     0.00     0.00  ioreq_map
  3.65      3.04     0.31 20900170     0.00     0.00  phys_page_find
  3.12      3.31     0.27 20886790     0.00     0.00  address_space_rw
  2.24      3.50     0.19 20886790     0.00     0.00  address_space_translate
  2.00      3.67     0.17 10849312     0.00     0.00  test_and_clear_bit
  1.88      3.83     0.16 31245456     0.00     0.00  rcu_read_lock
  1.71      3.98     0.14 41773586     0.00     0.00  memory_access_is_direct
  1.65      4.12     0.14 10330994     0.00     0.00  xen_map_cache_unlocked
  1.59      4.25     0.14 20886785     0.00     0.00  address_space_translate_internal
  1.53      4.38     0.13 10339152     0.00     0.00  cpu_inw
  1.41      4.50     0.12 10458730     0.00     0.00  find_portio
  1.30      4.61     0.11 10389053     0.00     0.00  cpu_physical_memory_rw
  1.12      4.71     0.10 10358655     0.00     0.00  qemu_get_ram_block
  1.06      4.79     0.09 31245450     0.00     0.00  xen_enabled
  1.06      4.88     0.09 10447242     0.00     0.00  portio_read
  1.06      4.97     0.09   237496     0.00     0.00  cpu_ioreq_pio
  1.06      5.07     0.09     1557     0.00     0.00  vnc_refresh_server_sur

 staging 
 Each sample counts as 0.01 seconds.
  %   cumulative   self              self     total           
 time   seconds   seconds    calls   s/call   s/call  name    
 11.51      1.61     1.61   970388     0.00     0.00  get_clock_realtime
  9.58      2.95     1.34  1186036     0.00     0.00  ioreq_unmap
  5.50      3.72     0.77  1187881     0.00     0.00  ioreq_map
  4.15      4.30     0.58 31195245     0.00     0.00  rcu_read_unlock
  2.50      4.65     0.35 31195243     0.00     0.00  rcu_read_lock
  2.50      5.00     0.35 20866261     0.00     0.00  phys_page_find
  1.79      5.25     0.25 20852888     0.00     0.00  address_space_rw
  1.36      5.44     0.19  4912499     0.00     0.00  qemu_coroutine_switch
  1.22      5.61     0.17 20852881     0.00     0.00  address_space_translate
  1.22      5.78     0.17  6141137     0.00     0.00  bdrv_is_inserted
  1.07      5.93     0.15  2455277     0.00     0.00  tracked_request_end
  1.07      6.08     0.15  1187877     0.00     0.00  ioreq_parse
  1.00      6.22     0.14 20852887     0.00     0.00  address_space_translate_internal
  1.00      6.36     0.14  2456463     0.00     0.00  qemu_aio_unref
  1.00      6.50     0.14  2456156     0.00     0.00  qemu_coroutine_enter
  0.93      6.63     0.13 41705784     0.00     0.00  memory_access_is_direct

vmstat listings:
grant map
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
 1  1     11     62   2775    638    0    0  4052   244 16250 14124  6 20 71  2  1
 1  0     11     58   2779    638    0    0  4308     0 16227 14254  7 18 74  1  0
 1  0     11     56   2781    638    0    0  2320  1456 16310 14124  6 19 74  0  1
 1  0     13     67   2776    631    0    1  3924  1372 14720 14019  6 20 74  0  1
 1  0     13     66   2779    631    0    0  2768     0 16105 14038  6 19 74  0  0
 1  0     13     63   2782    631    0    0  3000     0 14471 14002  6 19 74  0  0
 1  0     13     58   2786    632    0    0  3988    36 12383 13135  7 19 73  1  0
 1  0     13     56   2789    632    0    0  2488   116 12417 13853  6 20 74  0  0
 1  0     13     61   2788    627    0    0  2556   296 12402 13382  7 20 73  0  0
 2  0     13     59   2791    627    0    0  2552     0 16114 14085  7 18 74  0  1
 1  0     13     56   2793    627    0    0  2320     0 16155 14092  5 20 75  0  0
 1  0     14     69   2787    621    0    1  2848  1248 16766 14480  7 19 73  1  1
 1  0     14     65   2792    620    0    0  4356     6 16369 14136  6 20 74  0  0
 1  0     14     62   2795    621    0    0  3020     0 16079 14079  7 19 74  0  1
 1  0     14     59   2798    621    0    0  2964     0 16229 14084  5 19 75  0  1
 1  0     14     57   2800    621    0    0  2172     0 16454 14257  6 18 75  0  0
 2  0     15     69   2794    614    0    0  3024   712 16416 14241  7 18 73  1  1
 1  0     15     67   2797    615    0    0  2936    32 16168 14084  6 19 74  0  0

grant map with indirect descriptors
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
 2  1      0     89   1900   1477    0    0  5760    24 9876 11438  5 19 76  0  0
 2  0      0     83   1906   1478    0    0  5568     0 8829 12670  5 19 76  1  0
 1  0      0     78   1911   1477    0    0  4736     0 8649 11291  5 19 76  0  1
 1  0      0     73   1916   1478    0    0  5120   984 8746 11946  5 20 75  0  0
 1  0      0     66   1922   1478    0    0  6016     0 8959 11785  6 18 76  0  1
 1  0      0     61   1927   1478    0    0  5312    32 9031 11559  5 18 76  1  0
 2  0      0     56   1932   1477    0    0  4608     0 9170 12156  5 19 75  0  1
 2  0      0     63   1937   1466    0    0  4992    28 8205 11871  5 21 74  0  0
 2  0      0     57   1942   1466    0    0  4928     0 8249 12198  5 18 76  0  0
 1  0      0     67   1948   1450    0    0  5376     8 10813 11381  6 20 74  0  0
 2  0      0     63   1952   1450    0    0  4288   192 9651 11814  5 20 70  4  1
 1  0      0     59   1956   1450    0    0  4096     0 8960 12058  4 19 76  0  0
 1  0      0     68   1962   1434    0    0  5184    12 9207 12089  5 20 75  0  1
 1  0      0     64   1966   1434    0    0  4096     0 8433 12016  5 20 75  0  0
 1  0      0     60   1970   1434    0    0  4224   140 10919 10750  5 18 76  0  0
 1  0      0     55   1976   1434    0    0  5440     0 8362 12207  5 19 76  0  0
 1  0      0     60   1980   1425    0    0  3776    64 8437 12020  6 19 74  1  1
 1  0      0     55   1984   1425    0    0  4416     0 8902 11962  6 17 76  0  0

grant copy
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
 3  1      0     63   2789    760    0    0  2268     8 36651 32671  6 19 74  0  1
 0  0      0     62   2791    760    0    0  1240     4 36465 33543  5 18 75  1  1
 1  0      0     61   2792    760    0    0  1584     0 36237 32312  4 21 74  0  1
 3  0      0     59   2794    760    0    0  1628     0 36475 32888  4 20 75  0  1
 2  0      0     57   2796    760    0    0  1968     0 34898 33329  5 19 75  0  1
 0  0      0     55   2798    759    0    0  1948     0 31510 31938  4 20 75  0  1
 1  0      0     66   2794    753    0    0  2244    12 36692 34147  5 18 75  1  1
 1  1      0     64   2796    753    0    0  1792    20 29159 32907  5 18 76  0  1
 1  0      0     62   2798    753    0    0  2416     0 37445 35323  2 19 77  1  1
 2  0      0     59   2800    753    0    0  2188     0 35741 32670  4 20 76  0  1
 1  0      0     58   2802    753    0    0  1772     0 36770 34468  4 17 78  0  1
 0  0      0     56   2803    752    0    0  1260     0 36317 33152  4 19 76  0  1
 1  0      0     55   2805    753    0    0  1216     0 36364 32263  4 19 76  0  1
 4  0      0     67   2802    743    0    0  1068    16 35886 32045  5 19 75  1  1
 2  0      0     65   2805    743    0    0  2928     0 28347 33364  5 20 74  0  1
 2  0      0     62   2807    743    0    0  1944     0 36737 35010  4 17 78  0  1
 1  0      0     61   2808    743    0    0  1540     0 35855 31968  3 20 76  0  1
 1  0      0     58   2810    743    0    0  2268     0 36047 31639  4 20 75  0  1

grant copy with indirect descriptors
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
 0 29    296     55      9    150    0  153 124752 167260 11989 13103  2  7  6 84  0
 0 28    296     53      9    156    0    0  2692   620  596  852  0  0  0 100  0
 0 27    296     72      1    159    0    0  7896  2072  675  882  0  2  0 98  0
 2 11    313     53     13    146    0   17 35392 17032 2920 3640  1  5 10 83  0
 0 13    324     58      1    139    0   10 25240 10688 2270 2584  0  3 22 74  0
 0 27    447     72      0    117    0  122 126116 120204 10926 12598  1  5  4 90  0
 1 24    450     67      0    130    0    3 11968  3608  772 1283  0  2  5 93  0
 0 21    486     71      0    133    0   35  5176 34968  596  952  0  2 46 53  0
 0 18    486     62      0    141    0    0  9352     0  652 1029  0  1 35 64  0
 0 14    488     80      0    146    0    2 12068  2124  584  706  1  2 26 71  0
 0 27    619     59      8    104    0  131 126264 128104 10722 12416  1  7  5 87  0
 0 22    619     78      0    111    0    0 16652    28 1267 1865  0  2  0 98  0
 0 25    800     68      5     81    0  180 166844 176752 13875 16661  1  6  0 92  0
 0 19    800     55      1    100    0    0 14436   200  763  909  0  1  0 99  0
 2 15    801     57      6    105    0    1 16308  1080 1103 1461  0  4 17 79  0
 0 28    832     68      0     82    0   30 179036 30080 14029 16746  2  8  6 83  0
 0 17    831     57      0     94    0    0 14140     0 1082 1316  0  2 13 86  0
 1 18    849     68      1    101    0   17  9908 17444  691  873  0  2  6 92  0

[1] https://docs.google.com/spreadsheets/d/1E6AMiB8ceJpExL6jWpH9u2yy6DZxzhmDUyFf-eUuJ0c/edit#gid=1390267663
[2] https://github.com/paulina-szubarczyk/xen-benchmark/tree/master/gprof

Thanks and regards, 
Paulina

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [PATCH 1/1] qemu-qdisk: indirect descriptors
  2016-06-15 17:01 [PATCH 0/1] qemu-qdisk: indirect descriptors Paulina Szubarczyk
@ 2016-06-15 17:01 ` Paulina Szubarczyk
  0 siblings, 0 replies; 2+ messages in thread
From: Paulina Szubarczyk @ 2016-06-15 17:01 UTC (permalink / raw)
  To: xen-devel, roger.pau
  Cc: anthony.perard, sstabellini, Paulina Szubarczyk, P.Gawkowski

Introduction of indirect descriptors for qdisk.

Changes in the xen_blkif.h file:
 - struct blkif_x86_**_request contains union of
   'struct blkif_x86_**_request_direct' (previous struct blkif_x86_**_request)
   and 'struct blkif_x86_**_request_indirect'
 - new helper functions to rewrite 'struct blkif_x86_**_request_**'
   to struct 'blkif_request_local', named like that so as not to interfere
   with 'blkif_request' from "tools/include/xen/io/blkif.h"
 - a set of macros to maintain the indirect descriptors

Changes in the xen_disk.c file:
 - a new boolean feature_indirect member
 - a new helper function ioreq_get_operation_and_nr_segments
 - a new ioreq_parse_indirect function called when 'BLKIF_OP_INDIRECT'
   occurs. The function grant maps the pages with indirect descriptors and
   copies the segments to a local seg[MAX_INDIRECT_SEGMENTS] table placed
   in ioreq.

   After that the ioreq_parse function proceeds without changes. For
   direct requests the segments are mem-copied to the same ioreq seg table.

Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
---
 hw/block/xen_blkif.h         | 151 ++++++++++++++++++++++++++++++----
 hw/block/xen_disk.c          | 187 ++++++++++++++++++++++++++++++++++---------
 include/hw/xen/xen_backend.h |   2 +
 3 files changed, 285 insertions(+), 55 deletions(-)

diff --git a/hw/block/xen_blkif.h b/hw/block/xen_blkif.h
index c68487cb..04dce2f 100644
--- a/hw/block/xen_blkif.h
+++ b/hw/block/xen_blkif.h
@@ -18,40 +18,97 @@ struct blkif_common_response {
 
 /* i386 protocol version */
 #pragma pack(push, 4)
-struct blkif_x86_32_request {
-	uint8_t        operation;    /* BLKIF_OP_???                         */
+struct blkif_x86_32_request_direct {
 	uint8_t        nr_segments;  /* number of segments                   */
 	blkif_vdev_t   handle;       /* only for read/write requests         */
 	uint64_t       id;           /* private guest value, echoed in resp  */
 	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
 	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
+} __attribute__((__packed__));
+
+struct blkif_x86_32_request_indirect {
+	uint8_t        operation;
+	uint16_t       nr_segments;
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad2;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+	uint64_t       _pad3;         /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
+struct blkif_x86_32_request {
+	uint8_t 	   operation;
+	union  {
+		struct blkif_x86_32_request_direct direct;
+		struct blkif_x86_32_request_indirect indirect;
+	} u;
+} __attribute__((__packed__));
+
 struct blkif_x86_32_response {
 	uint64_t        id;              /* copied from request */
 	uint8_t         operation;       /* copied from request */
 	int16_t         status;          /* BLKIF_RSP_???       */
-};
+} __attribute__((__packed__));
+
 typedef struct blkif_x86_32_request blkif_x86_32_request_t;
+typedef struct blkif_x86_32_request_direct blkif_x86_32_request_direct_t;
+typedef struct blkif_x86_32_request_indirect blkif_x86_32_request_indirect_t;
 typedef struct blkif_x86_32_response blkif_x86_32_response_t;
 #pragma pack(pop)
 
 /* x86_64 protocol version */
-struct blkif_x86_64_request {
-	uint8_t        operation;    /* BLKIF_OP_???                         */
+struct blkif_x86_64_request_direct {
 	uint8_t        nr_segments;  /* number of segments                   */
 	blkif_vdev_t   handle;       /* only for read/write requests         */
-	uint64_t       __attribute__((__aligned__(8))) id;
+	uint32_t       _pad1;	     /* offsetof(blkif_request,u.rw.id) == 8 */
+	uint64_t       id;           /* private guest value, echoed in resp  */
 	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
 	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
+} __attribute__((__packed__));
+
+struct blkif_x86_64_request_indirect {
+	uint8_t        operation;
+	uint16_t       nr_segments;
+	uint32_t       _pad1;  
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad2;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+	uint32_t      _pad3;         /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
+struct blkif_x86_64_request {
+	uint8_t        operation;    /* BLKIF_OP_???                         */
+	union {
+		struct blkif_x86_64_request_direct direct;
+		struct blkif_x86_64_request_indirect indirect;
+	} u;
+} __attribute__((__packed__));
+
 struct blkif_x86_64_response {
-	uint64_t       __attribute__((__aligned__(8))) id;
+	uint64_t        id;              /* copied from request */
 	uint8_t         operation;       /* copied from request */
 	int16_t         status;          /* BLKIF_RSP_???       */
 };
+
 typedef struct blkif_x86_64_request blkif_x86_64_request_t;
+typedef struct blkif_x86_64_request_direct blkif_x86_64_request_direct_t;
+typedef struct blkif_x86_64_request_indirect blkif_x86_64_request_indirect_t;
 typedef struct blkif_x86_64_response blkif_x86_64_response_t;
 
+struct blkif_request_local {
+	uint8_t 	   operation;
+ 	union {
+		struct blkif_request direct;
+		struct blkif_request_indirect indirect;
+	} u;
+} __attribute__((__packed__));
+typedef struct blkif_request blkif_request_direct_t;
+typedef struct blkif_request_indirect blkif_request_indirect_t;
+typedef struct blkif_request_local blkif_request_local_t;
+
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response);
 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response);
@@ -70,16 +127,27 @@ enum blkif_protocol {
 	BLKIF_PROTOCOL_X86_64 = 3,
 };
 
-static inline void blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
+#define XEN_PAGE_SIZE 4096
+#define XEN_PAGES_PER_SEGMENT 1
+#define XEN_PAGES_PER_INDIRECT_FRAME \
+      (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
+#define SEGS_PER_INDIRECT_FRAME \
+      (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)
+#define MAX_INDIRECT_PAGES \
+      ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)
+
+static inline void blkif_get_x86_32_req_direct(blkif_request_direct_t *dst, 
+										       blkif_x86_32_request_direct_t *src,
+										       uint8_t operation)
 {
 	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
-	dst->operation = src->operation;
 	dst->nr_segments = src->nr_segments;
 	dst->handle = src->handle;
 	dst->id = src->id;
 	dst->sector_number = src->sector_number;
-	if (src->operation == BLKIF_OP_DISCARD) {
+	if (operation == BLKIF_OP_DISCARD) {
 		struct blkif_request_discard *s = (void *)src;
 		struct blkif_request_discard *d = (void *)dst;
 		d->nr_sectors = s->nr_sectors;
@@ -93,16 +161,43 @@ static inline void blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_reque
 		dst->seg[i] = src->seg[i];
 }
 
-static inline void blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
+static inline void blkif_get_x86_32_req_indirect(blkif_request_indirect_t *dst, 
+									             blkif_x86_32_request_indirect_t *src)
 {
-	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	int i, n;
+
+	dst->operation = src->operation;
+	dst->nr_segments = src->nr_segments;
+	dst->handle = src->handle;
+	dst->id = src->id;
+	dst->sector_number = src->sector_number;
+	n = INDIRECT_PAGES(dst->nr_segments);
+	for (i = 0; i < n; i++)
+		dst->indirect_grefs[i] = src->indirect_grefs[i];
+}
 
+static inline void blkif_get_x86_32_req_local(blkif_request_local_t *dst, 
+											  blkif_x86_32_request_t *src) 
+{
 	dst->operation = src->operation;
+	if (dst->operation == BLKIF_OP_INDIRECT) {
+		blkif_get_x86_32_req_indirect(&dst->u.indirect, &src->u.indirect);
+	} else {
+		blkif_get_x86_32_req_direct(&dst->u.direct, &src->u.direct, dst->operation);
+	}
+}
+
+static inline void blkif_get_x86_64_req_direct(blkif_request_direct_t *dst, 
+											   blkif_x86_64_request_direct_t *src,
+										       uint8_t operation)
+{
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
 	dst->nr_segments = src->nr_segments;
 	dst->handle = src->handle;
 	dst->id = src->id;
 	dst->sector_number = src->sector_number;
-	if (src->operation == BLKIF_OP_DISCARD) {
+	if (operation == BLKIF_OP_DISCARD) {
 		struct blkif_request_discard *s = (void *)src;
 		struct blkif_request_discard *d = (void *)dst;
 		d->nr_sectors = s->nr_sectors;
@@ -116,4 +211,30 @@ static inline void blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_reque
 		dst->seg[i] = src->seg[i];
 }
 
+static inline void blkif_get_x86_64_req_indirect(blkif_request_indirect_t *dst, 
+									             blkif_x86_64_request_indirect_t *src)
+{
+	int i, n;
+
+	dst->operation = src->operation;
+	dst->nr_segments = src->nr_segments;
+	dst->handle = src->handle;
+	dst->id = src->id;
+	dst->sector_number = src->sector_number;
+	n = INDIRECT_PAGES(dst->nr_segments);
+	for (i = 0; i < n; i++)
+		dst->indirect_grefs[i] = src->indirect_grefs[i];
+}
+
+static inline void blkif_get_x86_64_req_local(blkif_request_local_t *dst, 
+											  blkif_x86_64_request_t *src) 
+{
+	dst->operation = src->operation;
+	if (dst->operation == BLKIF_OP_INDIRECT) {
+		blkif_get_x86_64_req_indirect(&dst->u.indirect, &src->u.indirect);
+	} else {
+		blkif_get_x86_64_req_direct(&dst->u.direct, &src->u.direct, dst->operation);
+	}
+}
+
 #endif /* __XEN_BLKIF_H__ */
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 37e14d1..e497cde 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -52,7 +52,6 @@ static int max_requests = 32;
 /* ------------------------------------------------------------- */
 
 #define BLOCK_SIZE  512
-#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
 
 struct PersistentGrant {
     void *page;
@@ -69,8 +68,8 @@ struct PersistentRegion {
 typedef struct PersistentRegion PersistentRegion;
 
 struct ioreq {
-    blkif_request_t     req;
-    int16_t             status;
+    blkif_request_local_t   req;
+    int16_t                 status;
 
     /* parsed request */
     off_t               start;
@@ -80,19 +79,22 @@ struct ioreq {
     uint8_t             mapped;
 
     /* grant mapping */
-    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    uint32_t            domids[MAX_INDIRECT_SEGMENTS];
+    uint32_t            refs[MAX_INDIRECT_SEGMENTS];
     int                 prot;
-    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    void                *page[MAX_INDIRECT_SEGMENTS];
     void                *pages;
     int                 num_unmap;
 
+    /* indirect request */
+    struct blkif_request_segment seg[MAX_INDIRECT_SEGMENTS];
+
     /* aio status */
     int                 aio_inflight;
     int                 aio_errors;
 
     struct XenBlkDev    *blkdev;
-    QLIST_ENTRY(ioreq)   list;
+    QLIST_ENTRY(ioreq)  list;
     BlockAcctCookie     acct;
 };
 
@@ -131,6 +133,9 @@ struct XenBlkDev {
     unsigned int        persistent_gnt_count;
     unsigned int        max_grants;
 
+    /* Indirect descriptors */
+    gboolean            feature_indirect;
+
     /* qemu block driver */
     DriveInfo           *dinfo;
     BlockBackend        *blk;
@@ -216,7 +221,11 @@ static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
         ioreq = g_malloc0(sizeof(*ioreq));
         ioreq->blkdev = blkdev;
         blkdev->requests_total++;
-        qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+        if (blkdev->feature_indirect) {
+            qemu_iovec_init(&ioreq->v, MAX_INDIRECT_SEGMENTS);
+        } else {
+            qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+        }
     } else {
         /* get one from freelist */
         ioreq = QLIST_FIRST(&blkdev->freelist);
@@ -254,6 +263,57 @@ static void ioreq_release(struct ioreq *ioreq, bool finish)
     }
 }
 
+static void ioreq_get_operation_and_nr_segments(struct ioreq *ioreq, 
+                                                uint8_t *operation, 
+                                                uint16_t *nseg)
+{
+    if (ioreq->req.operation == BLKIF_OP_INDIRECT) {
+        *operation = ioreq->req.u.indirect.operation;
+        *nseg = ioreq->req.u.indirect.nr_segments;
+    } else {
+        *operation = ioreq->req.operation;
+        *nseg = ioreq->req.u.direct.nr_segments;
+    }
+}
+
+static int ioreq_parse_indirect(struct XenBlkDev *blkdev,
+                                blkif_request_indirect_t *req, uint32_t domid,
+                                struct blkif_request_segment *seg) 
+{
+    void *pages;
+    struct blkif_request_segment *segments = NULL;
+    int i, j, nr_indirect_grefs;
+
+    nr_indirect_grefs = INDIRECT_PAGES(req->nr_segments);
+
+    pages = xc_gnttab_map_domain_grant_refs(blkdev->xendev.gnttabdev, 
+                                            nr_indirect_grefs, domid, 
+                                            req->indirect_grefs,
+                                            PROT_READ);
+    
+    if (pages == NULL) {
+        xen_be_printf(&blkdev->xendev, 0, "can't map indirect grant refs %s\n", 
+                      strerror(errno));
+        return -1;
+    }
+
+    for (i = 0, j = 0; j < req->nr_segments; j++) {
+        i = j % SEGS_PER_INDIRECT_FRAME;
+        if (i == 0) {
+            segments = pages + i/SEGS_PER_INDIRECT_FRAME * XC_PAGE_SIZE;
+        }
+        seg[j].gref = segments[i].gref;
+        seg[j].first_sect = segments[i].first_sect;
+        seg[j].last_sect = segments[i].last_sect;
+    }
+
+    if (xc_gnttab_munmap(blkdev->xendev.gnttabdev, pages, nr_indirect_grefs)) {
+        xen_be_printf(&blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
+                      strerror(errno));
+    }
+
+    return 0;
+}
 /*
  * translate request into iovec + start offset
  * do sanity checks along the way
@@ -261,21 +321,21 @@ static void ioreq_release(struct ioreq *ioreq, bool finish)
 static int ioreq_parse(struct ioreq *ioreq)
 {
     struct XenBlkDev *blkdev = ioreq->blkdev;
+    uint8_t operation;
+    uint16_t nseg;
     uintptr_t mem;
     size_t len;
-    int i;
+    int i, r;
 
-    xen_be_printf(&blkdev->xendev, 3,
-                  "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
-                  ioreq->req.operation, ioreq->req.nr_segments,
-                  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
-    switch (ioreq->req.operation) {
+    ioreq_get_operation_and_nr_segments(ioreq, &operation, &nseg);
+
+    switch (operation) {
     case BLKIF_OP_READ:
         ioreq->prot = PROT_WRITE; /* to memory */
         break;
     case BLKIF_OP_FLUSH_DISKCACHE:
         ioreq->presync = 1;
-        if (!ioreq->req.nr_segments) {
+        if (!nseg) {
             return 0;
         }
         /* fall through */
@@ -286,35 +346,53 @@ static int ioreq_parse(struct ioreq *ioreq)
         return 0;
     default:
         xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
-                      ioreq->req.operation);
+                      operation);
         goto err;
     };
 
-    if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
+    if (operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
         xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
         goto err;
     }
 
-    ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
-    for (i = 0; i < ioreq->req.nr_segments; i++) {
-        if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+    if (ioreq->req.operation == BLKIF_OP_INDIRECT) {
+        if (nseg > MAX_INDIRECT_SEGMENTS) {
             xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
             goto err;
         }
-        if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
+        r = ioreq_parse_indirect(ioreq->blkdev, &ioreq->req.u.indirect, 
+                                 blkdev->xendev.dom, ioreq->seg);
+        if (r != 0) {
+            xen_be_printf(&blkdev->xendev, 0, 
+                                  "error: failed to map indirect segments\n");
+            goto err;
+        }
+        ioreq->start = ioreq->req.u.indirect.sector_number * blkdev->file_blk;
+    } else {
+        if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+            xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
+            goto err;
+        }
+        memcpy(ioreq->seg, ioreq->req.u.direct.seg, sizeof(struct blkif_request_segment)*nseg);
+        ioreq->start = ioreq->req.u.direct.sector_number * blkdev->file_blk;
+    }
+
+    for (i = 0; i < nseg; i++) {
+
+        if (ioreq->seg[i].first_sect > ioreq->seg[i].last_sect) {
             xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
             goto err;
         }
-        if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
+        if (ioreq->seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
             xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
             goto err;
         }
 
         ioreq->domids[i] = blkdev->xendev.dom;
-        ioreq->refs[i]   = ioreq->req.seg[i].gref;
+        ioreq->refs[i]   = ioreq->seg[i].gref;
 
-        mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
-        len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
+        mem = ioreq->seg[i].first_sect * blkdev->file_blk;
+        len = (ioreq->seg[i].last_sect - ioreq->seg[i].first_sect + 1) * blkdev->file_blk;
         qemu_iovec_add(&ioreq->v, (void*)mem, len);
     }
     if (ioreq->start + ioreq->v.size > blkdev->file_size) {
@@ -365,9 +443,9 @@ static void ioreq_unmap(struct ioreq *ioreq)
 static int ioreq_map(struct ioreq *ioreq)
 {
     XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
-    uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    uint32_t domids[MAX_INDIRECT_SEGMENTS];
+    uint32_t refs[MAX_INDIRECT_SEGMENTS];
+    void *page[MAX_INDIRECT_SEGMENTS];
     int i, j, new_maps = 0;
     PersistentGrant *grant;
     PersistentRegion *region;
@@ -505,10 +583,14 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
 static void qemu_aio_complete(void *opaque, int ret)
 {
     struct ioreq *ioreq = opaque;
+    uint8_t operation;    
+    uint16_t nseg;
+
+    ioreq_get_operation_and_nr_segments(ioreq, &operation, &nseg);
 
     if (ret != 0) {
         xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
-                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
+                      operation ? "read" : "write");
         ioreq->aio_errors++;
     }
 
@@ -531,10 +613,10 @@ static void qemu_aio_complete(void *opaque, int ret)
     ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
     ioreq_unmap(ioreq);
     ioreq_finish(ioreq);
-    switch (ioreq->req.operation) {
+    switch (operation) {
     case BLKIF_OP_WRITE:
     case BLKIF_OP_FLUSH_DISKCACHE:
-        if (!ioreq->req.nr_segments) {
+        if (!nseg) {
             break;
         }
     case BLKIF_OP_READ:
@@ -550,8 +632,12 @@ static void qemu_aio_complete(void *opaque, int ret)
 static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
 {
     struct XenBlkDev *blkdev = ioreq->blkdev;
+    uint8_t operation;    
+    uint16_t nseg;
 
-    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
+    ioreq_get_operation_and_nr_segments(ioreq, &operation, &nseg);
+
+    if (nseg && ioreq_map(ioreq) == -1) {
         goto err_no_map;
     }
 
@@ -561,7 +647,7 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
         return 0;
     }
 
-    switch (ioreq->req.operation) {
+    switch (operation) {
     case BLKIF_OP_READ:
         block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct,
                          ioreq->v.size, BLOCK_ACCT_READ);
@@ -572,7 +658,7 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
         break;
     case BLKIF_OP_WRITE:
     case BLKIF_OP_FLUSH_DISKCACHE:
-        if (!ioreq->req.nr_segments) {
+        if (!nseg) {
             break;
         }
 
@@ -617,8 +703,13 @@ static int blk_send_response_one(struct ioreq *ioreq)
     blkif_response_t  resp;
     void              *dst;
 
-    resp.id        = ioreq->req.id;
-    resp.operation = ioreq->req.operation;
+    if (ioreq->req.operation == BLKIF_OP_INDIRECT) {
+        resp.id        = ioreq->req.u.indirect.id;
+        resp.operation = ioreq->req.u.indirect.operation;
+    } else {        
+        resp.id        = ioreq->req.u.direct.id;
+        resp.operation = ioreq->req.operation;
+    }   
     resp.status    = ioreq->status;
 
     /* Place on the response ring for the relevant domain. */
@@ -683,11 +774,11 @@ static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_I
                sizeof(ioreq->req));
         break;
     case BLKIF_PROTOCOL_X86_32:
-        blkif_get_x86_32_req(&ioreq->req,
+        blkif_get_x86_32_req_local(&ioreq->req,
                              RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc));
         break;
     case BLKIF_PROTOCOL_X86_64:
-        blkif_get_x86_64_req(&ioreq->req,
+        blkif_get_x86_64_req_local(&ioreq->req,
                              RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc));
         break;
     }
@@ -756,6 +847,7 @@ static void blk_bh(void *opaque)
 static void blk_alloc(struct XenDevice *xendev)
 {
     struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+    int max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
     QLIST_INIT(&blkdev->inflight);
     QLIST_INIT(&blkdev->finished);
@@ -764,8 +856,15 @@ static void blk_alloc(struct XenDevice *xendev)
     if (xen_mode != XEN_EMULATE) {
         batch_maps = 1;
     }
+    blkdev->feature_indirect = true;
+
+    if (blkdev->feature_indirect) {
+        max_segments = MAX_INDIRECT_SEGMENTS;
+    }
+
+    /* max_segments covers indirect requests when they are enabled. */
     if (xc_gnttab_set_max_grants(xendev->gnttabdev,
-            MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) {
+            MAX_GRANTS(max_requests, max_segments)) < 0) {
         xen_be_printf(xendev, 0, "xc_gnttab_set_max_grants failed: %s\n",
                       strerror(errno));
     }
@@ -855,6 +954,10 @@ static int blk_init(struct XenDevice *xendev)
     xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1);
     xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1);
     xenstore_write_be_int(&blkdev->xendev, "info", info);
+    if (blkdev->feature_indirect) {
+        xenstore_write_be_int(&blkdev->xendev, "feature-max-indirect-segments", 
+                              MAX_INDIRECT_SEGMENTS);
+    }
 
     blk_parse_discard(blkdev);
 
@@ -1008,7 +1111,11 @@ static int blk_connect(struct XenDevice *xendev)
 
     if (blkdev->feature_persistent) {
         /* Init persistent grants */
-        blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+        if (blkdev->feature_indirect) {
+            blkdev->max_grants = max_requests * MAX_INDIRECT_SEGMENTS;
+        } else {
+            blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+        }
         blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp,
                                              NULL, NULL,
                                              batch_maps ?
diff --git a/include/hw/xen/xen_backend.h b/include/hw/xen/xen_backend.h
index 3b4125e..6836f98 100644
--- a/include/hw/xen/xen_backend.h
+++ b/include/hw/xen/xen_backend.h
@@ -15,6 +15,8 @@ struct XenDevice;
 #define DEVOPS_FLAG_NEED_GNTDEV   1
 /* don't expect frontend doing correct state transitions (aka console quirk) */
 #define DEVOPS_FLAG_IGNORE_STATE  2
+/* max segments per indirect blkif request ("feature-max-indirect-segments") */
+#define MAX_INDIRECT_SEGMENTS 32
 
 struct XenDevOps {
     size_t    size;
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-06-15 17:02 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-15 17:01 [PATCH 0/1] qemu-qdisk: indirect descriptors Paulina Szubarczyk
2016-06-15 17:01 ` [PATCH 1/1] " Paulina Szubarczyk

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).