diff for duplicates of <13744465-ca7a-0aaf-5abb-43a70a39c167@linux.ibm.com>
diff --git a/a/1.txt b/N1/1.txt
index 9cf17e1..cc62e45 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -192,8 +192,4 @@ okay.
Thanks,
-Shiva
-
-_______________________________________________
-Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
-To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
\ No newline at end of file
+Shiva
\ No newline at end of file
diff --git a/N1/2.bin b/N1/2.bin
new file mode 100644
index 0000000..79500a3
--- /dev/null
+++ b/N1/2.bin
@@ -0,0 +1,260 @@
+<html>
+ <head>
+ <meta http-equiv="Content-Type" content="text/html;
+ charset=windows-1252">
+ </head>
+ <body>
+ <p><br>
+ </p>
+ <div class="moz-cite-prefix">On 3/24/21 8:37 AM, David Gibson wrote:<br>
+ </div>
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box">
+ <pre class="moz-quote-pre" wrap="">On Tue, Mar 23, 2021 at 09:47:38AM -0400, Shivaprasad G Bhat wrote:
+</pre>
+ <blockquote type="cite">
+ <pre class="moz-quote-pre" wrap="">
+machine vmstate.
+
+Signed-off-by: Shivaprasad G Bhat <a class="moz-txt-link-rfc2396E" href="mailto:sbhat@linux.ibm.com"><sbhat@linux.ibm.com></a>
+</pre>
+ </blockquote>
+ <pre class="moz-quote-pre" wrap="">An overal question: surely the same issue must arise on x86 with
+file-backed NVDIMMs. How do they handle this case?
+</pre>
+ </blockquote>
+ <p>Discussed in other threads..</p>
+ <p>....<br>
+ </p>
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box">
+ <blockquote type="cite">
+ <pre class="moz-quote-pre" wrap=""> };
+@@ -2997,6 +3000,9 @@ static void spapr_machine_init(MachineState *machine)
+ }
+
+ qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond);
++ qemu_mutex_init(&spapr->spapr_nvdimm_flush_states_lock);
+</pre>
+ </blockquote>
+ <pre class="moz-quote-pre" wrap="">Do you actually need an extra mutex, or can you rely on the BQL?</pre>
+ </blockquote>
+ <p>I verified BQL is held at all places where it matters in the
+ context of this patch.</p>
+ <p>Safe to get rid of this extra mutex. <br>
+ </p>
+ <p>...<br>
+ </p>
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box"><br>
+ <blockquote type="cite">
+ <pre class="moz-quote-pre" wrap="">+{
++ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
++
++ return (!QLIST_EMPTY(&spapr->pending_flush_states) ||
++ !QLIST_EMPTY(&spapr->completed_flush_states));
++}
++
++static int spapr_nvdimm_pre_save(void *opaque)
++{
++ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
++
++ while (!QLIST_EMPTY(&spapr->pending_flush_states)) {
++ aio_poll(qemu_get_aio_context(), true);
+</pre>
+ </blockquote>
+ <pre class="moz-quote-pre" wrap="">Hmm... how long could waiting for all the pending flushes to complete
+take? This could add substanially to the guest's migration downtime,
+couldn't it?</pre>
+ </blockquote>
+ <pre><span style="color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;"></span>
+</pre>
+ <pre><span style="color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;"><span style="color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;"></span>
+
+
+<span style="color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;"> </span></span><span style="color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;"><span style="color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;"><span style="color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;"><span style="color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;"><span style="color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;"><span style="color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;">The time taken depends on the number of dirtied pages and
+the disk io write</span>
+speed. The number of dirty pages on host
+is configureable with tunables
+</span>
+vm.dirty_background_ratio
+(10% default on Fedora 32, Ubuntu 20.04),
+</span>
+vm.dirty_ratio(20%)
+of host memory and|or vm.dirty_expire_centisecs(30 seconds).</span>
+So, the host itself would be flushing the mmaped file on its
+own from time to time.
+
+</span>
+For guests using the nvdimms with filesystem, the flushes
+would have come frequently and the number of dirty pages
+might be less. The pmem applications can use the nvdimms
+without a filesystem. And for such guests, the chances that
+a flush request can come from pmem applications at the time
+of migration is less or is random. But, the host would have
+flushed the pagecache on its own when vm.dirty_background_ratio
+is crossed or vm.dirty_expire_centisecs expired.
+
+So, the worst case would stands at disk io latency for writing
+the dirtied pages in the last vm.dirty_expire_centisecs on host
+OR latency for writing maximum vm.dirty_background_ratio(10%)
+of host RAM.
+
+If you want me to calibrate any particular size, scenario and get
+the numbers please let me know.</span></pre>
+ ...
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box">
+ <blockquote type="cite">
+ <pre class="moz-quote-pre" wrap="">+
++/*
++ * Acquire a unique token and reserve it for the new flush state.
++ */
++static SpaprNVDIMMDeviceFlushState *spapr_nvdimm_init_new_flush_state(void)
++{
++ Error *err = NULL;
++ uint64_t token;
++ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
++ SpaprNVDIMMDeviceFlushState *tmp, *next, *state;
++
++ state = g_malloc0(sizeof(*state));
++
++ qemu_mutex_lock(&spapr->spapr_nvdimm_flush_states_lock);
++retry:
++ if (qemu_guest_getrandom(&token, sizeof(token), &err) < 0) {
+</pre>
+ </blockquote>
+ <pre class="moz-quote-pre" wrap="">Using getrandom seems like overkill, why not just use a counter?</pre>
+ </blockquote>
+ <p>I didnt want a spurious guest to abuse by consuming the return
+ value providing <br>
+ </p>
+ <p>a valid "guess-able" counter and the real driver failing
+ subsequently. Also, across</p>
+ <p> guest migrations carrying the global counter to destination is
+ another thing to ponder.</p>
+ <p><br>
+ </p>
+ <p>Let me know if you want me to reconsider using counter.</p>
+ <p>...<br>
+ </p>
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box">
+ <blockquote type="cite">
+ <pre class="moz-quote-pre" wrap="">mm_flush_states_lock);
++
++ return state;
++}
++
++/*
++ * spapr_nvdimm_finish_flushes
++ * Waits for all pending flush requests to complete
++ * their execution and free the states
++ */
++void spapr_nvdimm_finish_flushes(void)
++{
++ SpaprNVDIMMDeviceFlushState *state, *next;
++ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+</pre>
+ </blockquote>
+ <pre class="moz-quote-pre" wrap="">The caller has natural access to the machine, so pass it in rather
+than using the global.</pre>
+ </blockquote>
+ <p>okay</p>
+ <p>... </p>
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box">
+ <blockquote type="cite">
+ <pre class="moz-quote-pre" wrap="">+
++/*
++ * spapr_nvdimm_get_hcall_status
++ * Fetches the status of the hcall worker and returns H_BUSY
++ * if the worker is still running.
++ */
++static int spapr_nvdimm_get_flush_status(uint64_t token)
++{
++ int ret = H_LONG_BUSY_ORDER_10_MSEC;
++ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+</pre>
+ </blockquote>
+ <pre class="moz-quote-pre" wrap="">The callers have natural access to spapr, so pass it in rather than
+using the global.</pre>
+ </blockquote>
+ <p>Okay</p>
+ <p>...</p>
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box">
+ <blockquote type="cite">
+ <pre class="moz-quote-pre" wrap="">+
++/*
++ * H_SCM_FLUSH
++ * Input: drc_index, continue-token
++ * Out: continue-token
++ * Return Value: H_SUCCESS, H_Parameter, H_P2, H_BUSY
++ *
++ * Given a DRC Index Flush the data to backend NVDIMM device.
++ * The hcall returns H_BUSY when the flush takes longer time and the hcall
+</pre>
+ </blockquote>
+ <pre class="moz-quote-pre" wrap="">It returns one of the H_LONG_BUSY values, not actual H_BUSY, doesn't
+it?
+</pre>
+ </blockquote>
+ <p>Yes. I thought its okay to call it just H_BUSY in a generic way.
+ Will fix it.</p>
+ <p><br>
+ </p>
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box">
+ <blockquote type="cite">
+ <pre class="moz-quote-pre" wrap="">+ * needs to be issued multiple times in order to be completely serviced.
++ }
++
++ return ret;
++ }
++
++ dimm = PC_DIMM(drc->dev);
++ backend = MEMORY_BACKEND(dimm->hostmem);
++
++ state = spapr_nvdimm_init_new_flush_state();
++ if (!state) {
++ return H_P2;
+</pre>
+ </blockquote>
+ <pre class="moz-quote-pre" wrap="">AFAICT the only way init_new_flush_state() fails is a failure in the
+RNG, which definitely isn't a parameter problem.</pre>
+ </blockquote>
+ <p>Will change it to H_HARDWARE. <br>
+ </p>
+ <p><br>
+ </p>
+ <p> </p>
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box">
+ <blockquote type="cite">
+ <pre class="moz-quote-pre" wrap="">+ }
++
++ state->backend_fd = memory_region_get_fd(&backend->mr);
+</pre>
+ </blockquote>
+ <pre class="moz-quote-pre" wrap="">Is this guaranteed to return a usable fd in all configurations?</pre>
+ </blockquote>
+ <p>Right, for memory-backend-ram this wont work. I think we should</p>
+ <p>not set the hcall-flush-required too for memory-backend-ram. Will
+ fix this.<br>
+ </p>
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box">
+ <blockquote type="cite">
+ <pre class="moz-quote-pre" wrap="">+ thread_pool_submit_aio(pool, flush_worker_cb, state,
++ spapr_nvdimm_flush_completion_cb, state);
++
++ ret = spapr_nvdimm_get_flush_status(state->continue_token);
++ if (H_IS_LONG_BUSY(ret)) {
++ args[0] = state->continue_token;
++ }
++
++ return ret;
+</pre>
+ </blockquote>
+ <pre class="moz-quote-pre" wrap="">I believe you can rearrange this so the get_flush_status / check /
+return is shared between the args[0] == 0 and args[0] == token paths.</pre>
+ </blockquote>
+ okay.<br>
+ <blockquote type="cite" cite="mid:YFqs8M1dHAFhdCL6@yekko.fritz.box">
+ </blockquote>
+ <p>Thanks,</p>
+ <p>Shiva<br>
+ </p>
+ </body>
+</html>
\ No newline at end of file
diff --git a/N1/2.hdr b/N1/2.hdr
new file mode 100644
index 0000000..3a590bd
--- /dev/null
+++ b/N1/2.hdr
@@ -0,0 +1,2 @@
+Content-Type: text/html; charset=windows-1252
+Content-Transfer-Encoding: base64
diff --git a/a/content_digest b/N1/content_digest
index 85628db..9242df9 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -20,23 +20,22 @@
"To\0David Gibson <david\@gibson.dropbear.id.au>\0"
]
[
- "Cc\0sbhat\@linux.vnet.ibm.com",
- " groug\@kaod.org",
- " qemu-ppc\@nongnu.org",
- " ehabkost\@redhat.com",
- " marcel.apfelbaum\@gmail.com",
+ "Cc\0ehabkost\@redhat.com",
" mst\@redhat.com",
- " imammedo\@redhat.com",
- " xiaoguangrong.eric\@gmail.com",
- " qemu-devel\@nongnu.org",
" aneesh.kumar\@linux.ibm.com",
+ " bharata\@linux.vnet.ibm.com",
" linux-nvdimm\@lists.01.org",
+ " groug\@kaod.org",
" kvm-ppc\@vger.kernel.org",
+ " qemu-devel\@nongnu.org",
" shivaprasadbhat\@gmail.com",
- " bharata\@linux.vnet.ibm.com\0"
+ " qemu-ppc\@nongnu.org",
+ " imammedo\@redhat.com",
+ " sbhat\@linux.vnet.ibm.com",
+ " xiaoguangrong.eric\@gmail.com\0"
]
[
- "\0000:1\0"
+ "\0001:1\0"
]
[
"b\0"
@@ -236,11 +235,275 @@
"\n",
"Thanks,\n",
"\n",
- "Shiva\n",
- "\n",
- "_______________________________________________\n",
- "Linux-nvdimm mailing list -- linux-nvdimm\@lists.01.org\n",
- "To unsubscribe send an email to linux-nvdimm-leave\@lists.01.org"
+ "Shiva"
+]
+[
+ "\0001:2\0"
+]
+[
+ "b\0"
+]
+[
+ "<html>\n",
+ " <head>\n",
+ " <meta http-equiv=\"Content-Type\" content=\"text/html;\n",
+ " charset=windows-1252\">\n",
+ " </head>\n",
+ " <body>\n",
+ " <p><br>\n",
+ " </p>\n",
+ " <div class=\"moz-cite-prefix\">On 3/24/21 8:37 AM, David Gibson wrote:<br>\n",
+ " </div>\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">On Tue, Mar 23, 2021 at 09:47:38AM -0400, Shivaprasad G Bhat wrote:\n",
+ "</pre>\n",
+ " <blockquote type=\"cite\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">\n",
+ "machine vmstate.\n",
+ "\n",
+ "Signed-off-by: Shivaprasad G Bhat <a class=\"moz-txt-link-rfc2396E\" href=\"mailto:sbhat\@linux.ibm.com\"><sbhat\@linux.ibm.com></a>\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">An overal question: surely the same issue must arise on x86 with\n",
+ "file-backed NVDIMMs. How do they handle this case?\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <p>Discussed in other threads..</p>\n",
+ " <p>....<br>\n",
+ " </p>\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\">\n",
+ " <blockquote type=\"cite\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\"> };\n",
+ "\@\@ -2997,6 +3000,9 \@\@ static void spapr_machine_init(MachineState *machine)\n",
+ " }\n",
+ " \n",
+ " qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond);\n",
+ "+ qemu_mutex_init(&spapr->spapr_nvdimm_flush_states_lock);\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">Do you actually need an extra mutex, or can you rely on the BQL?</pre>\n",
+ " </blockquote>\n",
+ " <p>I verified BQL is held at all places where it matters in the\n",
+ " context of this patch.</p>\n",
+ " <p>Safe to get rid of this extra mutex. <br>\n",
+ " </p>\n",
+ " <p>...<br>\n",
+ " </p>\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\"><br>\n",
+ " <blockquote type=\"cite\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">+{\n",
+ "+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());\n",
+ "+\n",
+ "+ return (!QLIST_EMPTY(&spapr->pending_flush_states) ||\n",
+ "+ !QLIST_EMPTY(&spapr->completed_flush_states));\n",
+ "+}\n",
+ "+\n",
+ "+static int spapr_nvdimm_pre_save(void *opaque)\n",
+ "+{\n",
+ "+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());\n",
+ "+\n",
+ "+ while (!QLIST_EMPTY(&spapr->pending_flush_states)) {\n",
+ "+ aio_poll(qemu_get_aio_context(), true);\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">Hmm... how long could waiting for all the pending flushes to complete\n",
+ "take? This could add substanially to the guest's migration downtime,\n",
+ "couldn't it?</pre>\n",
+ " </blockquote>\n",
+ " <pre><span style=\"color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;\"></span>\n",
+ "</pre>\n",
+ " <pre><span style=\"color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;\"><span style=\"color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;\"></span>\n",
+ "\n",
+ " \n",
+ "<span style=\"color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;\"> </span></span><span style=\"color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;\"><span style=\"color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;\"><span style=\"color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;\"><span style=\"color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;\"><span style=\"color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;\"><span style=\"color: rgb(29, 28, 29); font-size: 15px; font-style: normal; font-variant-ligatures: common-ligatures; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(248, 248, 248); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;\">The time taken depends on the number of dirtied pages and\n",
+ "the disk io write</span>\n",
+ "speed. The number of dirty pages on host\n",
+ "is configureable with tunables \n",
+ "</span>\n",
+ "vm.dirty_background_ratio\n",
+ "(10% default on Fedora 32, Ubuntu 20.04), \n",
+ "</span>\n",
+ "vm.dirty_ratio(20%)\n",
+ "of host memory and|or vm.dirty_expire_centisecs(30 seconds).</span>\n",
+ "So, the host itself would be flushing the mmaped file on its\n",
+ "own from time to time.\n",
+ "\n",
+ "</span>\n",
+ "For guests using the nvdimms with filesystem, the flushes\n",
+ "would have come frequently and the number of dirty pages\n",
+ "might be less. The pmem applications can use the nvdimms\n",
+ "without a filesystem. And for such guests, the chances that\n",
+ "a flush request can come from pmem applications at the time\n",
+ "of migration is less or is random. But, the host would have\n",
+ "flushed the pagecache on its own when vm.dirty_background_ratio\n",
+ "is crossed or vm.dirty_expire_centisecs expired. \n",
+ "\n",
+ "So, the worst case would stands at disk io latency for writing\n",
+ "the dirtied pages in the last vm.dirty_expire_centisecs on host\n",
+ "OR latency for writing maximum vm.dirty_background_ratio(10%)\n",
+ "of host RAM.\n",
+ "\n",
+ "If you want me to calibrate any particular size, scenario and get\n",
+ "the numbers please let me know.</span></pre>\n",
+ " ...\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\">\n",
+ " <blockquote type=\"cite\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">+\n",
+ "+/*\n",
+ "+ * Acquire a unique token and reserve it for the new flush state.\n",
+ "+ */\n",
+ "+static SpaprNVDIMMDeviceFlushState *spapr_nvdimm_init_new_flush_state(void)\n",
+ "+{\n",
+ "+ Error *err = NULL;\n",
+ "+ uint64_t token;\n",
+ "+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());\n",
+ "+ SpaprNVDIMMDeviceFlushState *tmp, *next, *state;\n",
+ "+\n",
+ "+ state = g_malloc0(sizeof(*state));\n",
+ "+\n",
+ "+ qemu_mutex_lock(&spapr->spapr_nvdimm_flush_states_lock);\n",
+ "+retry:\n",
+ "+ if (qemu_guest_getrandom(&token, sizeof(token), &err) < 0) {\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">Using getrandom seems like overkill, why not just use a counter?</pre>\n",
+ " </blockquote>\n",
+ " <p>I didnt want a spurious guest to abuse by consuming the return\n",
+ " value providing <br>\n",
+ " </p>\n",
+ " <p>a valid \"guess-able\" counter and the real driver failing\n",
+ " subsequently. Also, across</p>\n",
+ " <p> guest migrations carrying the global counter to destination is\n",
+ " another thing to ponder.</p>\n",
+ " <p><br>\n",
+ " </p>\n",
+ " <p>Let me know if you want me to reconsider using counter.</p>\n",
+ " <p>...<br>\n",
+ " </p>\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\">\n",
+ " <blockquote type=\"cite\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">mm_flush_states_lock);\n",
+ "+\n",
+ "+ return state;\n",
+ "+}\n",
+ "+\n",
+ "+/*\n",
+ "+ * spapr_nvdimm_finish_flushes\n",
+ "+ * Waits for all pending flush requests to complete\n",
+ "+ * their execution and free the states\n",
+ "+ */\n",
+ "+void spapr_nvdimm_finish_flushes(void)\n",
+ "+{\n",
+ "+ SpaprNVDIMMDeviceFlushState *state, *next;\n",
+ "+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">The caller has natural access to the machine, so pass it in rather\n",
+ "than using the global.</pre>\n",
+ " </blockquote>\n",
+ " <p>okay</p>\n",
+ " <p>... </p>\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\">\n",
+ " <blockquote type=\"cite\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">+\n",
+ "+/*\n",
+ "+ * spapr_nvdimm_get_hcall_status\n",
+ "+ * Fetches the status of the hcall worker and returns H_BUSY\n",
+ "+ * if the worker is still running.\n",
+ "+ */\n",
+ "+static int spapr_nvdimm_get_flush_status(uint64_t token)\n",
+ "+{\n",
+ "+ int ret = H_LONG_BUSY_ORDER_10_MSEC;\n",
+ "+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">The callers have natural access to spapr, so pass it in rather than\n",
+ "using the global.</pre>\n",
+ " </blockquote>\n",
+ " <p>Okay</p>\n",
+ " <p>...</p>\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\">\n",
+ " <blockquote type=\"cite\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">+\n",
+ "+/*\n",
+ "+ * H_SCM_FLUSH\n",
+ "+ * Input: drc_index, continue-token\n",
+ "+ * Out: continue-token\n",
+ "+ * Return Value: H_SUCCESS, H_Parameter, H_P2, H_BUSY\n",
+ "+ *\n",
+ "+ * Given a DRC Index Flush the data to backend NVDIMM device.\n",
+ "+ * The hcall returns H_BUSY when the flush takes longer time and the hcall\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">It returns one of the H_LONG_BUSY values, not actual H_BUSY, doesn't\n",
+ "it?\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <p>Yes. I thought its okay to call it just H_BUSY in a generic way.\n",
+ " Will fix it.</p>\n",
+ " <p><br>\n",
+ " </p>\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\">\n",
+ " <blockquote type=\"cite\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">+ * needs to be issued multiple times in order to be completely serviced.\n",
+ "+ }\n",
+ "+\n",
+ "+ return ret;\n",
+ "+ }\n",
+ "+\n",
+ "+ dimm = PC_DIMM(drc->dev);\n",
+ "+ backend = MEMORY_BACKEND(dimm->hostmem);\n",
+ "+\n",
+ "+ state = spapr_nvdimm_init_new_flush_state();\n",
+ "+ if (!state) {\n",
+ "+ return H_P2;\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">AFAICT the only way init_new_flush_state() fails is a failure in the\n",
+ "RNG, which definitely isn't a parameter problem.</pre>\n",
+ " </blockquote>\n",
+ " <p>Will change it to H_HARDWARE. <br>\n",
+ " </p>\n",
+ " <p><br>\n",
+ " </p>\n",
+ " <p> </p>\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\">\n",
+ " <blockquote type=\"cite\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">+ }\n",
+ "+\n",
+ "+ state->backend_fd = memory_region_get_fd(&backend->mr);\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">Is this guaranteed to return a usable fd in all configurations?</pre>\n",
+ " </blockquote>\n",
+ " <p>Right, for memory-backend-ram this wont work. I think we should</p>\n",
+ " <p>not set the hcall-flush-required too for memory-backend-ram. Will\n",
+ " fix this.<br>\n",
+ " </p>\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\">\n",
+ " <blockquote type=\"cite\">\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">+ thread_pool_submit_aio(pool, flush_worker_cb, state,\n",
+ "+ spapr_nvdimm_flush_completion_cb, state);\n",
+ "+\n",
+ "+ ret = spapr_nvdimm_get_flush_status(state->continue_token);\n",
+ "+ if (H_IS_LONG_BUSY(ret)) {\n",
+ "+ args[0] = state->continue_token;\n",
+ "+ }\n",
+ "+\n",
+ "+ return ret;\n",
+ "</pre>\n",
+ " </blockquote>\n",
+ " <pre class=\"moz-quote-pre\" wrap=\"\">I believe you can rearrange this so the get_flush_status / check /\n",
+ "return is shared between the args[0] == 0 and args[0] == token paths.</pre>\n",
+ " </blockquote>\n",
+ " okay.<br>\n",
+ " <blockquote type=\"cite\" cite=\"mid:YFqs8M1dHAFhdCL6\@yekko.fritz.box\">\n",
+ " </blockquote>\n",
+ " <p>Thanks,</p>\n",
+ " <p>Shiva<br>\n",
+ " </p>\n",
+ " </body>\n",
+ "</html>\n"
]
-54227e2e623fcb8c3e6cefaff353efb2d1a9b08bbc7d997681dee5729a3d5ab7
+dd3e4b9beb5f6ecf4be1260d050a68f96c10a8fbeac43c097e795ea7c86f9b7b
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.