diff for duplicates of <1536957543.12990.9.camel@intel.com>
diff --git a/a/content_digest b/N1/content_digest
index 88b054e..856d609 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -66,9 +66,7 @@
" Mike Kravetz <mike.kravetz\@oracle.com>",
" Nadav Amit <nadav.amit\@gmail.com>",
" Oleg Nesterov <oleg\@redhat.com>",
- " Pavel Machek <pavel\@ucw.cz>",
- " ravi.v.shankar\@intel.com",
- " vedvyas.shanbhogue\@intel.com\0"
+ " Pavel Machek <pavel\@ucw.cz>\0"
]
[
"\0000:1\0"
@@ -145,4 +143,4 @@
"\302\240}"
]
-688220fb0040e94d7d8e8625dd50df1758580853a90d7832c2872d714a41da50
+32b70e7976d080f456ba7a0406cf0bbce61e3f30a34b44696e9f190fce358c05
diff --git a/a/1.txt b/N2/1.txt
index 47a3609..a1907c2 100644
--- a/a/1.txt
+++ b/N2/1.txt
@@ -11,14 +11,14 @@ On Fri, 2018-08-31 at 18:29 +0200, Peter Zijlstra wrote:
> > We don't need a race.
> >
> > I think the cmpxchg will be slower, even without a race, than the code
-> > that was there before. The cmpxchg is a simple, straightforward
+> > that was there before.A A The cmpxchg is a simple, straightforward
> > solution, but we're putting it in place of a plain memory write, which
> > is suboptimal.
> Note quite, the clear_bit() is LOCK prefixed.
With the updated ptep_set_wrprotect() below, I did MADV_WILLNEED to a shadow
stack of 8 MB, then 10,000 fork()'s, but could not prove it is more or less
-efficient than the other. So can we say this is probably fine in terms of
+efficient than the other. A So can we say this is probably fine in terms of
efficiency?
Yu-cheng
@@ -30,30 +30,30 @@ Yu-cheng
+++ b/arch/x86/include/asm/pgtable.h
@@ -1203,7 +1203,36 @@ static inline pte_t ptep_get_and_clear_full(struct
mm_struct *mm,
- static inline void ptep_set_wrprotect(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
- {
+A static inline void ptep_set_wrprotect(struct mm_struct *mm,
+A A A A A A A unsigned long addr, pte_t *ptep)
+A {
+#ifdef CONFIG_X86_INTEL_SHADOW_STACK_USER
+ pte_t new_pte, pte = READ_ONCE(*ptep);
+
+ /*
-+ * Some processors can start a write, but end up
-+ * seeing a read-only PTE by the time they get
-+ * to the Dirty bit. In this case, they will
-+ * set the Dirty bit, leaving a read-only, Dirty
-+ * PTE which looks like a Shadow Stack PTE.
-+ *
-+ * However, this behavior has been improved and
-+ * will not occur on processors supporting
-+ * Shadow Stacks. Without this guarantee, a
-+ * transition to a non-present PTE and flush the
-+ * TLB would be needed.
-+ *
-+ * When changing a writable PTE to read-only and
-+ * if the PTE has _PAGE_DIRTY_HW set, we move
-+ * that bit to _PAGE_DIRTY_SW so that the PTE is
-+ * not a valid Shadow Stack PTE.
-+ */
++ A * Some processors can start a write, but end up
++ A * seeing a read-only PTE by the time they get
++ A * to the Dirty bit.A A In this case, they will
++ A * set the Dirty bit, leaving a read-only, Dirty
++ A * PTE which looks like a Shadow Stack PTE.
++ A *
++ A * However, this behavior has been improved and
++ A * will not occur on processors supporting
++ A * Shadow Stacks.A A Without this guarantee, a
++ A * transition to a non-present PTE and flush the
++ A * TLB would be needed.
++ A *
++ A * When changing a writable PTE to read-only and
++ A * if the PTE has _PAGE_DIRTY_HW set, we move
++ A * that bit to _PAGE_DIRTY_SW so that the PTE is
++ A * not a valid Shadow Stack PTE.
++ A */
+ do {
+ new_pte = pte_wrprotect(pte);
+ new_pte.pte |= (new_pte.pte & _PAGE_DIRTY_HW) >>
@@ -61,6 +61,6 @@ mm_struct *mm,
+ new_pte.pte &= ~_PAGE_DIRTY_HW;
+ } while (!try_cmpxchg(ptep, &pte, new_pte));
+#else
- clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
+A clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
+#endif
- }
\ No newline at end of file
+A }
\ No newline at end of file
diff --git a/a/content_digest b/N2/content_digest
index 88b054e..8bbd9d0 100644
--- a/a/content_digest
+++ b/N2/content_digest
@@ -90,14 +90,14 @@
"> > We don't need a race.\n",
"> > \n",
"> > I think the cmpxchg will be slower, even without a race, than the code\n",
- "> > that was there before.\302\240\302\240The cmpxchg is a simple, straightforward\n",
+ "> > that was there before.A A The cmpxchg is a simple, straightforward\n",
"> > solution, but we're putting it in place of a plain memory write, which\n",
"> > is suboptimal.\n",
"> Note quite, the clear_bit() is LOCK prefixed.\n",
"\n",
"With the updated ptep_set_wrprotect() below, I did MADV_WILLNEED to a shadow\n",
"stack of 8 MB, then 10,000 fork()'s, but could not prove it is more or less\n",
- "efficient than the other. \302\240So can we say this is probably fine in terms of\n",
+ "efficient than the other. A So can we say this is probably fine in terms of\n",
"efficiency?\n",
"\n",
"Yu-cheng\n",
@@ -109,30 +109,30 @@
"+++ b/arch/x86/include/asm/pgtable.h\n",
"\@\@ -1203,7 +1203,36 \@\@ static inline pte_t ptep_get_and_clear_full(struct\n",
"mm_struct *mm,\n",
- "\302\240static inline void ptep_set_wrprotect(struct mm_struct *mm,\n",
- "\302\240\t\t\t\t\302\240\302\240\302\240\302\240\302\240\302\240unsigned long addr, pte_t *ptep)\n",
- "\302\240{\n",
+ "A static inline void ptep_set_wrprotect(struct mm_struct *mm,\n",
+ "A \t\t\t\tA A A A A A unsigned long addr, pte_t *ptep)\n",
+ "A {\n",
"+#ifdef CONFIG_X86_INTEL_SHADOW_STACK_USER\n",
"+\tpte_t new_pte, pte = READ_ONCE(*ptep);\n",
"+\n",
"+\t/*\n",
- "+\t\302\240* Some processors can start a write, but end up\n",
- "+\t\302\240* seeing a read-only PTE by the time they get\n",
- "+\t\302\240* to the Dirty bit.\302\240\302\240In this case, they will\n",
- "+\t\302\240* set the Dirty bit, leaving a read-only, Dirty\n",
- "+\t\302\240* PTE which looks like a Shadow Stack PTE.\n",
- "+\t\302\240*\n",
- "+\t\302\240* However, this behavior has been improved and\n",
- "+\t\302\240* will not occur on processors supporting\n",
- "+\t\302\240* Shadow Stacks.\302\240\302\240Without this guarantee, a\n",
- "+\t\302\240* transition to a non-present PTE and flush the\n",
- "+\t\302\240* TLB would be needed.\n",
- "+\t\302\240*\n",
- "+\t\302\240* When changing a writable PTE to read-only and\n",
- "+\t\302\240* if the PTE has _PAGE_DIRTY_HW set, we move\n",
- "+\t\302\240* that bit to _PAGE_DIRTY_SW so that the PTE is\n",
- "+\t\302\240* not a valid Shadow Stack PTE.\n",
- "+\t\302\240*/\n",
+ "+\tA * Some processors can start a write, but end up\n",
+ "+\tA * seeing a read-only PTE by the time they get\n",
+ "+\tA * to the Dirty bit.A A In this case, they will\n",
+ "+\tA * set the Dirty bit, leaving a read-only, Dirty\n",
+ "+\tA * PTE which looks like a Shadow Stack PTE.\n",
+ "+\tA *\n",
+ "+\tA * However, this behavior has been improved and\n",
+ "+\tA * will not occur on processors supporting\n",
+ "+\tA * Shadow Stacks.A A Without this guarantee, a\n",
+ "+\tA * transition to a non-present PTE and flush the\n",
+ "+\tA * TLB would be needed.\n",
+ "+\tA *\n",
+ "+\tA * When changing a writable PTE to read-only and\n",
+ "+\tA * if the PTE has _PAGE_DIRTY_HW set, we move\n",
+ "+\tA * that bit to _PAGE_DIRTY_SW so that the PTE is\n",
+ "+\tA * not a valid Shadow Stack PTE.\n",
+ "+\tA */\n",
"+\tdo {\n",
"+\t\tnew_pte = pte_wrprotect(pte);\n",
"+\t\tnew_pte.pte |= (new_pte.pte & _PAGE_DIRTY_HW) >>\n",
@@ -140,9 +140,9 @@
"+\t\tnew_pte.pte &= ~_PAGE_DIRTY_HW;\n",
"+\t} while (!try_cmpxchg(ptep, &pte, new_pte));\n",
"+#else\n",
- "\302\240\tclear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);\n",
+ "A \tclear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);\n",
"+#endif\n",
- "\302\240}"
+ "A }"
]
-688220fb0040e94d7d8e8625dd50df1758580853a90d7832c2872d714a41da50
+c8d1f9d012d5939a34a7ec1bbef78a1ef9ed10faa6ad440dd2a45db16cf29b4c
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.