The reasoning is the same as in http://git.infradead.org/users/jjs/linux-tpmdd.git/commit/abd55954f91a3aacc1d260d2411cf776ec4d5fd2 Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> --- arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 5b28a9c0cb68..d53aee5a64c1 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -259,7 +259,7 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) if (copy_from_user(&ecreate, arg, sizeof(ecreate))) return -EFAULT; - secs_page = alloc_page(GFP_HIGHUSER); + secs_page = alloc_page(GFP_KERNEL); if (!secs_page) return -ENOMEM; @@ -674,7 +674,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg) if (copy_from_user(&einit, arg, sizeof(einit))) return -EFAULT; - initp_page = alloc_page(GFP_HIGHUSER); + initp_page = alloc_page(GFP_KERNEL); if (!initp_page) return -ENOMEM; -- 2.20.1
__sgx_encl_add_page() can only fail in the case of EPCM conflict at least in non-artificial situations. Also, it consistent semantics in rollback is something to pursue for. Thus, destroy enclave when the EADD fails as we do when EEXTEND fails already. In the cases it is sane to return -EIO. From this the caller can deduce the failure and knows that the enclave was destroyed. The previous -EFAULT could happen in numerous situations. Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> --- arch/x86/kernel/cpu/sgx/ioctl.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index d53aee5a64c1..289af607f634 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -338,7 +338,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl, kunmap_atomic((void *)pginfo.contents); put_page(src_page); - return ret ? -EFAULT : 0; + return ret ? -EIO : 0; } static int __sgx_encl_extend(struct sgx_encl *encl, @@ -353,7 +353,7 @@ static int __sgx_encl_extend(struct sgx_encl *encl, if (ret) { if (encls_failed(ret)) ENCLS_WARN(ret, "EEXTEND"); - return -EFAULT; + return -EIO; } } @@ -413,8 +413,10 @@ static int sgx_encl_add_page(struct sgx_encl *encl, ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo, addp->src); - if (ret) + if (ret) { + sgx_encl_destroy(encl); goto err_out; + } /* * Complete the "add" before doing the "extend" so that the "add" @@ -498,10 +500,9 @@ static int sgx_encl_add_page(struct sgx_encl *encl, * * Return: * 0 on success, - * -EINVAL if any input param or the SECINFO contains invalid data, * -EACCES if an executable source page is located in a noexec partition, - * -ENOMEM if any memory allocation, including EPC, fails, - * -ERESTARTSYS if a pending signal is recognized + * -EIO if either ENCLS[EADD] or ENCLS[EEXTEND] fails + * -errno otherwise */ static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void 
__user *arg) { -- 2.20.1
The subordinate clause of the last sentence of the sgx_ioc_enclave_add_pages() documentation does not provide any insight not already provided. Thus, remove it. Also, using "i.e." (and "e.g.") in the documentation should be considered a bad practice because it leaves it open ended. Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> --- arch/x86/kernel/cpu/sgx/ioctl.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 289af607f634..87b2fb62825a 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -468,11 +468,9 @@ static int sgx_encl_add_page(struct sgx_encl *encl, * @encl: pointer to an enclave instance (via ioctl() file pointer) * @arg: a user pointer to a struct sgx_enclave_add_pages instance * - * Add (EADD) one or more pages to an uninitialized enclave, and optionally - * extend (EEXTEND) the measurement with the contents of the page. The range of - * pages must be virtually contiguous. The SECINFO and measurement mask are - * applied to all pages, i.e. pages with different properties must be added in - * separate calls. + * Add one or more pages to an uninitialized enclave, and optionally extend the + * measurement with the contents of the page. The address range of pages must + * be contiguous. The SECINFO and measurement mask are applied to all pages. * * A SECINFO for a TCS is required to always contain zero permissions because * CPU silently zeros them. Allowing anything else would cause a mismatch in -- 2.20.1
On Mon, Nov 04, 2019 at 10:01:39PM +0200, Jarkko Sakkinen wrote: > The reasoning is the same as in > > http://git.infradead.org/users/jjs/linux-tpmdd.git/commit/abd55954f91a3aacc1d260d2411cf776ec4d5fd2 > > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> > --- > arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c > index 5b28a9c0cb68..d53aee5a64c1 100644 > --- a/arch/x86/kernel/cpu/sgx/ioctl.c > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c > @@ -259,7 +259,7 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) > if (copy_from_user(&ecreate, arg, sizeof(ecreate))) > return -EFAULT; > > - secs_page = alloc_page(GFP_HIGHUSER); > + secs_page = alloc_page(GFP_KERNEL); > if (!secs_page) > return -ENOMEM; > > @@ -674,7 +674,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg) > if (copy_from_user(&einit, arg, sizeof(einit))) > return -EFAULT; > > - initp_page = alloc_page(GFP_HIGHUSER); > + initp_page = alloc_page(GFP_KERNEL); Would it make sense to use GFP_KERNEL_ACCOUNT? The accounting would be weird for the case where userspace is using a builder process, but even in that case it's not flat out wrong to account per-enclave memory allocations. > if (!initp_page) > return -ENOMEM; > > -- > 2.20.1 >
On Mon, Nov 04, 2019 at 10:01:40PM +0200, Jarkko Sakkinen wrote: > __sgx_encl_add_page() can only fail in the case of EPCM conflict at least > in non-artificial situations. Huh? EADD can fail for a variety of reasons. I can't think of a use case where userspace _won't_ kill the enclave in response to failure, but that doesn't justify killing the enclave, e.g. we don't kill the enclave in any other error path that is just as indicative of a userspace bug. > Also, it consistent semantics in rollback is something to pursue for. I don't follow this at all. How is it inconsistent to state that errors are handled gracefully unless they're unrecoverable? > Thus, destroy enclave when the EADD fails as we do when EEXTEND fails > already. > > In the cases it is sane to return -EIO. From this the caller can deduce > the failure and knows that the enclave was destroyed. The previous > -EFAULT could happen in numerous situations. This should be a separate patch. > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> > --- > arch/x86/kernel/cpu/sgx/ioctl.c | 13 +++++++------ > 1 file changed, 7 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c > index d53aee5a64c1..289af607f634 100644 > --- a/arch/x86/kernel/cpu/sgx/ioctl.c > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c > @@ -338,7 +338,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl, > kunmap_atomic((void *)pginfo.contents); > put_page(src_page); > > - return ret ? -EFAULT : 0; > + return ret ? 
-EIO : 0; > } > > static int __sgx_encl_extend(struct sgx_encl *encl, > @@ -353,7 +353,7 @@ static int __sgx_encl_extend(struct sgx_encl *encl, > if (ret) { > if (encls_failed(ret)) > ENCLS_WARN(ret, "EEXTEND"); > - return -EFAULT; > + return -EIO; > } > } > > @@ -413,8 +413,10 @@ static int sgx_encl_add_page(struct sgx_encl *encl, > > ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo, > addp->src); > - if (ret) > + if (ret) { > + sgx_encl_destroy(encl); > goto err_out; > + } > > /* > * Complete the "add" before doing the "extend" so that the "add" > @@ -498,10 +500,9 @@ static int sgx_encl_add_page(struct sgx_encl *encl, > * > * Return: > * 0 on success, > - * -EINVAL if any input param or the SECINFO contains invalid data, > * -EACCES if an executable source page is located in a noexec partition, > - * -ENOMEM if any memory allocation, including EPC, fails, > - * -ERESTARTSYS if a pending signal is recognized Why are you removing the documentation for EINVAL, ENOMEM and ERESTARTSYS? > + * -EIO if either ENCLS[EADD] or ENCLS[EEXTEND] fails > + * -errno otherwise > */ > static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg) > { > -- > 2.20.1 >
On Mon, Nov 04, 2019 at 10:01:41PM +0200, Jarkko Sakkinen wrote: > The subordinate clause of last sentence of the sgx_ioc_enclave_pages() > does not provide any insight not already provided. Thus, remove it. Isn't the whole point of the documentation to help the user understand *how* to use the API, not simply state exactly what the code does? > Also, using "i.e." (and "e.g.") in the documentation should be > considered a bad practice because it leaves it open ended. How does stating the practical effect of the semantics leave the docs open ended? The man pages for open[1], close[2], read[3], dlopen[4], etc... all provide examples (usually with "for example" verbiage) to call out common scenarios to help the reader understand the details, and close() also uses "i.e." to clarify. [1] http://man7.org/linux/man-pages/man2/open.2.html [2] http://man7.org/linux/man-pages/man2/close.2.html [3] http://man7.org/linux/man-pages/man2/read.2.html [4] http://man7.org/linux/man-pages/man3/dlopen.3.html > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> > --- > arch/x86/kernel/cpu/sgx/ioctl.c | 8 +++----- > 1 file changed, 3 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c > index 289af607f634..87b2fb62825a 100644 > --- a/arch/x86/kernel/cpu/sgx/ioctl.c > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c > @@ -468,11 +468,9 @@ static int sgx_encl_add_page(struct sgx_encl *encl, > * @encl: pointer to an enclave instance (via ioctl() file pointer) > * @arg: a user pointer to a struct sgx_enclave_add_pages instance > * > - * Add (EADD) one or more pages to an uninitialized enclave, and optionally > - * extend (EEXTEND) the measurement with the contents of the page. The range of Why are you dropping the reference to the ENCLS functions? 
Yes, it's stating the obvious for those of us that have been buried in SGX for the last few years, but for newbies reading the docs I think there is value in explicitly connecting the actions to the ENCLS function. > - * pages must be virtually contiguous. The SECINFO and measurement mask are > - * applied to all pages, i.e. pages with different properties must be added in > - * separate calls. > + * Add one or more pages to an uninitialized enclave, and optionally extend the > + * measurement with the contents of the page. The address range of pages must > + * be contiguous. The SECINFO and measurement mask are applied to all pages. I like making it "address range", but I'd prefer to keep the "virtually" qualifier. Again, probably stating the obvious for many readers, but I don't think it's ever harmful and I like the clarification it provides without having to stop and consider the context. > * > * A SECINFO for a TCS is required to always contain zero permissions because > * CPU silently zeros them. Allowing anything else would cause a mismatch in > -- > 2.20.1 >
On Mon, Nov 04, 2019 at 12:46:02PM -0800, Sean Christopherson wrote:
> On Mon, Nov 04, 2019 at 10:01:39PM +0200, Jarkko Sakkinen wrote:
> > The reasoning is the same as in
> >
> > http://git.infradead.org/users/jjs/linux-tpmdd.git/commit/abd55954f91a3aacc1d260d2411cf776ec4d5fd2
> >
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> > ---
> > arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> > index 5b28a9c0cb68..d53aee5a64c1 100644
> > --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> > @@ -259,7 +259,7 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
> > if (copy_from_user(&ecreate, arg, sizeof(ecreate)))
> > return -EFAULT;
> >
> > - secs_page = alloc_page(GFP_HIGHUSER);
> > + secs_page = alloc_page(GFP_KERNEL);
> > if (!secs_page)
> > return -ENOMEM;
> >
> > @@ -674,7 +674,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
> > if (copy_from_user(&einit, arg, sizeof(einit)))
> > return -EFAULT;
> >
> > - initp_page = alloc_page(GFP_HIGHUSER);
> > + initp_page = alloc_page(GFP_KERNEL);
>
> Would it make sense to use GFP_KERNEL_ACCOUNT? The accounting would be
> weird for the case where userspace is using a builder process, but even in
> that case it's not flat out wrong to account per-enclave memory allocations.
I did not find a single call site that would use that for allocating
memory for function-internal data.
/Jarkko
On Mon, Nov 04, 2019 at 12:54:01PM -0800, Sean Christopherson wrote: > On Mon, Nov 04, 2019 at 10:01:40PM +0200, Jarkko Sakkinen wrote: > > __sgx_encl_add_page() can only fail in the case of EPCM conflict at least > > in non-artificial situations. > > Huh? EADD can fail for a variety of reasons. I can't think of a use case > where userspace _won't_ kill the enclave in response to failure, but that > doesn't justify killing the enclave, e.g. we don't kill the enclave in any > other error path that is just as indicative of a userspace bug. I think it does because it is the only sane metrics to take and it also makes the semantics more sound and coherent. > > Also, it consistent semantics in rollback is something to pursue for. > > I don't follow this at all. How is it inconsistent to state that errors > are handled gracefully unless they're unrecoverable? Always when the user space gets -EIO it will know that enclave ceased to exist. That is very consistent. > > Thus, destroy enclave when the EADD fails as we do when EEXTEND fails > > already. > > > > In the cases it is sane to return -EIO. From this the caller can deduce > > the failure and knows that the enclave was destroyed. The previous > > -EFAULT could happen in numerous situations. > > This should be a separate patch. No it shouldn't because it is so closely connected to the semantics change. /Jarkko
On Mon, Nov 04, 2019 at 01:21:22PM -0800, Sean Christopherson wrote: > On Mon, Nov 04, 2019 at 10:01:41PM +0200, Jarkko Sakkinen wrote: > > The subordinate clause of last sentence of the sgx_ioc_enclave_pages() > > does not provide any insight not already provided. Thus, remove it. > > Isn't the whole point of the documentation to help the user understand > *how* to use the API, not simply state exactly what the code does? For kdoc it should explain in clear and concise way That clause does not provide any help for that. It just repeats with different vocabulary the exact same thing as was already said. > > Also, using "i.e." (and "e.g.") in the documentation should be > > considered a bad practice because it leaves it open ended. > > How does stating the practical effect of the semantics leave the docs open > ended? The man pages for open[1], close[2], read[3], dlopen[4], etc... > all provide examples (usually with "for example" verbiage) to call out > common scenarios to help the reader understand the details, and close() > also uses "i.e." to clarify. I did not find the use of "i.e." or "e.g." from those. When they introduce an example it is done in a more structured way, not as a subordinate clause. /Jarkko
On Tue, Nov 05, 2019 at 12:36:59AM +0200, Jarkko Sakkinen wrote:
> On Mon, Nov 04, 2019 at 01:21:22PM -0800, Sean Christopherson wrote:
> > On Mon, Nov 04, 2019 at 10:01:41PM +0200, Jarkko Sakkinen wrote:
> > > The subordinate clause of last sentence of the sgx_ioc_enclave_pages()
> > > does not provide any insight not already provided. Thus, remove it.
> >
> > Isn't the whole point of the documentation to help the user understand
> > *how* to use the API, not simply state exactly what the code does?
>
> For kdoc it should explain in clear and concise way
... what the function does.
/Jarkko
On Tue, Nov 05, 2019 at 12:26:58AM +0200, Jarkko Sakkinen wrote:
> On Mon, Nov 04, 2019 at 12:46:02PM -0800, Sean Christopherson wrote:
> > On Mon, Nov 04, 2019 at 10:01:39PM +0200, Jarkko Sakkinen wrote:
> > > The reasoning is the same as in
> > >
> > > http://git.infradead.org/users/jjs/linux-tpmdd.git/commit/abd55954f91a3aacc1d260d2411cf776ec4d5fd2
> > >
> > > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> > > ---
> > > arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++--
> > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> > > index 5b28a9c0cb68..d53aee5a64c1 100644
> > > --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> > > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> > > @@ -259,7 +259,7 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
> > > if (copy_from_user(&ecreate, arg, sizeof(ecreate)))
> > > return -EFAULT;
> > >
> > > - secs_page = alloc_page(GFP_HIGHUSER);
> > > + secs_page = alloc_page(GFP_KERNEL);
> > > if (!secs_page)
> > > return -ENOMEM;
> > >
> > > @@ -674,7 +674,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
> > > if (copy_from_user(&einit, arg, sizeof(einit)))
> > > return -EFAULT;
> > >
> > > - initp_page = alloc_page(GFP_HIGHUSER);
> > > + initp_page = alloc_page(GFP_KERNEL);
> >
> > Would it make sense to use GFP_KERNEL_ACCOUNT? The accounting would be
> > weird for the case where userspace is using a builder process, but even in
> > that case it's not flat out wrong to account per-enclave memory allocations.
>
> I did not find a single call site that would use that for allocating
> memory for function-internal data.
Actually, the fact that the allocations are transient is an even better
argument for accounting the memory, as the weirdness I was referring to
doesn't exist for the builder concept.
But looking more closely, Documentation/core-api/memory-allocation.rst
states:
* Untrusted allocations triggered from userspace should be a subject
of kmem accounting and must have ``__GFP_ACCOUNT`` bit set. There
is the handy ``GFP_KERNEL_ACCOUNT`` shortcut for ``GFP_KERNEL``
allocations that should be accounted.
That means all uses of GFP_KERNEL except in sgx_alloc_epc_section() should
be converted to GFP_KERNEL_ACCOUNT. As is, depending on fd limits[*], a
single process can easily burn through multiple GBs of memory simply by
opening /dev/sgx/enclave in a loop.
[*] AFAICT, systemd is upping the max number of open files to 1M on my
systems. I don't _think_ I changed a setting anywhere?
On Mon, Nov 04, 2019 at 06:17:20PM -0800, Sean Christopherson wrote: > On Tue, Nov 05, 2019 at 12:26:58AM +0200, Jarkko Sakkinen wrote: > > On Mon, Nov 04, 2019 at 12:46:02PM -0800, Sean Christopherson wrote: > > > On Mon, Nov 04, 2019 at 10:01:39PM +0200, Jarkko Sakkinen wrote: > > > > The reasoning is the same as in > > > > > > > > http://git.infradead.org/users/jjs/linux-tpmdd.git/commit/abd55954f91a3aacc1d260d2411cf776ec4d5fd2 > > > > > > > > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> > > > > --- > > > > arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++-- > > > > 1 file changed, 2 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c > > > > index 5b28a9c0cb68..d53aee5a64c1 100644 > > > > --- a/arch/x86/kernel/cpu/sgx/ioctl.c > > > > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c > > > > @@ -259,7 +259,7 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) > > > > if (copy_from_user(&ecreate, arg, sizeof(ecreate))) > > > > return -EFAULT; > > > > > > > > - secs_page = alloc_page(GFP_HIGHUSER); > > > > + secs_page = alloc_page(GFP_KERNEL); > > > > if (!secs_page) > > > > return -ENOMEM; > > > > > > > > @@ -674,7 +674,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg) > > > > if (copy_from_user(&einit, arg, sizeof(einit))) > > > > return -EFAULT; > > > > > > > > - initp_page = alloc_page(GFP_HIGHUSER); > > > > + initp_page = alloc_page(GFP_KERNEL); > > > > > > Would it make sense to use GFP_KERNEL_ACCOUNT? The accounting would be > > > weird for the case where userspace is using a builder process, but even in > > > that case it's not flat out wrong to account per-enclave memory allocations. > > > > I did not find a single call site that would use that for allocating > > memory for function-internal data. 
> > Actually, the fact that the allocations are transient is an even better > argument for accounting the memory, as the weirdness I was referring to > doesn't exist for the builder concept. > > But looking more closely, Documentation/core-api/memory-allocation.rst > states: > > * Untrusted allocations triggered from userspace should be a subject > of kmem accounting and must have ``__GFP_ACCOUNT`` bit set. There > is the handy ``GFP_KERNEL_ACCOUNT`` shortcut for ``GFP_KERNEL`` > allocations that should be accounted. > > That means all uses of GFP_KERNEL except in sgx_alloc_epc_section() should > be converted to GFP_KERNEL_ACCOUNT. As is, depending on fd limits[*], a > single process can easily burn through multiple GBs of memory simply by > opening /dev/sgx/enclave in a loop. What does the documentation mean by untrusted allocation? __GFP_ACCOUNT kernel and GFP_KERNEL_ACCOUNT are both quite alien flags to me as is kmemcg. Things that I know that exist but have never had to deal with them. Looking at the kernel source code they rarely get used. Many drivers have process bound data structures but none of the drivers use these flags. I'm wondering why. Why sgx_alloc_epc_section() is a use case given that it is something that allocates memory for the global EPC database? > [*] AFAICT, systemd is upping the max number of open files to 1M on my > systems. I don't _think_ I changed a setting anywhere? /Jarkko
On Wed, Nov 06, 2019 at 11:54:38PM +0200, Jarkko Sakkinen wrote: > On Mon, Nov 04, 2019 at 06:17:20PM -0800, Sean Christopherson wrote: > > On Tue, Nov 05, 2019 at 12:26:58AM +0200, Jarkko Sakkinen wrote: > > > On Mon, Nov 04, 2019 at 12:46:02PM -0800, Sean Christopherson wrote: > > > > On Mon, Nov 04, 2019 at 10:01:39PM +0200, Jarkko Sakkinen wrote: > > > > > The reasoning is the same as in > > > > > > > > > > http://git.infradead.org/users/jjs/linux-tpmdd.git/commit/abd55954f91a3aacc1d260d2411cf776ec4d5fd2 > > > > > > > > > > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> > > > > > --- > > > > > arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++-- > > > > > 1 file changed, 2 insertions(+), 2 deletions(-) > > > > > > > > > > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c > > > > > index 5b28a9c0cb68..d53aee5a64c1 100644 > > > > > --- a/arch/x86/kernel/cpu/sgx/ioctl.c > > > > > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c > > > > > @@ -259,7 +259,7 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) > > > > > if (copy_from_user(&ecreate, arg, sizeof(ecreate))) > > > > > return -EFAULT; > > > > > > > > > > - secs_page = alloc_page(GFP_HIGHUSER); > > > > > + secs_page = alloc_page(GFP_KERNEL); > > > > > if (!secs_page) > > > > > return -ENOMEM; > > > > > > > > > > @@ -674,7 +674,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg) > > > > > if (copy_from_user(&einit, arg, sizeof(einit))) > > > > > return -EFAULT; > > > > > > > > > > - initp_page = alloc_page(GFP_HIGHUSER); > > > > > + initp_page = alloc_page(GFP_KERNEL); > > > > > > > > Would it make sense to use GFP_KERNEL_ACCOUNT? The accounting would be > > > > weird for the case where userspace is using a builder process, but even in > > > > that case it's not flat out wrong to account per-enclave memory allocations. 
> > > > > > I did not find a single call site that would use that for allocating > > > memory for function-internal data. > > > > Actually, the fact that the allocations are transient is an even better > > argument for accounting the memory, as the weirdness I was referring to > > doesn't exist for the builder concept. > > > > But looking more closely, Documentation/core-api/memory-allocation.rst > > states: > > > > * Untrusted allocations triggered from userspace should be a subject > > of kmem accounting and must have ``__GFP_ACCOUNT`` bit set. There > > is the handy ``GFP_KERNEL_ACCOUNT`` shortcut for ``GFP_KERNEL`` > > allocations that should be accounted. > > > > That means all uses of GFP_KERNEL except in sgx_alloc_epc_section() should > > be converted to GFP_KERNEL_ACCOUNTED. As is, depending on fd limits[*], a > > single process can easily burn through multiple GBs of memory simply by > > opening /dev/sgx/enclave in a loop. > > What does the documentation mean by untrusted allocaton? > > __GFP_ACCOUNT kernel and GFP_KERNEL_ACCOUNT are both quite alien flags > to me as is kmemcg. Things that I know that exist but have never had to > deal with them. > > Looking at the kernel source code they rarely get used. Many drivers > have process bound data structures but none of the drivers use these > flags. I'm wondering why. > > Why sgx_alloc_epc_section() is a use case given that it is something > that allocates memory for the global EPC database? > > > [*] AFAICT, systemd is upping the max number of open files to 1M on my > > systems. I don't _think_ I changed a setting anywhere? 
Anyway, the tree is now updated: diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 5b82670bb79a..d53aee5a64c1 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -259,7 +259,7 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) if (copy_from_user(&ecreate, arg, sizeof(ecreate))) return -EFAULT; - secs_page = alloc_page(GFP_HIGHUSER); + secs_page = alloc_page(GFP_KERNEL); if (!secs_page) return -ENOMEM; @@ -427,12 +427,20 @@ static int sgx_encl_add_page(struct sgx_encl *encl, if (addp->flags & SGX_PAGE_MEASURE) { ret = __sgx_encl_extend(encl, epc_page); - if (ret) + + /* + * Destroy the enclave if EEXTEND fails, EADD can't be undone. + * Note, destroy() also frees the resources for the added page. + */ + if (ret) { sgx_encl_destroy(encl); - else - sgx_mark_page_reclaimable(encl_page->epc_page); + goto out_unlock; + } } + sgx_mark_page_reclaimable(encl_page->epc_page); + +out_unlock: mutex_unlock(&encl->lock); up_read(&current->mm->mmap_sem); return ret; @@ -666,7 +674,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg) if (copy_from_user(&einit, arg, sizeof(einit))) return -EFAULT; - initp_page = alloc_page(GFP_HIGHUSER); + initp_page = alloc_page(GFP_KERNEL); if (!initp_page) return -ENOMEM; Hope that all the updates will be fairly localized :-) /Jarkko
On Wed, Nov 06, 2019 at 11:59:42PM +0200, Jarkko Sakkinen wrote:
> On Wed, Nov 06, 2019 at 11:54:38PM +0200, Jarkko Sakkinen wrote:
> > On Mon, Nov 04, 2019 at 06:17:20PM -0800, Sean Christopherson wrote:
> > > On Tue, Nov 05, 2019 at 12:26:58AM +0200, Jarkko Sakkinen wrote:
> > > > On Mon, Nov 04, 2019 at 12:46:02PM -0800, Sean Christopherson wrote:
> > > > > On Mon, Nov 04, 2019 at 10:01:39PM +0200, Jarkko Sakkinen wrote:
> > > > > > The reasoning is the same as in
> > > > > >
> > > > > > http://git.infradead.org/users/jjs/linux-tpmdd.git/commit/abd55954f91a3aacc1d260d2411cf776ec4d5fd2
> > > > > >
> > > > > > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> > > > > > ---
> > > > > > arch/x86/kernel/cpu/sgx/ioctl.c | 4 ++--
> > > > > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > > > > >
> > > > > > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> > > > > > index 5b28a9c0cb68..d53aee5a64c1 100644
> > > > > > --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> > > > > > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> > > > > > @@ -259,7 +259,7 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
> > > > > > if (copy_from_user(&ecreate, arg, sizeof(ecreate)))
> > > > > > return -EFAULT;
> > > > > >
> > > > > > - secs_page = alloc_page(GFP_HIGHUSER);
> > > > > > + secs_page = alloc_page(GFP_KERNEL);
> > > > > > if (!secs_page)
> > > > > > return -ENOMEM;
> > > > > >
> > > > > > @@ -674,7 +674,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
> > > > > > if (copy_from_user(&einit, arg, sizeof(einit)))
> > > > > > return -EFAULT;
> > > > > >
> > > > > > - initp_page = alloc_page(GFP_HIGHUSER);
> > > > > > + initp_page = alloc_page(GFP_KERNEL);
> > > > >
> > > > > Would it make sense to use GFP_KERNEL_ACCOUNT? The accounting would be
> > > > > weird for the case where userspace is using a builder process, but even in
> > > > > that case it's not flat out wrong to account per-enclave memory allocations.
> > > >
> > > > I did not find a single call site that would use that for allocating
> > > > memory for function-internal data.
> > >
> > > Actually, the fact that the allocations are transient is an even better
> > > argument for accounting the memory, as the weirdness I was referring to
> > > doesn't exist for the builder concept.
> > >
> > > But looking more closely, Documentation/core-api/memory-allocation.rst
> > > states:
> > >
> > > * Untrusted allocations triggered from userspace should be a subject
> > > of kmem accounting and must have ``__GFP_ACCOUNT`` bit set. There
> > > is the handy ``GFP_KERNEL_ACCOUNT`` shortcut for ``GFP_KERNEL``
> > > allocations that should be accounted.
> > >
> > > That means all uses of GFP_KERNEL except in sgx_alloc_epc_section() should
> > > be converted to GFP_KERNEL_ACCOUNT. As is, depending on fd limits[*], a
> > > single process can easily burn through multiple GBs of memory simply by
> > > opening /dev/sgx/enclave in a loop.
> >
> > What does the documentation mean by untrusted allocation?
> >
> > __GFP_ACCOUNT kernel and GFP_KERNEL_ACCOUNT are both quite alien flags
> > to me as is kmemcg. Things that I know that exist but have never had to
> > deal with them.
> >
> > Looking at the kernel source code they rarely get used. Many drivers
> > have process bound data structures but none of the drivers use these
> > flags. I'm wondering why.
> >
> > Why sgx_alloc_epc_section() is a use case given that it is something
> > that allocates memory for the global EPC database?
> >
> > > [*] AFAICT, systemd is upping the max number of open files to 1M on my
> > > systems. I don't _think_ I changed a setting anywhere?
>
> Anyway, the tree is now updated:
>
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index 5b82670bb79a..d53aee5a64c1 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -259,7 +259,7 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
> if (copy_from_user(&ecreate, arg, sizeof(ecreate)))
> return -EFAULT;
>
> - secs_page = alloc_page(GFP_HIGHUSER);
> + secs_page = alloc_page(GFP_KERNEL);
> if (!secs_page)
> return -ENOMEM;
>
> @@ -427,12 +427,20 @@ static int sgx_encl_add_page(struct sgx_encl *encl,
>
> if (addp->flags & SGX_PAGE_MEASURE) {
> ret = __sgx_encl_extend(encl, epc_page);
> - if (ret)
> +
> + /*
> + * Destroy the enclave if EEXTEND fails, EADD can't be undone.
> + * Note, destroy() also frees the resources for the added page.
> + */
> + if (ret) {
> sgx_encl_destroy(encl);
> - else
> - sgx_mark_page_reclaimable(encl_page->epc_page);
> + goto out_unlock;
> + }
> }
>
> + sgx_mark_page_reclaimable(encl_page->epc_page);
> +
> +out_unlock:
> mutex_unlock(&encl->lock);
> up_read(&current->mm->mmap_sem);
> return ret;
> @@ -666,7 +674,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
> if (copy_from_user(&einit, arg, sizeof(einit)))
> return -EFAULT;
>
> - initp_page = alloc_page(GFP_HIGHUSER);
> + initp_page = alloc_page(GFP_KERNEL);
> if (!initp_page)
> return -ENOMEM;
>
> Hope that all the updates will be fairly localized :-)
Also removed some patches on top that were pushed by accident
(patches under review).
/Jarkko