On Tue, Nov 07, 2017 at 02:40:37AM +0000, Mathieu Desnoyers wrote:
> ----- On Nov 6, 2017, at 9:07 PM, Boqun Feng boqun.feng@gmail.com wrote:
>
> > On Mon, Nov 06, 2017 at 03:56:38PM -0500, Mathieu Desnoyers wrote:
> > [...]
> >> +static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
> >> +		struct page ***pinned_pages_ptr, size_t *nr_pinned,
> >> +		int write)
> >> +{
> >> +	struct page *pages[2];
> >> +	int ret, nr_pages;
> >> +
> >> +	if (!len)
> >> +		return 0;
> >> +	nr_pages = cpu_op_range_nr_pages(addr, len);
> >> +	BUG_ON(nr_pages > 2);
> >> +	if (*nr_pinned + nr_pages > NR_PINNED_PAGES_ON_STACK) {
> >
> > Is this a bug? Seems you will kzalloc() every time if *nr_pinned is
> > bigger than NR_PINNED_PAGES_ON_STACK, which will result in memory
> > leaking.
> >
> > I think the logic here is complex enough for us to introduce a
> > structure, like:
> >
> > 	struct cpu_opv_page_pinner {
> > 		int nr_pinned;
> > 		bool is_kmalloc;
> > 		struct page **pinned_pages;
> > 	};
> >
> > Thoughts?
>
> Good catch !
>
> How about the attached diff ? I'll fold it into the rseq/dev tree.
>

Looks good to me ;-)

Regards,
Boqun

> Thanks,
>
> Mathieu
>
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com

> diff --git a/kernel/cpu_opv.c b/kernel/cpu_opv.c
> index 09754bbe6a4f..3d8fd66416a0 100644
> --- a/kernel/cpu_opv.c
> +++ b/kernel/cpu_opv.c
> @@ -46,6 +46,12 @@ union op_fn_data {
>  #endif
>  };
>  
> +struct cpu_opv_pinned_pages {
> +	struct page **pages;
> +	size_t nr;
> +	bool is_kmalloc;
> +};
> +
>  typedef int (*op_fn_t)(union op_fn_data *data, uint64_t v, uint32_t len);
>  
>  static DEFINE_MUTEX(cpu_opv_offline_lock);
> @@ -217,8 +223,7 @@ static int cpu_op_check_pages(struct page **pages,
>  }
>  
>  static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
> -		struct page ***pinned_pages_ptr, size_t *nr_pinned,
> -		int write)
> +		struct cpu_opv_pinned_pages *pin_pages, int write)
>  {
>  	struct page *pages[2];
>  	int ret, nr_pages;
> @@ -227,15 +232,17 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
>  		return 0;
>  	nr_pages = cpu_op_range_nr_pages(addr, len);
>  	BUG_ON(nr_pages > 2);
> -	if (*nr_pinned + nr_pages > NR_PINNED_PAGES_ON_STACK) {
> +	if (!pin_pages->is_kmalloc && pin_pages->nr + nr_pages
> +			> NR_PINNED_PAGES_ON_STACK) {
>  		struct page **pinned_pages =
>  			kzalloc(CPU_OP_VEC_LEN_MAX * CPU_OP_MAX_PAGES
>  				* sizeof(struct page *), GFP_KERNEL);
>  		if (!pinned_pages)
>  			return -ENOMEM;
> -		memcpy(pinned_pages, *pinned_pages_ptr,
> -			*nr_pinned * sizeof(struct page *));
> -		*pinned_pages_ptr = pinned_pages;
> +		memcpy(pinned_pages, pin_pages->pages,
> +			pin_pages->nr * sizeof(struct page *));
> +		pin_pages->pages = pinned_pages;
> +		pin_pages->is_kmalloc = true;
>  	}
>  again:
>  	ret = get_user_pages_fast(addr, nr_pages, write, pages);
> @@ -257,9 +264,9 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
>  	}
>  	if (ret)
>  		goto error;
> -	(*pinned_pages_ptr)[(*nr_pinned)++] = pages[0];
> +	pin_pages->pages[pin_pages->nr++] = pages[0];
>  	if (nr_pages > 1)
> -		(*pinned_pages_ptr)[(*nr_pinned)++] = pages[1];
> +		pin_pages->pages[pin_pages->nr++] = pages[1];
>  	return 0;
>  
>  error:
> @@ -270,7 +277,7 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
>  }
>  
>  static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> -		struct page ***pinned_pages_ptr, size_t *nr_pinned)
> +		struct cpu_opv_pinned_pages *pin_pages)
>  {
>  	int ret, i;
>  	bool expect_fault = false;
> @@ -289,7 +296,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
>  				goto error;
>  			ret = cpu_op_pin_pages(
>  					(unsigned long)op->u.compare_op.a,
> -					op->len, pinned_pages_ptr, nr_pinned, 0);
> +					op->len, pin_pages, 0);
>  			if (ret)
>  				goto error;
>  			ret = -EFAULT;
> @@ -299,7 +306,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
>  				goto error;
>  			ret = cpu_op_pin_pages(
>  					(unsigned long)op->u.compare_op.b,
> -					op->len, pinned_pages_ptr, nr_pinned, 0);
> +					op->len, pin_pages, 0);
>  			if (ret)
>  				goto error;
>  			break;
> @@ -311,7 +318,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
>  				goto error;
>  			ret = cpu_op_pin_pages(
>  					(unsigned long)op->u.memcpy_op.dst,
> -					op->len, pinned_pages_ptr, nr_pinned, 1);
> +					op->len, pin_pages, 1);
>  			if (ret)
>  				goto error;
>  			ret = -EFAULT;
> @@ -321,7 +328,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
>  				goto error;
>  			ret = cpu_op_pin_pages(
>  					(unsigned long)op->u.memcpy_op.src,
> -					op->len, pinned_pages_ptr, nr_pinned, 0);
> +					op->len, pin_pages, 0);
>  			if (ret)
>  				goto error;
>  			break;
> @@ -333,7 +340,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
>  				goto error;
>  			ret = cpu_op_pin_pages(
>  					(unsigned long)op->u.arithmetic_op.p,
> -					op->len, pinned_pages_ptr, nr_pinned, 1);
> +					op->len, pin_pages, 1);
>  			if (ret)
>  				goto error;
>  			break;
> @@ -347,7 +354,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
>  				goto error;
>  			ret = cpu_op_pin_pages(
>  					(unsigned long)op->u.bitwise_op.p,
> -					op->len, pinned_pages_ptr, nr_pinned, 1);
> +					op->len, pin_pages, 1);
>  			if (ret)
>  				goto error;
>  			break;
> @@ -360,7 +367,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
>  				goto error;
>  			ret = cpu_op_pin_pages(
>  					(unsigned long)op->u.shift_op.p,
> -					op->len, pinned_pages_ptr, nr_pinned, 1);
> +					op->len, pin_pages, 1);
>  			if (ret)
>  				goto error;
>  			break;
> @@ -373,9 +380,9 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
>  	return 0;
>  
>  error:
> -	for (i = 0; i < *nr_pinned; i++)
> -		put_page((*pinned_pages_ptr)[i]);
> -	*nr_pinned = 0;
> +	for (i = 0; i < pin_pages->nr; i++)
> +		put_page(pin_pages->pages[i]);
> +	pin_pages->nr = 0;
>  	/*
>  	 * If faulting access is expected, return EAGAIN to user-space.
>  	 * It allows user-space to distinguish between a fault caused by
> @@ -923,9 +930,12 @@ SYSCALL_DEFINE4(cpu_opv, struct cpu_op __user *, ucpuopv, int, cpuopcnt,
>  {
>  	struct cpu_op cpuopv[CPU_OP_VEC_LEN_MAX];
>  	struct page *pinned_pages_on_stack[NR_PINNED_PAGES_ON_STACK];
> -	struct page **pinned_pages = pinned_pages_on_stack;
> +	struct cpu_opv_pinned_pages pin_pages = {
> +		.pages = pinned_pages_on_stack,
> +		.nr = 0,
> +		.is_kmalloc = false,
> +	};
>  	int ret, i;
> -	size_t nr_pinned = 0;
>  
>  	if (unlikely(flags))
>  		return -EINVAL;
> @@ -938,15 +948,14 @@ SYSCALL_DEFINE4(cpu_opv, struct cpu_op __user *, ucpuopv, int, cpuopcnt,
>  	ret = cpu_opv_check(cpuopv, cpuopcnt);
>  	if (ret)
>  		return ret;
> -	ret = cpu_opv_pin_pages(cpuopv, cpuopcnt,
> -			&pinned_pages, &nr_pinned);
> +	ret = cpu_opv_pin_pages(cpuopv, cpuopcnt, &pin_pages);
>  	if (ret)
>  		goto end;
>  	ret = do_cpu_opv(cpuopv, cpuopcnt, cpu);
> -	for (i = 0; i < nr_pinned; i++)
> -		put_page(pinned_pages[i]);
> +	for (i = 0; i < pin_pages.nr; i++)
> +		put_page(pin_pages.pages[i]);
>  end:
> -	if (pinned_pages != pinned_pages_on_stack)
> -		kfree(pinned_pages);
> +	if (pin_pages.is_kmalloc)
> +		kfree(pin_pages.pages);
>  	return ret;
>  }
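
[Editor's note: for readers following along outside the kernel tree, below is a minimal,
self-contained userspace sketch of the pattern the fix settles on: a small on-stack array
covers the common case, a single heap allocation happens at most once when that array would
overflow (tracked by an is_kmalloc-style flag rather than by comparing pointers), and the
heap copy is freed exactly once at the end. The names used here (struct pin_buf,
pin_buf_add, pin_buf_release, PIN_BUF_ON_STACK, PIN_BUF_MAX) are illustrative only and do
not appear in the patch.]

/*
 * Illustrative userspace analogue (not kernel code): a small on-stack
 * array covers the common case; a heap buffer is allocated at most once
 * when the array would overflow, and freed exactly once by the caller.
 */
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#define PIN_BUF_ON_STACK	8	/* stand-in for NR_PINNED_PAGES_ON_STACK */
#define PIN_BUF_MAX		64	/* stand-in for the vector-wide maximum */

struct pin_buf {
	void **items;
	size_t nr;
	bool is_malloc;		/* mirrors pin_pages->is_kmalloc */
};

/* Add one item, switching to the heap at most once. */
static int pin_buf_add(struct pin_buf *buf, void *item)
{
	if (!buf->is_malloc && buf->nr + 1 > PIN_BUF_ON_STACK) {
		void **heap = calloc(PIN_BUF_MAX, sizeof(*heap));

		if (!heap)
			return -1;
		memcpy(heap, buf->items, buf->nr * sizeof(*heap));
		buf->items = heap;
		buf->is_malloc = true;	/* never reallocated again */
	}
	buf->items[buf->nr++] = item;
	return 0;
}

static void pin_buf_release(struct pin_buf *buf)
{
	if (buf->is_malloc)
		free(buf->items);
}

int main(void)
{
	void *on_stack[PIN_BUF_ON_STACK];
	struct pin_buf buf = {
		.items = on_stack,
		.nr = 0,
		.is_malloc = false,
	};
	int dummy;

	/* Overflow the stack array on purpose to exercise the heap path. */
	for (int i = 0; i < 2 * PIN_BUF_ON_STACK; i++)
		if (pin_buf_add(&buf, &dummy))
			break;
	pin_buf_release(&buf);
	return 0;
}

[The is_kmalloc-style flag is what prevents the leak Boqun spotted: once the buffer has
moved to the heap, later calls skip the allocation branch entirely, and the release path no
longer needs to compare against the on-stack address.]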