* [PATCH RFC 1/2] rlimits: add infra to report violations
2016-09-07 10:27 ` [PATCH RFC 0/2] " Yauheni Kaliuta
@ 2016-09-07 10:27 ` Yauheni Kaliuta
2016-09-07 10:27 ` [PATCH RFC 2/2] rlimits: report resource limits violations Yauheni Kaliuta
2016-09-09 9:28 ` [PATCH RFC 0/2] rlimit exceed notification events Christoph Hellwig
2 siblings, 0 replies; 9+ messages in thread
From: Yauheni Kaliuta @ 2016-09-07 10:27 UTC (permalink / raw)
To: linux-kernel; +Cc: aris, jolsa
The patch defines tracepoints for reporting resource limits (rlimits)
violations and adds a thin layer to be called from rlimits-aware code
without a direct dependency on the tracepoints.
Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
---
include/linux/resource.h | 5 +++
kernel/Makefile | 4 +-
kernel/rlimit.c | 26 +++++++++++
kernel/trace-rlimit.h | 112 +++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 146 insertions(+), 1 deletion(-)
create mode 100644 kernel/rlimit.c
create mode 100644 kernel/trace-rlimit.h
diff --git a/include/linux/resource.h b/include/linux/resource.h
index 5bc3116e649c..f4de2827e647 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -9,5 +9,10 @@ struct task_struct;
int getrusage(struct task_struct *p, int who, struct rusage __user *ru);
int do_prlimit(struct task_struct *tsk, unsigned int resource,
struct rlimit *new_rlim, struct rlimit *old_rlim);
+void rlimit_exceeded_task(int rlimit_id, u64 req, struct task_struct *task);
+void rlimit_exceeded(int rlimit_id, u64 req);
+void rlimit_hard_exceeded_task(int rlimit_id, u64 req,
+ struct task_struct *task);
+void rlimit_hard_exceeded(int rlimit_id, u64 req);
#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index e2ec54e2b952..30999d83a261 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o \
extable.o params.o \
kthread.o sys_ni.o nsproxy.o \
notifier.o ksysfs.o cred.o reboot.o \
- async.o range.o smpboot.o
+ async.o range.o smpboot.o rlimit.o
obj-$(CONFIG_MULTIUSER) += groups.o
@@ -18,6 +18,8 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE)
endif
+CFLAGS_rlimit.o := -I$(src)
+
# Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip()
# in coverage traces.
KCOV_INSTRUMENT_softirq.o := n
diff --git a/kernel/rlimit.c b/kernel/rlimit.c
new file mode 100644
index 000000000000..0b42ebc3a9d5
--- /dev/null
+++ b/kernel/rlimit.c
@@ -0,0 +1,26 @@
+
+#include <linux/resource.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace-rlimit.h"
+
+void rlimit_exceeded_task(int rlimit_id, u64 req, struct task_struct *task)
+{
+ trace_rlimit_exceeded(rlimit_id, task_rlimit(task, rlimit_id), req,
+ task_pid_nr(task), task->comm);
+}
+
+void rlimit_exceeded(int rlimit_id, u64 req)
+{
+ rlimit_exceeded_task(rlimit_id, req, current);
+}
+
+void rlimit_hard_exceeded_task(int rlimit_id, u64 req, struct task_struct *task)
+{
+ trace_rlimit_hard_exceeded(rlimit_id, task_rlimit_max(task, rlimit_id),
+ req, task_pid_nr(task), task->comm);
+}
+void rlimit_hard_exceeded(int rlimit_id, u64 req)
+{
+ rlimit_hard_exceeded_task(rlimit_id, req, current);
+}
diff --git a/kernel/trace-rlimit.h b/kernel/trace-rlimit.h
new file mode 100644
index 000000000000..e7433ae8a09e
--- /dev/null
+++ b/kernel/trace-rlimit.h
@@ -0,0 +1,112 @@
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rlimit
+
+#if !defined(_TRACE_RLIMIT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RLIMIT_H
+#include <linux/tracepoint.h>
+
+TRACE_DEFINE_ENUM(RLIMIT_CPU);
+TRACE_DEFINE_ENUM(RLIMIT_FSIZE);
+TRACE_DEFINE_ENUM(RLIMIT_DATA);
+TRACE_DEFINE_ENUM(RLIMIT_STACK);
+TRACE_DEFINE_ENUM(RLIMIT_CORE);
+TRACE_DEFINE_ENUM(RLIMIT_RSS);
+TRACE_DEFINE_ENUM(RLIMIT_NPROC);
+TRACE_DEFINE_ENUM(RLIMIT_NOFILE);
+TRACE_DEFINE_ENUM(RLIMIT_MEMLOCK);
+TRACE_DEFINE_ENUM(RLIMIT_AS);
+TRACE_DEFINE_ENUM(RLIMIT_LOCKS);
+TRACE_DEFINE_ENUM(RLIMIT_SIGPENDING);
+TRACE_DEFINE_ENUM(RLIMIT_MSGQUEUE);
+TRACE_DEFINE_ENUM(RLIMIT_NICE);
+TRACE_DEFINE_ENUM(RLIMIT_RTPRIO);
+TRACE_DEFINE_ENUM(RLIMIT_RTTIME);
+
+
+#define __print_rlimit_name(id_var) \
+ __print_symbolic(id_var, \
+ { RLIMIT_CPU, "CPU" }, \
+ { RLIMIT_FSIZE, "FSIZE" }, \
+ { RLIMIT_DATA, "DATA" }, \
+ { RLIMIT_STACK, "STACK" }, \
+ { RLIMIT_CORE, "CORE" }, \
+ { RLIMIT_RSS, "RSS" }, \
+ { RLIMIT_NPROC, "NPROC" }, \
+ { RLIMIT_NOFILE, "NOFILE" }, \
+ { RLIMIT_MEMLOCK, "MEMLOCK" }, \
+ { RLIMIT_AS, "AS" }, \
+ { RLIMIT_LOCKS, "LOCKS" }, \
+ { RLIMIT_SIGPENDING, "SIGPENDING" }, \
+ { RLIMIT_MSGQUEUE, "MSGQUEUE" }, \
+ { RLIMIT_NICE, "NICE" }, \
+ { RLIMIT_RTPRIO, "RTPRIO" }, \
+ { RLIMIT_RTTIME, "RTTIME" })
+
+DECLARE_EVENT_CLASS(rlimit_exceeded_template,
+
+ TP_PROTO(int rlimit_id,
+ unsigned long long cur,
+ unsigned long long req,
+ pid_t pid,
+ char *comm),
+
+ TP_ARGS(rlimit_id, cur, req, pid, comm),
+
+ TP_STRUCT__entry(
+ __field(int, rlimit_id)
+ __field(unsigned long long, cur)
+ __field(unsigned long long, req)
+ __field(pid_t, pid)
+ __string(comm, comm)
+ ),
+ TP_fast_assign(
+ __entry->rlimit_id = rlimit_id;
+ __entry->cur = cur;
+ __entry->req = req;
+ __entry->pid = pid;
+ __assign_str(comm, comm);
+ ),
+ TP_printk("RLIMIT %s violation [%s:%d]. Limit %llu, requested %s",
+ __print_rlimit_name(__entry->rlimit_id),
+ __get_str(comm),
+ __entry->pid,
+ __entry->cur,
+ __print_symbolic(__entry->req,
+ {(unsigned long long)-1, "Unknown"}))
+ );
+
+DEFINE_EVENT(rlimit_exceeded_template, rlimit_exceeded,
+ TP_PROTO(int rlimit_id,
+ unsigned long long cur,
+ unsigned long long req,
+ pid_t pid,
+ char *comm),
+
+ TP_ARGS(rlimit_id, cur, req, pid, comm)
+ );
+
+DEFINE_EVENT_PRINT(rlimit_exceeded_template, rlimit_hard_exceeded,
+ TP_PROTO(int rlimit_id,
+ unsigned long long cur,
+ unsigned long long req,
+ pid_t pid,
+ char *comm),
+
+ TP_ARGS(rlimit_id, cur, req, pid, comm),
+
+ TP_printk("Hard RLIMIT %s violation [%s:%d]. Limit %llu, requested %s",
+ __print_rlimit_name(__entry->rlimit_id),
+ __get_str(comm),
+ __entry->pid,
+ __entry->cur,
+ __print_symbolic(__entry->req,
+ {(unsigned long long)-1, "Unknown"}))
+ );
+
+#endif /* _TRACE_RLIMIT_H */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace-rlimit
+#include <trace/define_trace.h>
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH RFC 2/2] rlimits: report resource limits violations
2016-09-07 10:27 ` [PATCH RFC 0/2] " Yauheni Kaliuta
2016-09-07 10:27 ` [PATCH RFC 1/2] rlimits: add infra to report violations Yauheni Kaliuta
@ 2016-09-07 10:27 ` Yauheni Kaliuta
2016-09-07 21:20 ` Alexei Starovoitov
2016-09-09 9:28 ` [PATCH RFC 0/2] rlimit exceed notification events Christoph Hellwig
2 siblings, 1 reply; 9+ messages in thread
From: Yauheni Kaliuta @ 2016-09-07 10:27 UTC (permalink / raw)
To: linux-kernel; +Cc: aris, jolsa
The patch instruments the various resource limit checks with violation
reporting, using the infrastructure from the previous patch.
Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
---
arch/ia64/kernel/perfmon.c | 4 +++-
arch/powerpc/kvm/book3s_64_vio.c | 6 ++++--
arch/powerpc/mm/mmu_context_iommu.c | 6 ++++--
drivers/android/binder.c | 7 ++++++-
drivers/infiniband/core/umem.c | 1 +
drivers/infiniband/hw/hfi1/user_pages.c | 5 ++++-
drivers/infiniband/hw/qib/qib_user_pages.c | 1 +
drivers/infiniband/hw/usnic/usnic_uiom.c | 1 +
drivers/misc/mic/scif/scif_rma.c | 1 +
drivers/vfio/vfio_iommu_spapr_tce.c | 6 ++++--
drivers/vfio/vfio_iommu_type1.c | 4 ++++
fs/attr.c | 4 +++-
fs/binfmt_aout.c | 4 +++-
fs/binfmt_flat.c | 1 +
fs/coredump.c | 4 +++-
fs/exec.c | 14 ++++++++++----
fs/file.c | 26 +++++++++++++++++++++-----
fs/select.c | 4 +++-
include/linux/mm.h | 7 ++++++-
ipc/mqueue.c | 10 ++++++++--
ipc/shm.c | 1 +
kernel/bpf/syscall.c | 15 ++++++++++++---
kernel/events/core.c | 1 +
kernel/fork.c | 9 ++++++---
kernel/sched/core.c | 17 +++++++++++++----
kernel/signal.c | 7 ++++---
kernel/sys.c | 9 ++++++---
kernel/time/posix-cpu-timers.c | 8 ++++++++
mm/mlock.c | 14 +++++++++++++-
mm/mmap.c | 19 +++++++++++++++----
mm/mremap.c | 4 +++-
net/unix/af_unix.c | 9 ++++++---
32 files changed, 179 insertions(+), 50 deletions(-)
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 2436ad5f92c1..c765e94a7089 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2259,8 +2259,10 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
* if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
* return -ENOMEM;
*/
- if (size > task_rlimit(task, RLIMIT_MEMLOCK))
+ if (size > task_rlimit(task, RLIMIT_MEMLOCK)) {
+ rlimit_exceeded_task(RLIMIT_MEMLOCK, size, task);
return -ENOMEM;
+ }
/*
* We do the easy to undo allocations first.
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index c379ff5a4438..a0477260d398 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -67,10 +67,12 @@ static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
locked = current->mm->locked_vm + stt_pages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
ret = -ENOMEM;
- else
+ } else {
current->mm->locked_vm += stt_pages;
+ }
} else {
if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
stt_pages = current->mm->locked_vm;
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index da6a2168ae9e..421890d325df 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -42,10 +42,12 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
if (incr) {
locked = mm->locked_vm + npages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
ret = -ENOMEM;
- else
+ } else {
mm->locked_vm += npages;
+ }
} else {
if (WARN_ON_ONCE(npages > mm->locked_vm))
npages = mm->locked_vm;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 16288e777ec3..a44021051a02 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
struct files_struct *files = proc->files;
unsigned long rlim_cur;
unsigned long irqs;
+ int ret;
if (files == NULL)
return -ESRCH;
@@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
unlock_task_sighand(proc->tsk, &irqs);
- return __alloc_fd(files, 0, rlim_cur, flags);
+ ret = __alloc_fd(files, 0, rlim_cur, flags);
+ if (ret == -EMFILE)
+ rlimit_exceeded_task(RLIMIT_NOFILE, (u64)-1, proc->tsk);
+
+ return ret;
}
/*
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index c68746ce6624..8d7746b3a5c9 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -168,6 +168,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
ret = -ENOMEM;
goto out;
}
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 20f4ddcac3b0..1f510a13ed3b 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -95,8 +95,11 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
up_read(&mm->mmap_sem);
/* First, check the absolute limit against all pinned pages. */
- if (pinned + npages >= ulimit && !can_lock)
+ if (pinned + npages >= ulimit && !can_lock) {
+ /* if it's in pages, should be converted to bytes? */
+ rlimit_exceeded(RLIMIT_MEMLOCK, pinned + npages);
return false;
+ }
return ((nlocked + npages) <= size) || can_lock;
}
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 2d2b94fd3633..649a0a1317bb 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -61,6 +61,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (num_pages > lock_limit && !capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, num_pages << PAGE_SHIFT);
ret = -ENOMEM;
goto bail;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index a0b6ebee4d8a..11367fcc238b 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -129,6 +129,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
ret = -ENOMEM;
goto out;
}
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index e0203b1a20fd..0e83d14cda06 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -303,6 +303,7 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
dev_err(scif_info.mdev.this_device,
"locked(%lu) > lock_limit(%lu)\n",
locked, lock_limit);
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
return -ENOMEM;
}
mm->pinned_vm = locked;
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 80378ddadc5c..5ff1773b01f4 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -44,10 +44,12 @@ static long try_increment_locked_vm(long npages)
down_write(&current->mm->mmap_sem);
locked = current->mm->locked_vm + npages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
ret = -ENOMEM;
- else
+ } else {
current->mm->locked_vm += npages;
+ }
pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
npages << PAGE_SHIFT,
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 2ba19424e4a1..6929c0eaac9d 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -280,6 +280,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
put_pfn(*pfn_base, prot);
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
limit << PAGE_SHIFT);
+ rlimit_exceeded(RLIMIT_MEMLOCK,
+ (current->mm->locked_vm + 1) << PAGE_SHIFT);
return -ENOMEM;
}
@@ -308,6 +310,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
put_pfn(pfn, prot);
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
__func__, limit << PAGE_SHIFT);
+ rlimit_exceeded(RLIMIT_MEMLOCK,
+ (current->mm->locked_vm + i + 1) << PAGE_SHIFT);
break;
}
}
diff --git a/fs/attr.c b/fs/attr.c
index 42bb42bb3c72..62d3de88ab42 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -102,8 +102,10 @@ int inode_newsize_ok(const struct inode *inode, loff_t offset)
unsigned long limit;
limit = rlimit(RLIMIT_FSIZE);
- if (limit != RLIM_INFINITY && offset > limit)
+ if (limit != RLIM_INFINITY && offset > limit) {
+ rlimit_exceeded(RLIMIT_FSIZE, offset);
goto out_sig;
+ }
if (offset > inode->i_sb->s_maxbytes)
goto out_big;
} else {
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ae1b5404fced..9041ef2d419a 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -228,8 +228,10 @@ static int load_aout_binary(struct linux_binprm * bprm)
rlim = rlimit(RLIMIT_DATA);
if (rlim >= RLIM_INFINITY)
rlim = ~0;
- if (ex.a_data + ex.a_bss > rlim)
+ if (ex.a_data + ex.a_bss > rlim) {
+ rlimit_exceeded(RLIMIT_DATA, data_len + bss_len);
return -ENOMEM;
+ }
/* Flush all traces of the currently running executable */
retval = flush_old_exec(bprm);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 9b2917a30294..042864d44dff 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -512,6 +512,7 @@ static int load_flat_file(struct linux_binprm *bprm,
if (rlim >= RLIM_INFINITY)
rlim = ~0;
if (data_len + bss_len > rlim) {
+ rlimit_exceeded(RLIMIT_DATA, data_len + bss_len);
ret = -ENOMEM;
goto err;
}
diff --git a/fs/coredump.c b/fs/coredump.c
index 281b768000e6..8c7b6cadf262 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -784,8 +784,10 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
struct file *file = cprm->file;
loff_t pos = file->f_pos;
ssize_t n;
- if (cprm->written + nr > cprm->limit)
+ if (cprm->written + nr > cprm->limit) {
+ rlimit_exceeded(RLIMIT_CORE, cprm->written + nr);
return 0;
+ }
while (nr) {
if (dump_interrupted())
return 0;
diff --git a/fs/exec.c b/fs/exec.c
index 6fcfb3f7b137..6edc0eeeece0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -230,6 +230,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
*/
rlim = current->signal->rlim;
if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
+ /* should it be reported somehow? */
put_page(page);
return NULL;
}
@@ -1650,10 +1651,15 @@ static int do_execveat_common(int fd, struct filename *filename,
* don't check setuid() return code. Here we additionally recheck
* whether NPROC limit is still exceeded.
*/
- if ((current->flags & PF_NPROC_EXCEEDED) &&
- atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
- retval = -EAGAIN;
- goto out_ret;
+ if (current->flags & PF_NPROC_EXCEEDED) {
+ int nproc;
+
+ nproc = atomic_read(&current_user()->processes);
+ if (nproc > rlimit(RLIMIT_NPROC)) {
+ rlimit_exceeded(RLIMIT_NPROC, nproc);
+ retval = -EAGAIN;
+ goto out_ret;
+ }
}
/* We're below the limit (still or again), so we don't want to make
diff --git a/fs/file.c b/fs/file.c
index 6b1acdfe59da..d76fbb15e4ec 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -554,12 +554,22 @@ out:
static int alloc_fd(unsigned start, unsigned flags)
{
- return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
+ int ret;
+
+ ret = __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
+ if (ret == -EMFILE)
+ rlimit_exceeded(RLIMIT_NOFILE, (u64)-1);
+ return ret;
}
int get_unused_fd_flags(unsigned flags)
{
- return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
+ int ret;
+
+ ret = __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
+ if (ret == -EMFILE)
+ rlimit_exceeded(RLIMIT_NOFILE, (u64)-1);
+ return ret;
}
EXPORT_SYMBOL(get_unused_fd_flags);
@@ -872,8 +882,10 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
if (!file)
return __close_fd(files, fd);
- if (fd >= rlimit(RLIMIT_NOFILE))
+ if (fd >= rlimit(RLIMIT_NOFILE)) {
+ rlimit_exceeded(RLIMIT_NOFILE, fd);
return -EBADF;
+ }
spin_lock(&files->file_lock);
err = expand_files(files, fd);
@@ -898,8 +910,10 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
if (unlikely(oldfd == newfd))
return -EINVAL;
- if (newfd >= rlimit(RLIMIT_NOFILE))
+ if (newfd >= rlimit(RLIMIT_NOFILE)) {
+ rlimit_exceeded(RLIMIT_NOFILE, newfd);
return -EBADF;
+ }
spin_lock(&files->file_lock);
err = expand_files(files, newfd);
@@ -953,8 +967,10 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
int f_dupfd(unsigned int from, struct file *file, unsigned flags)
{
int err;
- if (from >= rlimit(RLIMIT_NOFILE))
+ if (from >= rlimit(RLIMIT_NOFILE)) {
+ rlimit_exceeded(RLIMIT_NOFILE, from);
return -EINVAL;
+ }
err = alloc_fd(from, flags);
if (err >= 0) {
get_file(file);
diff --git a/fs/select.c b/fs/select.c
index 8ed9da50896a..adb057ce7897 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -886,8 +886,10 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
struct poll_list *walk = head;
unsigned long todo = nfds;
- if (nfds > rlimit(RLIMIT_NOFILE))
+ if (nfds > rlimit(RLIMIT_NOFILE)) {
+ rlimit_exceeded(RLIMIT_NOFILE, nfds);
return -EINVAL;
+ }
len = min_t(unsigned int, nfds, N_STACK_PPS);
for (;;) {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ef815b9cd426..1ef5ed878895 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2002,8 +2002,13 @@ static inline int check_data_rlimit(unsigned long rlim,
unsigned long start_data)
{
if (rlim < RLIM_INFINITY) {
- if (((new - start) + (end_data - start_data)) > rlim)
+ unsigned long data_size;
+
+ data_size = (new - start) + (end_data - start_data);
+ if (data_size > rlim) {
+ rlimit_exceeded(RLIMIT_DATA, data_size);
return -ENOSPC;
+ }
}
return 0;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 0b13ace266f2..85ac1b643522 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -275,13 +275,19 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
info->attr.mq_msgsize);
spin_lock(&mq_lock);
- if (u->mq_bytes + mq_bytes < u->mq_bytes ||
- u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
+ if (u->mq_bytes + mq_bytes < u->mq_bytes) {
spin_unlock(&mq_lock);
/* mqueue_evict_inode() releases info->messages */
ret = -EMFILE;
goto out_inode;
}
+ if (u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
+ spin_unlock(&mq_lock);
+ rlimit_exceeded(RLIMIT_MSGQUEUE, u->mq_bytes + mq_bytes);
+ /* mqueue_evict_inode() releases info->messages */
+ ret = -EMFILE;
+ goto out_inode;
+ }
u->mq_bytes += mq_bytes;
spin_unlock(&mq_lock);
diff --git a/ipc/shm.c b/ipc/shm.c
index dbac8860c721..640f17ae6094 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1034,6 +1034,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
goto out_unlock0;
}
if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, (u64)-1);
err = -EPERM;
goto out_unlock0;
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 228f962447a5..8494c1fe921e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -56,8 +56,11 @@ int bpf_map_precharge_memlock(u32 pages)
memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
cur = atomic_long_read(&user->locked_vm);
free_uid(user);
- if (cur + pages > memlock_limit)
+ if (cur + pages > memlock_limit) {
+ rlimit_exceeded(RLIMIT_MEMLOCK,
+ (cur + pages) << PAGE_SHIFT);
return -EPERM;
+ }
return 0;
}
@@ -65,14 +68,17 @@ static int bpf_map_charge_memlock(struct bpf_map *map)
{
struct user_struct *user = get_current_user();
unsigned long memlock_limit;
+ int npages;
memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
atomic_long_add(map->pages, &user->locked_vm);
- if (atomic_long_read(&user->locked_vm) > memlock_limit) {
+ npages = atomic_long_read(&user->locked_vm);
+ if (npages > memlock_limit) {
atomic_long_sub(map->pages, &user->locked_vm);
free_uid(user);
+ rlimit_exceeded(RLIMIT_MEMLOCK, npages << PAGE_SHIFT);
return -EPERM;
}
map->user = user;
@@ -603,13 +609,16 @@ static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = get_current_user();
unsigned long memlock_limit;
+ int npages;
memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
atomic_long_add(prog->pages, &user->locked_vm);
- if (atomic_long_read(&user->locked_vm) > memlock_limit) {
+ npages = atomic_long_read(&user->locked_vm);
+ if (npages > memlock_limit) {
atomic_long_sub(prog->pages, &user->locked_vm);
free_uid(user);
+ rlimit_exceeded(RLIMIT_MEMLOCK, npages << PAGE_SHIFT);
return -EPERM;
}
prog->aux->user = user;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cfabdf7b942..b74bf90d1fd4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5166,6 +5166,7 @@ accounting:
if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
!capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
ret = -EPERM;
goto unlock;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index beb31725f7e2..a80f2e11788d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1307,6 +1307,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
{
int retval;
struct task_struct *p;
+ int nproc;
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
return ERR_PTR(-EINVAL);
@@ -1368,11 +1369,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
retval = -EAGAIN;
- if (atomic_read(&p->real_cred->user->processes) >=
- task_rlimit(p, RLIMIT_NPROC)) {
+ nproc = atomic_read(&p->real_cred->user->processes);
+ if (nproc >= task_rlimit(p, RLIMIT_NPROC)) {
if (p->real_cred->user != INIT_USER &&
- !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
+ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+ rlimit_exceeded_task(RLIMIT_NPROC, nproc, p);
goto bad_fork_free;
+ }
}
current->flags &= ~PF_NPROC_EXCEEDED;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2a906f20fba7..1c66b3088684 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3736,9 +3736,13 @@ int can_nice(const struct task_struct *p, const int nice)
{
/* convert nice value [19,-20] to rlimit style value [1,40] */
int nice_rlim = nice_to_rlimit(nice);
+ int ret;
- return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
- capable(CAP_SYS_NICE));
+ ret = (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
+ capable(CAP_SYS_NICE));
+ if (!ret)
+ rlimit_exceeded(RLIMIT_NICE, nice_rlim);
+ return ret;
}
#ifdef __ARCH_WANT_SYS_NICE
@@ -4070,13 +4074,18 @@ recheck:
task_rlimit(p, RLIMIT_RTPRIO);
/* can't set/change the rt policy */
- if (policy != p->policy && !rlim_rtprio)
+ if (policy != p->policy && !rlim_rtprio) {
+ rlimit_exceeded(RLIMIT_RTPRIO, (u64)-1);
return -EPERM;
+ }
/* can't increase priority */
if (attr->sched_priority > p->rt_priority &&
- attr->sched_priority > rlim_rtprio)
+ attr->sched_priority > rlim_rtprio) {
+ rlimit_exceeded(RLIMIT_RTPRIO,
+ attr->sched_priority);
return -EPERM;
+ }
}
/*
diff --git a/kernel/signal.c b/kernel/signal.c
index af21afc00d08..1c03ca7484f7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -362,6 +362,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
{
struct sigqueue *q = NULL;
struct user_struct *user;
+ int nsigs;
/*
* Protect access to @t credentials. This can go away when all
@@ -372,11 +373,11 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
atomic_inc(&user->sigpending);
rcu_read_unlock();
- if (override_rlimit ||
- atomic_read(&user->sigpending) <=
- task_rlimit(t, RLIMIT_SIGPENDING)) {
+ nsigs = atomic_read(&user->sigpending);
+ if (override_rlimit || nsigs <= task_rlimit(t, RLIMIT_SIGPENDING)) {
q = kmem_cache_alloc(sigqueue_cachep, flags);
} else {
+ rlimit_exceeded_task(RLIMIT_SIGPENDING, nsigs, t);
print_dropped_signal(sig);
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 89d5be418157..28b718ac1fb1 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -421,6 +421,7 @@ error:
static int set_user(struct cred *new)
{
struct user_struct *new_user;
+ int nproc;
new_user = alloc_uid(new->uid);
if (!new_user)
@@ -433,11 +434,13 @@ static int set_user(struct cred *new)
* for programs doing set*uid()+execve() by harmlessly deferring the
* failure to the execve() stage.
*/
- if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
- new_user != INIT_USER)
+ nproc = atomic_read(&new_user->processes);
+ if (nproc >= rlimit(RLIMIT_NPROC) && new_user != INIT_USER) {
+ rlimit_exceeded(RLIMIT_NPROC, nproc);
current->flags |= PF_NPROC_EXCEEDED;
- else
+ } else {
current->flags &= ~PF_NPROC_EXCEEDED;
+ }
free_uid(new->user);
new->user = new_user;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 39008d78927a..ce50f2166776 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -860,6 +860,9 @@ static void check_thread_timers(struct task_struct *tsk,
if (hard != RLIM_INFINITY &&
tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
+ rlimit_hard_exceeded_task(RLIMIT_RTTIME,
+ tsk->rt.timeout,
+ tsk);
/*
* At the hard limit, we just die.
* No need to calculate anything else now.
@@ -875,6 +878,9 @@ static void check_thread_timers(struct task_struct *tsk,
soft += USEC_PER_SEC;
sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
}
+ rlimit_exceeded_task(RLIMIT_RTTIME,
+ tsk->rt.timeout,
+ tsk);
printk(KERN_INFO
"RT Watchdog Timeout: %s[%d]\n",
tsk->comm, task_pid_nr(tsk));
@@ -980,6 +986,7 @@ static void check_process_timers(struct task_struct *tsk,
READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
cputime_t x;
if (psecs >= hard) {
+ rlimit_hard_exceeded(RLIMIT_CPU, psecs);
/*
* At the hard limit, we just die.
* No need to calculate anything else now.
@@ -988,6 +995,7 @@ static void check_process_timers(struct task_struct *tsk,
return;
}
if (psecs >= soft) {
+ rlimit_exceeded(RLIMIT_CPU, psecs);
/*
* At the soft limit, send a SIGXCPU every second.
*/
diff --git a/mm/mlock.c b/mm/mlock.c
index 14645be06e30..016c7089db04 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -28,6 +28,9 @@ bool can_do_mlock(void)
{
if (rlimit(RLIMIT_MEMLOCK) != 0)
return true;
+ else
+ rlimit_exceeded(RLIMIT_MEMLOCK, (u64)-1);
+
if (capable(CAP_IPC_LOCK))
return true;
return false;
@@ -643,6 +646,8 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
/* check against resource limits */
if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
error = apply_vma_lock_flags(start, len, flags);
+ else
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
up_write(&current->mm->mmap_sem);
if (error)
@@ -757,6 +762,10 @@ SYSCALL_DEFINE1(mlockall, int, flags)
if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
capable(CAP_IPC_LOCK))
ret = apply_mlockall_flags(flags);
+ else
+ rlimit_exceeded(RLIMIT_MEMLOCK,
+ current->mm->total_vm << PAGE_SHIFT);
+
up_write(&current->mm->mmap_sem);
if (!ret && (flags & MCL_CURRENT))
mm_populate(0, TASK_SIZE);
@@ -793,8 +802,11 @@ int user_shm_lock(size_t size, struct user_struct *user)
lock_limit >>= PAGE_SHIFT;
spin_lock(&shmlock_user_lock);
if (!allowed &&
- locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
+ locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK,
+ (locked + user->locked_shm) << PAGE_SHIFT);
goto out;
+ }
get_uid(user);
user->locked_shm += locked;
allowed = 1;
diff --git a/mm/mmap.c b/mm/mmap.c
index ca9d91bca0d6..500a247f1759 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1139,8 +1139,10 @@ static inline int mlock_future_check(struct mm_struct *mm,
locked += mm->locked_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
return -EAGAIN;
+ }
}
return 0;
}
@@ -2012,8 +2014,10 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
actual_size = size;
if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
actual_size -= PAGE_SIZE;
- if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+ if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur)) {
+ rlimit_exceeded(RLIMIT_STACK, actual_size);
return -ENOMEM;
+ }
/* mlock limit tests */
if (vma->vm_flags & VM_LOCKED) {
@@ -2022,8 +2026,10 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
locked = mm->locked_vm + grow;
limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
limit >>= PAGE_SHIFT;
- if (locked > limit && !capable(CAP_IPC_LOCK))
+ if (locked > limit && !capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
return -ENOMEM;
+ }
}
/* Check to ensure the stack will not grow into a hugetlb-only region */
@@ -2925,8 +2931,11 @@ out:
*/
bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
{
- if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
+ if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT) {
+ rlimit_exceeded(RLIMIT_AS,
+ (mm->total_vm + npages) << PAGE_SHIFT);
return false;
+ }
if (is_data_mapping(flags) &&
mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
@@ -2935,6 +2944,8 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
return true;
if (!ignore_rlimit_data) {
+ rlimit_exceeded(RLIMIT_DATA,
+ (mm->data_vm + npages) << PAGE_SHIFT);
pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits or use boot option ignore_rlimit_data.\n",
current->comm, current->pid,
(mm->data_vm + npages) << PAGE_SHIFT,
diff --git a/mm/mremap.c b/mm/mremap.c
index da22ad2a5678..8755433ec79c 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -378,8 +378,10 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
locked = mm->locked_vm << PAGE_SHIFT;
lock_limit = rlimit(RLIMIT_MEMLOCK);
locked += new_len - old_len;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+ rlimit_exceeded(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
return ERR_PTR(-EAGAIN);
+ }
}
if (!may_expand_vm(mm, vma->vm_flags,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f1dffe84f0d5..c365e5ab9ace 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1523,10 +1523,13 @@ static void unix_destruct_scm(struct sk_buff *skb)
static inline bool too_many_unix_fds(struct task_struct *p)
{
struct user_struct *user = current_user();
+ bool ret = false;
- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
- return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
- return false;
+ if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) {
+ rlimit_exceeded_task(RLIMIT_NOFILE, user->unix_inflight, p);
+ ret = !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ }
+ return ret;
}
#define MAX_RECURSION_LEVEL 4
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread