All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: blkdev loop UAF
       [not found] <654967ea.6cce.160e43379f6.Coremail.long7573@126.com>
@ 2018-01-11  8:24 ` Dan Carpenter
  2018-01-11 11:22   ` Hou Tao
  0 siblings, 1 reply; 5+ messages in thread
From: Dan Carpenter @ 2018-01-11  8:24 UTC (permalink / raw)
  To: Foy, Paolo Valente, Jens Axboe; +Cc: security, syzkaller, linux-block

Thanks for your report and the patch.  I am sending it to the
linux-block devs since it's already public.

regards,
dan carpenter

On Thu, Jan 11, 2018 at 03:51:06PM +0800, Foy wrote:
> BUG:
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 4a181fc..db919a9 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -1430,12 +1430,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
>   restart:
>  
>         ret = -ENXIO;
> +       //2. Process C: loop_control_ioctl ==> LOOP_CTL_REMOVE ==> idr_remove 
> +       //3. Process B: get_gendisk ==> get_gendisk ==> kobj_lookup ==> loop_probe ==> loop_add, get a new disk(2)
>         disk = get_gendisk(bdev->bd_dev, &partno);
>         if (!disk)
>                 goto out;
>         owner = disk->fops->owner;
>  
>         disk_block_events(disk);
> +       //1. Process A get the disk(1),before the mutex_lock_nested.And then be scheduled 
>         mutex_lock_nested(&bdev->bd_mutex, for_part);
>         if (!bdev->bd_openers) {
>                 bdev->bd_disk = disk;
> @@ -1524,14 +1527,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
>                         if (ret)
>                                 goto out_unlock_bdev;
>                 }
> +               //5. Process A disk(1) will be free,because disk(1)'s refs == 1
>                 /* only one opener holds refs to the module and disk */
>                 put_disk(disk);
>                 module_put(owner);
>         }
> +       //4. Process B: bdev->bd_openers != 0
>         bdev->bd_openers++;
>         if (for_part)
>                 bdev->bd_part_count++;
>         mutex_unlock(&bdev->bd_mutex);
> +       //6. Process A the disk(1) will be use
>         disk_unblock_events(disk);
>         return 0;
> 
> 
> 
> 
> Patch:
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 4a181fc..1f5c7bf 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -1526,13 +1526,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
>  }
>  /* only one opener holds refs to the module and disk */
>  put_disk(disk);
> +disk = NULL;
>  module_put(owner);
>  }
>  bdev->bd_openers++;
>  if (for_part)
>  bdev->bd_part_count++;
>  mutex_unlock(&bdev->bd_mutex);
> -disk_unblock_events(disk);
> +if (disk)
> +disk_unblock_events(disk);
>  return 0;
>  
>   out_clear:
> 
> 
> 
> 
> Crash:
> ==================================================================
> BUG: KASAN: use-after-free in disk_unblock_events+0x4b/0x50 block/genhd.c:1657
> Read of size 8 at addr ffff880035c273f8 by task syz-executor6/21165
> 
> 
> CPU: 0 PID: 21165 Comm: syz-executor6 Not tainted 4.15.0-rc6 #18
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
> Call Trace:
>  __dump_stack lib/dump_stack.c:17 [inline]
>  dump_stack+0x104/0x1c5 lib/dump_stack.c:53
>  print_address_description+0x6e/0x280 mm/kasan/report.c:252
>  kasan_report_error mm/kasan/report.c:351 [inline]
>  kasan_report+0x254/0x340 mm/kasan/report.c:409
>  disk_unblock_events+0x4b/0x50 block/genhd.c:1657
>  __blkdev_get+0x5f5/0xdb0 fs/block_dev.c:1535
>  blkdev_get+0x338/0x9e0 fs/block_dev.c:1591
>  blkdev_open+0x1bd/0x240 fs/block_dev.c:1749
>  do_dentry_open+0x682/0xd80 fs/open.c:752
>  vfs_open+0x107/0x220 fs/open.c:866
>  do_last fs/namei.c:3379 [inline]
>  path_openat+0x1051/0x3220 fs/namei.c:3519
>  do_filp_open+0x25b/0x3b0 fs/namei.c:3554
>  do_sys_open+0x4ab/0x650 fs/open.c:1059
>  entry_SYSCALL_64_fastpath+0x1f/0x96
> RIP: 0033:0x40cd41
> RSP: 002b:00007ff1c06e5780 EFLAGS: 00000293 ORIG_RAX: 0000000000000002
> RAX: ffffffffffffffda RBX: 000000000071bf58 RCX: 000000000040cd41
> RDX: 0000000000000000 RSI: 0000000000080102 RDI: 00007ff1c06e5830
> RBP: 00000000000001e4 R08: 000000000000ffff R09: 0000000000000000
> R10: 0000000020024400 R11: 0000000000000293 R12: 00000000006efe00
> R13: 00000000ffffffff R14: 00007ff1c06e66d4 R15: 0000000000000002
> 
> 
> Allocated by task 21138:
>  set_track mm/kasan/kasan.c:459 [inline]
>  kasan_kmalloc+0xa9/0xd0 mm/kasan/kasan.c:551
>  kmem_cache_alloc_node_trace+0x153/0x280 mm/slub.c:2780
>  kmalloc_node include/linux/slab.h:537 [inline]
>  kzalloc_node include/linux/slab.h:699 [inline]
>  __alloc_disk_node+0xab/0x490 block/genhd.c:1400
>  loop_add+0x42f/0xa00 drivers/block/loop.c:1808
>  loop_control_ioctl+0x11c/0x450 drivers/block/loop.c:1940
>  vfs_ioctl fs/ioctl.c:46 [inline]
>  do_vfs_ioctl+0x18b/0x13c0 fs/ioctl.c:686
>  SYSC_ioctl fs/ioctl.c:701 [inline]
>  SyS_ioctl+0x7e/0xb0 fs/ioctl.c:692
>  entry_SYSCALL_64_fastpath+0x1f/0x96
> 
> 
> Freed by task 21165:
>  set_track mm/kasan/kasan.c:459 [inline]
>  kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
>  slab_free_hook mm/slub.c:1391 [inline]
>  slab_free_freelist_hook mm/slub.c:1412 [inline]
>  slab_free mm/slub.c:2968 [inline]
>  kfree+0xe2/0x2c0 mm/slub.c:3899
>  disk_release+0x300/0x3c0 block/genhd.c:1249
>  device_release+0x76/0x200 drivers/base/core.c:814
>  kobject_cleanup lib/kobject.c:648 [inline]
>  kobject_release lib/kobject.c:677 [inline]
>  kref_put include/linux/kref.h:70 [inline]
>  kobject_put+0x13d/0x230 lib/kobject.c:694
>  put_disk+0x1f/0x30 block/genhd.c:1465
>  __blkdev_get+0x560/0xdb0 fs/block_dev.c:1528
>  blkdev_get+0x338/0x9e0 fs/block_dev.c:1591
>  blkdev_open+0x1bd/0x240 fs/block_dev.c:1749
>  do_dentry_open+0x682/0xd80 fs/open.c:752
>  vfs_open+0x107/0x220 fs/open.c:866
>  do_last fs/namei.c:3379 [inline]
>  path_openat+0x1051/0x3220 fs/namei.c:3519
>  do_filp_open+0x25b/0x3b0 fs/namei.c:3554
>  do_sys_open+0x4ab/0x650 fs/open.c:1059
>  entry_SYSCALL_64_fastpath+0x1f/0x96
> 
> 
> The buggy address belongs to the object at ffff880035c26e80
>  which belongs to the cache kmalloc-2048 of size 2048
> The buggy address is located 1400 bytes inside of
>  2048-byte region [ffff880035c26e80, ffff880035c27680)
> The buggy address belongs to the page:
> page:0000000003af101f count:1 mapcount:0 mapping:          (null) index:0x0 compound_mapcount: 0
> flags: 0x100000000008100(slab|head)
> raw: 0100000000008100 0000000000000000 0000000000000000 00000001000f000f
> raw: ffffea0000946a00 0000000200000002 ffff880035c02d80 0000000000000000
> page dumped because: kasan: bad access detected
> 
> 
> Memory state around the buggy address:
>  ffff880035c27280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>  ffff880035c27300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> >ffff880035c27380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>                                                                 ^
>  ffff880035c27400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>  ffff880035c27480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ==================================================================
> 
> 
> 
> 
> 


> #include <stdio.h>
> #include <string.h>
> #include <errno.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include <fcntl.h>
> #include <pthread.h>
> #include <arpa/inet.h>
> #include <sys/types.h>
> #include <sys/socket.h>
> #include <sys/syscall.h>
> #include <sys/mman.h>
> #include <sys/time.h>
> #include <sys/resource.h>
> #include <netinet/in.h>
> #include <netinet/udp.h>
> #include <netinet/ip.h>
> #include <linux/xfrm.h>
> #include <linux/netlink.h>
> #include <linux/loop.h>
> #include <stdarg.h>
> #include <stdbool.h>
> #include <stddef.h>
> #include <sys/prctl.h>
> #include <sys/time.h>
> #include <sys/resource.h>
> 
> #define false 0
> #define true 1
> int controlfd;
> int count = 0;
> 
> #define ERR_EXIT1(m) \
>         do \
>         { \
>                 perror(m); \
>                 exit(EXIT_FAILURE); \
>         } while(0)
> 
> #define ERR_EXIT(m) \
>         do \
>         { \
>                 perror(m); \
>         } while(0)
> 
> void send_fd(int sock_fd, int send_fd) {
> 	int ret;
> 	struct msghdr msg;
> 	struct cmsghdr *p_cmsg;
> 	struct iovec vec;
> 	char cmsgbuf[CMSG_SPACE(sizeof(send_fd) * 253)];
> 	printf("cmsgbuf size:%d \n", sizeof(cmsgbuf));
> 	int *p_fds;
> 	char sendchar = 0;
> 	int i;
> 	msg.msg_control = cmsgbuf;
> 	msg.msg_controllen = sizeof(cmsgbuf);
> 	p_cmsg = CMSG_FIRSTHDR(&msg);
> 	p_cmsg->cmsg_level = SOL_SOCKET;
> 	p_cmsg->cmsg_type = SCM_RIGHTS;
> 	p_cmsg->cmsg_len = CMSG_LEN(sizeof(send_fd) * 253);
> 	p_fds = (int *) CMSG_DATA(p_cmsg);
> 	for (i = 0; i < 253; i++) {
> 		p_fds[i] = send_fd; // ??????????????????????????????????????????????????????
> 	}
> 
> 	msg.msg_name = NULL;
> 	msg.msg_namelen = 0;
> 	msg.msg_iov = &vec;
> 	msg.msg_iovlen = 1; //??????????????????????????????????????????1?????????
> 	msg.msg_flags = 0;
> 
> 	vec.iov_base = &sendchar;
> 	vec.iov_len = sizeof(sendchar);
> 	while (1) {
> 		ret = sendmsg(sock_fd, &msg, 0);
> 		if (ret != 1)
> 			ERR_EXIT("sendmsg");
> 	}
> }
> 
> int recv_fd(const int sock_fd) {
> 	int ret;
> 	struct msghdr msg;
> 	char recvchar;
> 	struct iovec vec;
> 	int recv_fd;
> 	char cmsgbuf[CMSG_SPACE(sizeof(recv_fd))];
> 	struct cmsghdr *p_cmsg;
> 	int *p_fd, i;
> 	vec.iov_base = &recvchar;
> 	vec.iov_len = sizeof(recvchar);
> 	msg.msg_name = NULL;
> 	msg.msg_namelen = 0;
> 	msg.msg_iov = &vec;
> 	msg.msg_iovlen = 1;
> 	msg.msg_control = cmsgbuf;
> 	msg.msg_controllen = sizeof(cmsgbuf);
> 	msg.msg_flags = 0;
> 
> 	p_fd = (int *) CMSG_DATA(CMSG_FIRSTHDR(&msg));
> 	*p_fd = -1;
> 	while (1) {
> 		ret = recvmsg(sock_fd, &msg, 0);
> 		if (ret < 0) {
> 			char buff[256];
> 			snprintf(buff, "recvmsg11: sock_fd:%d ret:%d", sock_fd, ret);
> 			ERR_EXIT(buff);
> 		}
> 
> 		p_cmsg = CMSG_FIRSTHDR(&msg);
> 		if (p_cmsg == NULL)
> 			ERR_EXIT("no passed fd");
> 
> 		p_fd = (int *) CMSG_DATA(p_cmsg);
> 		for (i = 0; i < 253; i++) {
> 			close(p_fd[i]);
> 		}
> 	}
> 	recv_fd = *p_fd;
> 	if (recv_fd == -1)
> 		ERR_EXIT("no passed fd");
> 
> 	return recv_fd;
> }
> 
> int test_main(void) {
> 	int sockfds[2];
> 	/* ??????unix????????????????????????????????????????????????????????????????????????????????????????????????
> 	 * ?????????????????????socketpair???????????????socket()?????? */
> 	if (socketpair(PF_UNIX, SOCK_STREAM, 0, sockfds) < 0)
> 		ERR_EXIT("socketpair");
> 
> 	pid_t pid;
> 	pid = fork();
> 	if (pid == -1)
> 		ERR_EXIT("fork");
> 	/* ??????????????????????????????????????????????????????????????????
> 	 * ??????????????????????????????????????????????????????????????????????????????????????? */
> 	if (pid > 0) {
> 		close(sockfds[1]);
> 		int fd = recv_fd(sockfds[0]);
> 		char buf[1024] = { 0 };
> 		read(fd, buf, sizeof(buf));
> 	} else if (pid == 0) {
> 		close(sockfds[0]);
> 		int fd;
> 		fd = open("test.txt", O_RDONLY);
> 		if (fd == -1)
> 			ERR_EXIT("open");
> 		send_fd(sockfds[1], fd);
> 	}
> 	return 0;
> }
> 
> int main(int argc, char *argv[]) {
> 	int ret;
> 	int child_pid;
> 	int fd;
> 	int loop_index;
> 	if (argc >= 2) {
> 		loop_index = atoi(argv[1]);
> 	} else {
> 		loop_index = 9;
> 	}
> 	child_pid = fork();
> 	if (child_pid) {
> 		child_pid = fork();
> 		if (child_pid) {
> 			child_pid = fork();
> 			child_pid = fork();
> 			while (1) {
> 				count++;
> 				count = count * 2;
> 			}
> 		} else {
> 			test_main();
> 		}
> 	}
> 	child_pid = fork();
> 
> 	if (child_pid) {
> 		printf("pid:%d nice:%d \n", getpid(), getpriority(PRIO_PROCESS, getpid()));
> 		controlfd = open("/dev/loop-control", O_RDWR);
> 		while (controlfd >= 0) {
> 			ret = ioctl(controlfd, LOOP_CTL_ADD, loop_index);
> 			ret = ioctl(controlfd, LOOP_CTL_REMOVE, loop_index);
> 		}
> 		close(controlfd);
> 	} else {
> 		child_pid = fork();
> 		printf("pid:%d  nice:%d \n", getpid(), getpriority(PRIO_PROCESS, getpid()));
> 		char name[100];
> 		memset(name, 0, 100);
> 		snprintf(name, 100, "/dev/loop%d", loop_index);
> 		if (child_pid) {
> 			prctl(PR_SET_NAME, "ppp");
> 			setpriority(PRIO_PROCESS, getpid(), 0);
> 			while (1) {
> 				fd = open(name, O_RDONLY);
> 				if (fd <= 0) {
> 					continue;
> 				}
> 				ret = close(fd);
> 			}
> 		} else {
> 			child_pid = fork();
> 			prctl(PR_SET_NAME, "ccc");
> 			setpriority(PRIO_PROCESS, getpid(), -5);
> 			while (1) {
> 				fd = open(name, O_RDONLY);
> 				if (fd <= 0) {
> 					continue;
> 				}
> 				ret = close(fd);
> 			}
> 		}
> 	}
> }

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: blkdev loop UAF
  2018-01-11  8:24 ` blkdev loop UAF Dan Carpenter
@ 2018-01-11 11:22   ` Hou Tao
  2018-01-11 17:00     ` Jan Kara
  0 siblings, 1 reply; 5+ messages in thread
From: Hou Tao @ 2018-01-11 11:22 UTC (permalink / raw)
  To: Foy, Jens Axboe, Jan Kara
  Cc: Dan Carpenter, Paolo Valente, security, syzkaller, linux-block

Hi,

On 2018/1/11 16:24, Dan Carpenter wrote:
> Thanks for your report and the patch.  I am sending it to the
> linux-block devs since it's already public.
> 
> regards,
> dan carpenter

The User-after-free problem is not specific for loop device, it can also
be reproduced on scsi device, and there are more race problems caused by
the race between bdev open and gendisk shutdown [1].

The cause of the UAF problem is that there are two instances of gendisk which share
the same bdev. After the process owning the new gendisk increases bdev->bd_openers,
the other process which owns the older gendisk will find bdev->bd_openers is not zero
and will put the last reference of the older gendisk and cause User-after-free.

I had proposed a patch for the problem, but it's still an incomplete fix for the race
between gendisk shutdown and bdev opening.

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4a181fc..5ecdb9f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1510,6 +1510,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                if (bdev->bd_bdi == &noop_backing_dev_info)
                        bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
        } else {
+               if (bdev->bd_disk != disk) {
+                       ret = -ENXIO;
+                       goto out_unlock_bdev;
+               }
+
                if (bdev->bd_contains == bdev) {
                        ret = 0;
                        if (bdev->bd_disk->fops->open)


As far as I know, Jan Kara is working on these problems. So, Jan, any suggestions ?

Regards
Tao

[1]: https://www.spinics.net/lists/linux-block/msg20066.html

> On Thu, Jan 11, 2018 at 03:51:06PM +0800, Foy wrote:
>> BUG:
>> diff --git a/fs/block_dev.c b/fs/block_dev.c
>> index 4a181fc..db919a9 100644
>> --- a/fs/block_dev.c
>> +++ b/fs/block_dev.c
>> @@ -1430,12 +1430,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
>>   restart:
>>  
>>         ret = -ENXIO;
>> +       //2. Process C: loop_control_ioctl ==> LOOP_CTL_REMOVE ==> idr_remove 
>> +       //3. Process B: get_gendisk ==> get_gendisk ==> kobj_lookup ==> loop_probe ==> loop_add, get a new disk(2)
>>         disk = get_gendisk(bdev->bd_dev, &partno);
>>         if (!disk)
>>                 goto out;
>>         owner = disk->fops->owner;
>>  
>>         disk_block_events(disk);
>> +       //1. Process A get the disk(1),before the mutex_lock_nested.And then be scheduled 
>>         mutex_lock_nested(&bdev->bd_mutex, for_part);
>>         if (!bdev->bd_openers) {
>>                 bdev->bd_disk = disk;
>> @@ -1524,14 +1527,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
>>                         if (ret)
>>                                 goto out_unlock_bdev;
>>                 }
>> +               //5. Process A disk(1) will be free,because disk(1)'s refs == 1
>>                 /* only one opener holds refs to the module and disk */
>>                 put_disk(disk);
>>                 module_put(owner);
>>         }
>> +       //4. Process B: bdev->bd_openers != 0
>>         bdev->bd_openers++;
>>         if (for_part)
>>                 bdev->bd_part_count++;
>>         mutex_unlock(&bdev->bd_mutex);
>> +       //6. Process A the disk(1) will be use
>>         disk_unblock_events(disk);
>>         return 0;
>>
>>
>>
>>
>> Patch:
>> diff --git a/fs/block_dev.c b/fs/block_dev.c
>> index 4a181fc..1f5c7bf 100644
>> --- a/fs/block_dev.c
>> +++ b/fs/block_dev.c
>> @@ -1526,13 +1526,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
>>  }
>>  /* only one opener holds refs to the module and disk */
>>  put_disk(disk);
>> +disk = NULL;
>>  module_put(owner);
>>  }
>>  bdev->bd_openers++;
>>  if (for_part)
>>  bdev->bd_part_count++;
>>  mutex_unlock(&bdev->bd_mutex);
>> -disk_unblock_events(disk);
>> +if (disk)
>> +disk_unblock_events(disk);
>>  return 0;
>>  
>>   out_clear:
>>
>>
>>
>>
>> Crash:
>> ==================================================================
>> BUG: KASAN: use-after-free in disk_unblock_events+0x4b/0x50 block/genhd.c:1657
>> Read of size 8 at addr ffff880035c273f8 by task syz-executor6/21165
>>
>>
>> CPU: 0 PID: 21165 Comm: syz-executor6 Not tainted 4.15.0-rc6 #18
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
>> Call Trace:
>>  __dump_stack lib/dump_stack.c:17 [inline]
>>  dump_stack+0x104/0x1c5 lib/dump_stack.c:53
>>  print_address_description+0x6e/0x280 mm/kasan/report.c:252
>>  kasan_report_error mm/kasan/report.c:351 [inline]
>>  kasan_report+0x254/0x340 mm/kasan/report.c:409
>>  disk_unblock_events+0x4b/0x50 block/genhd.c:1657
>>  __blkdev_get+0x5f5/0xdb0 fs/block_dev.c:1535
>>  blkdev_get+0x338/0x9e0 fs/block_dev.c:1591
>>  blkdev_open+0x1bd/0x240 fs/block_dev.c:1749
>>  do_dentry_open+0x682/0xd80 fs/open.c:752
>>  vfs_open+0x107/0x220 fs/open.c:866
>>  do_last fs/namei.c:3379 [inline]
>>  path_openat+0x1051/0x3220 fs/namei.c:3519
>>  do_filp_open+0x25b/0x3b0 fs/namei.c:3554
>>  do_sys_open+0x4ab/0x650 fs/open.c:1059
>>  entry_SYSCALL_64_fastpath+0x1f/0x96
>> RIP: 0033:0x40cd41
>> RSP: 002b:00007ff1c06e5780 EFLAGS: 00000293 ORIG_RAX: 0000000000000002
>> RAX: ffffffffffffffda RBX: 000000000071bf58 RCX: 000000000040cd41
>> RDX: 0000000000000000 RSI: 0000000000080102 RDI: 00007ff1c06e5830
>> RBP: 00000000000001e4 R08: 000000000000ffff R09: 0000000000000000
>> R10: 0000000020024400 R11: 0000000000000293 R12: 00000000006efe00
>> R13: 00000000ffffffff R14: 00007ff1c06e66d4 R15: 0000000000000002
>>
>>
>> Allocated by task 21138:
>>  set_track mm/kasan/kasan.c:459 [inline]
>>  kasan_kmalloc+0xa9/0xd0 mm/kasan/kasan.c:551
>>  kmem_cache_alloc_node_trace+0x153/0x280 mm/slub.c:2780
>>  kmalloc_node include/linux/slab.h:537 [inline]
>>  kzalloc_node include/linux/slab.h:699 [inline]
>>  __alloc_disk_node+0xab/0x490 block/genhd.c:1400
>>  loop_add+0x42f/0xa00 drivers/block/loop.c:1808
>>  loop_control_ioctl+0x11c/0x450 drivers/block/loop.c:1940
>>  vfs_ioctl fs/ioctl.c:46 [inline]
>>  do_vfs_ioctl+0x18b/0x13c0 fs/ioctl.c:686
>>  SYSC_ioctl fs/ioctl.c:701 [inline]
>>  SyS_ioctl+0x7e/0xb0 fs/ioctl.c:692
>>  entry_SYSCALL_64_fastpath+0x1f/0x96
>>
>>
>> Freed by task 21165:
>>  set_track mm/kasan/kasan.c:459 [inline]
>>  kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
>>  slab_free_hook mm/slub.c:1391 [inline]
>>  slab_free_freelist_hook mm/slub.c:1412 [inline]
>>  slab_free mm/slub.c:2968 [inline]
>>  kfree+0xe2/0x2c0 mm/slub.c:3899
>>  disk_release+0x300/0x3c0 block/genhd.c:1249
>>  device_release+0x76/0x200 drivers/base/core.c:814
>>  kobject_cleanup lib/kobject.c:648 [inline]
>>  kobject_release lib/kobject.c:677 [inline]
>>  kref_put include/linux/kref.h:70 [inline]
>>  kobject_put+0x13d/0x230 lib/kobject.c:694
>>  put_disk+0x1f/0x30 block/genhd.c:1465
>>  __blkdev_get+0x560/0xdb0 fs/block_dev.c:1528
>>  blkdev_get+0x338/0x9e0 fs/block_dev.c:1591
>>  blkdev_open+0x1bd/0x240 fs/block_dev.c:1749
>>  do_dentry_open+0x682/0xd80 fs/open.c:752
>>  vfs_open+0x107/0x220 fs/open.c:866
>>  do_last fs/namei.c:3379 [inline]
>>  path_openat+0x1051/0x3220 fs/namei.c:3519
>>  do_filp_open+0x25b/0x3b0 fs/namei.c:3554
>>  do_sys_open+0x4ab/0x650 fs/open.c:1059
>>  entry_SYSCALL_64_fastpath+0x1f/0x96
>>
>>
>> The buggy address belongs to the object at ffff880035c26e80
>>  which belongs to the cache kmalloc-2048 of size 2048
>> The buggy address is located 1400 bytes inside of
>>  2048-byte region [ffff880035c26e80, ffff880035c27680)
>> The buggy address belongs to the page:
>> page:0000000003af101f count:1 mapcount:0 mapping:          (null) index:0x0 compound_mapcount: 0
>> flags: 0x100000000008100(slab|head)
>> raw: 0100000000008100 0000000000000000 0000000000000000 00000001000f000f
>> raw: ffffea0000946a00 0000000200000002 ffff880035c02d80 0000000000000000
>> page dumped because: kasan: bad access detected
>>
>>
>> Memory state around the buggy address:
>>  ffff880035c27280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>>  ffff880035c27300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>>> ffff880035c27380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>>                                                                 ^
>>  ffff880035c27400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>>  ffff880035c27480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>> ==================================================================
>>
>>
>>
>>
>>
> 
> 
>> #include <stdio.h>
>> #include <string.h>
>> #include <errno.h>
>> #include <stdlib.h>
>> #include <unistd.h>
>> #include <fcntl.h>
>> #include <pthread.h>
>> #include <arpa/inet.h>
>> #include <sys/types.h>
>> #include <sys/socket.h>
>> #include <sys/syscall.h>
>> #include <sys/mman.h>
>> #include <sys/time.h>
>> #include <sys/resource.h>
>> #include <netinet/in.h>
>> #include <netinet/udp.h>
>> #include <netinet/ip.h>
>> #include <linux/xfrm.h>
>> #include <linux/netlink.h>
>> #include <linux/loop.h>
>> #include <stdarg.h>
>> #include <stdbool.h>
>> #include <stddef.h>
>> #include <sys/prctl.h>
>> #include <sys/time.h>
>> #include <sys/resource.h>
>>
>> #define false 0
>> #define true 1
>> int controlfd;
>> int count = 0;
>>
>> #define ERR_EXIT1(m) \
>>         do \
>>         { \
>>                 perror(m); \
>>                 exit(EXIT_FAILURE); \
>>         } while(0)
>>
>> #define ERR_EXIT(m) \
>>         do \
>>         { \
>>                 perror(m); \
>>         } while(0)
>>
>> void send_fd(int sock_fd, int send_fd) {
>> 	int ret;
>> 	struct msghdr msg;
>> 	struct cmsghdr *p_cmsg;
>> 	struct iovec vec;
>> 	char cmsgbuf[CMSG_SPACE(sizeof(send_fd) * 253)];
>> 	printf("cmsgbuf size:%d \n", sizeof(cmsgbuf));
>> 	int *p_fds;
>> 	char sendchar = 0;
>> 	int i;
>> 	msg.msg_control = cmsgbuf;
>> 	msg.msg_controllen = sizeof(cmsgbuf);
>> 	p_cmsg = CMSG_FIRSTHDR(&msg);
>> 	p_cmsg->cmsg_level = SOL_SOCKET;
>> 	p_cmsg->cmsg_type = SCM_RIGHTS;
>> 	p_cmsg->cmsg_len = CMSG_LEN(sizeof(send_fd) * 253);
>> 	p_fds = (int *) CMSG_DATA(p_cmsg);
>> 	for (i = 0; i < 253; i++) {
>> 		p_fds[i] = send_fd; // ??????????????????????????????????????????????????????
>> 	}
>>
>> 	msg.msg_name = NULL;
>> 	msg.msg_namelen = 0;
>> 	msg.msg_iov = &vec;
>> 	msg.msg_iovlen = 1; //??????????????????????????????????????????1?????????
>> 	msg.msg_flags = 0;
>>
>> 	vec.iov_base = &sendchar;
>> 	vec.iov_len = sizeof(sendchar);
>> 	while (1) {
>> 		ret = sendmsg(sock_fd, &msg, 0);
>> 		if (ret != 1)
>> 			ERR_EXIT("sendmsg");
>> 	}
>> }
>>
>> int recv_fd(const int sock_fd) {
>> 	int ret;
>> 	struct msghdr msg;
>> 	char recvchar;
>> 	struct iovec vec;
>> 	int recv_fd;
>> 	char cmsgbuf[CMSG_SPACE(sizeof(recv_fd))];
>> 	struct cmsghdr *p_cmsg;
>> 	int *p_fd, i;
>> 	vec.iov_base = &recvchar;
>> 	vec.iov_len = sizeof(recvchar);
>> 	msg.msg_name = NULL;
>> 	msg.msg_namelen = 0;
>> 	msg.msg_iov = &vec;
>> 	msg.msg_iovlen = 1;
>> 	msg.msg_control = cmsgbuf;
>> 	msg.msg_controllen = sizeof(cmsgbuf);
>> 	msg.msg_flags = 0;
>>
>> 	p_fd = (int *) CMSG_DATA(CMSG_FIRSTHDR(&msg));
>> 	*p_fd = -1;
>> 	while (1) {
>> 		ret = recvmsg(sock_fd, &msg, 0);
>> 		if (ret < 0) {
>> 			char buff[256];
>> 			snprintf(buff, "recvmsg11: sock_fd:%d ret:%d", sock_fd, ret);
>> 			ERR_EXIT(buff);
>> 		}
>>
>> 		p_cmsg = CMSG_FIRSTHDR(&msg);
>> 		if (p_cmsg == NULL)
>> 			ERR_EXIT("no passed fd");
>>
>> 		p_fd = (int *) CMSG_DATA(p_cmsg);
>> 		for (i = 0; i < 253; i++) {
>> 			close(p_fd[i]);
>> 		}
>> 	}
>> 	recv_fd = *p_fd;
>> 	if (recv_fd == -1)
>> 		ERR_EXIT("no passed fd");
>>
>> 	return recv_fd;
>> }
>>
>> int test_main(void) {
>> 	int sockfds[2];
>> 	/* ??????unix????????????????????????????????????????????????????????????????????????????????????????????????
>> 	 * ?????????????????????socketpair???????????????socket()?????? */
>> 	if (socketpair(PF_UNIX, SOCK_STREAM, 0, sockfds) < 0)
>> 		ERR_EXIT("socketpair");
>>
>> 	pid_t pid;
>> 	pid = fork();
>> 	if (pid == -1)
>> 		ERR_EXIT("fork");
>> 	/* ??????????????????????????????????????????????????????????????????
>> 	 * ??????????????????????????????????????????????????????????????????????????????????????? */
>> 	if (pid > 0) {
>> 		close(sockfds[1]);
>> 		int fd = recv_fd(sockfds[0]);
>> 		char buf[1024] = { 0 };
>> 		read(fd, buf, sizeof(buf));
>> 	} else if (pid == 0) {
>> 		close(sockfds[0]);
>> 		int fd;
>> 		fd = open("test.txt", O_RDONLY);
>> 		if (fd == -1)
>> 			ERR_EXIT("open");
>> 		send_fd(sockfds[1], fd);
>> 	}
>> 	return 0;
>> }
>>
>> int main(int argc, char *argv[]) {
>> 	int ret;
>> 	int child_pid;
>> 	int fd;
>> 	int loop_index;
>> 	if (argc >= 2) {
>> 		loop_index = atoi(argv[1]);
>> 	} else {
>> 		loop_index = 9;
>> 	}
>> 	child_pid = fork();
>> 	if (child_pid) {
>> 		child_pid = fork();
>> 		if (child_pid) {
>> 			child_pid = fork();
>> 			child_pid = fork();
>> 			while (1) {
>> 				count++;
>> 				count = count * 2;
>> 			}
>> 		} else {
>> 			test_main();
>> 		}
>> 	}
>> 	child_pid = fork();
>>
>> 	if (child_pid) {
>> 		printf("pid:%d nice:%d \n", getpid(), getpriority(PRIO_PROCESS, getpid()));
>> 		controlfd = open("/dev/loop-control", O_RDWR);
>> 		while (controlfd >= 0) {
>> 			ret = ioctl(controlfd, LOOP_CTL_ADD, loop_index);
>> 			ret = ioctl(controlfd, LOOP_CTL_REMOVE, loop_index);
>> 		}
>> 		close(controlfd);
>> 	} else {
>> 		child_pid = fork();
>> 		printf("pid:%d  nice:%d \n", getpid(), getpriority(PRIO_PROCESS, getpid()));
>> 		char name[100];
>> 		memset(name, 0, 100);
>> 		snprintf(name, 100, "/dev/loop%d", loop_index);
>> 		if (child_pid) {
>> 			prctl(PR_SET_NAME, "ppp");
>> 			setpriority(PRIO_PROCESS, getpid(), 0);
>> 			while (1) {
>> 				fd = open(name, O_RDONLY);
>> 				if (fd <= 0) {
>> 					continue;
>> 				}
>> 				ret = close(fd);
>> 			}
>> 		} else {
>> 			child_pid = fork();
>> 			prctl(PR_SET_NAME, "ccc");
>> 			setpriority(PRIO_PROCESS, getpid(), -5);
>> 			while (1) {
>> 				fd = open(name, O_RDONLY);
>> 				if (fd <= 0) {
>> 					continue;
>> 				}
>> 				ret = close(fd);
>> 			}
>> 		}
>> 	}
>> }
> 
> 
> 
> 
> .
> 

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: blkdev loop UAF
  2018-01-11 11:22   ` Hou Tao
@ 2018-01-11 17:00     ` Jan Kara
  2018-02-02  3:58       ` Eric Biggers
  0 siblings, 1 reply; 5+ messages in thread
From: Jan Kara @ 2018-01-11 17:00 UTC (permalink / raw)
  To: Hou Tao
  Cc: Foy, Jens Axboe, Jan Kara, Dan Carpenter, Paolo Valente,
	security, syzkaller, linux-block

[-- Attachment #1: Type: text/plain, Size: 2116 bytes --]

On Thu 11-01-18 19:22:39, Hou Tao wrote:
> Hi,
> 
> On 2018/1/11 16:24, Dan Carpenter wrote:
> > Thanks for your report and the patch.  I am sending it to the
> > linux-block devs since it's already public.
> > 
> > regards,
> > dan carpenter
> 
> The User-after-free problem is not specific for loop device, it can also
> be reproduced on scsi device, and there are more race problems caused by
> the race between bdev open and gendisk shutdown [1].
> 
> The cause of the UAF problem is that there are two instances of gendisk which share
> the same bdev. After the process owning the new gendisk increases bdev->bd_openers,
> the other process which owns the older gendisk will find bdev->bd_openers is not zero
> and will put the last reference of the older gendisk and cause User-after-free.
> 
> I had proposed a patch for the problem, but it's still an incomplete fix for the race
> between gendisk shutdown and bdev opening.
> 
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 4a181fc..5ecdb9f 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -1510,6 +1510,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
>                 if (bdev->bd_bdi == &noop_backing_dev_info)
>                         bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
>         } else {
> +               if (bdev->bd_disk != disk) {
> +                       ret = -ENXIO;
> +                       goto out_unlock_bdev;
> +               }
> +
>                 if (bdev->bd_contains == bdev) {
>                         ret = 0;
>                         if (bdev->bd_disk->fops->open)
> 
> 
> As far as I know, Jan Kara is working on these problems. So, Jan, any suggestions ?

Yeah, thanks for the ping. I was working today to get this fixed. I have
about 6 patches (attached) which should fix all the outstanding issues I'm
aware of.  So far they are only boot tested. I will give them more
stress-testing next week and then post them officially but if you have time,
feel free to try them out as well. Thanks!

								Honza


-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

[-- Attachment #2: 0001-genhd-Fix-leaked-module-reference-for-NVME-devices.patch --]
[-- Type: text/x-patch, Size: 1185 bytes --]

>From 7acbaaf7925cfd5440b7f7bf9902d84136624007 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 11 Jan 2018 15:11:03 +0100
Subject: [PATCH 1/6] genhd: Fix leaked module reference for NVME devices

Commit 8ddcd653257c "block: introduce GENHD_FL_HIDDEN" added handling of
hidden devices to get_gendisk() but forgot to drop module reference
which is also acquired by get_disk(). Drop the reference as necessary.

Arguably the function naming here is misleading as put_disk() is *not*
the counterpart of get_disk() but let's fix that in the follow up
commit since that will be more intrusive.

Fixes: 8ddcd653257c18a669fcb75ee42c37054908e0d6
CC: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 block/genhd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/genhd.c b/block/genhd.c
index 96a66f671720..22f4fc340a3a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -802,7 +802,10 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
 	}
 
 	if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) {
+		struct module *owner = disk->fops->owner;
+
 		put_disk(disk);
+		module_put(owner);
 		disk = NULL;
 	}
 	return disk;
-- 
2.13.6


[-- Attachment #3: 0002-genhd-Rename-get_disk-to-get_disk_and_module.patch --]
[-- Type: text/x-patch, Size: 6231 bytes --]

>From d660ec31e06f683f53c37b9195b85d363fb255ea Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 11 Jan 2018 15:27:38 +0100
Subject: [PATCH 2/6] genhd: Rename get_disk() to get_disk_and_module()

Rename get_disk() to get_disk_and_module() to make sure what the
function does. It's not a great name but at least it is now clear that
put_disk() is not it's counterpart.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 block/genhd.c           | 10 ++++------
 drivers/block/amiflop.c |  2 +-
 drivers/block/ataflop.c |  2 +-
 drivers/block/brd.c     |  2 +-
 drivers/block/floppy.c  |  2 +-
 drivers/block/loop.c    |  2 +-
 drivers/block/swim.c    |  2 +-
 drivers/block/z2ram.c   |  2 +-
 drivers/ide/ide-probe.c |  2 +-
 include/linux/genhd.h   |  2 +-
 10 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 22f4fc340a3a..058cf91f8d32 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -547,7 +547,7 @@ static int exact_lock(dev_t devt, void *data)
 {
 	struct gendisk *p = data;
 
-	if (!get_disk(p))
+	if (!get_disk_and_module(p))
 		return -1;
 	return 0;
 }
@@ -794,7 +794,7 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
 
 		spin_lock_bh(&ext_devt_lock);
 		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
-		if (part && get_disk(part_to_disk(part))) {
+		if (part && get_disk_and_module(part_to_disk(part))) {
 			*partno = part->partno;
 			disk = part_to_disk(part);
 		}
@@ -1441,7 +1441,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 }
 EXPORT_SYMBOL(__alloc_disk_node);
 
-struct kobject *get_disk(struct gendisk *disk)
+struct kobject *get_disk_and_module(struct gendisk *disk)
 {
 	struct module *owner;
 	struct kobject *kobj;
@@ -1459,15 +1459,13 @@ struct kobject *get_disk(struct gendisk *disk)
 	return kobj;
 
 }
-
-EXPORT_SYMBOL(get_disk);
+EXPORT_SYMBOL(get_disk_and_module);
 
 void put_disk(struct gendisk *disk)
 {
 	if (disk)
 		kobject_put(&disk_to_dev(disk)->kobj);
 }
-
 EXPORT_SYMBOL(put_disk);
 
 static void set_disk_ro_uevent(struct gendisk *gd, int ro)
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index e5aa62fcf5a8..3aaf6af3ec23 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1758,7 +1758,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
 	if (unit[drive].type->code == FD_NODRIVE)
 		return NULL;
 	*part = 0;
-	return get_disk(unit[drive].gendisk);
+	return get_disk_and_module(unit[drive].gendisk);
 }
 
 static int __init amiga_floppy_probe(struct platform_device *pdev)
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 8bc3b9fd8dd2..dfb2c2622e5a 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1917,7 +1917,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
 	if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS)
 		return NULL;
 	*part = 0;
-	return get_disk(unit[drive].disk);
+	return get_disk_and_module(unit[drive].disk);
 }
 
 static int __init atari_floppy_init (void)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 8028a3a7e7fd..deea78e485da 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -456,7 +456,7 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
 
 	mutex_lock(&brd_devices_mutex);
 	brd = brd_init_one(MINOR(dev) / max_part, &new);
-	kobj = brd ? get_disk(brd->brd_disk) : NULL;
+	kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL;
 	mutex_unlock(&brd_devices_mutex);
 
 	if (new)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index eae484acfbbc..8ec7235fc93b 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4505,7 +4505,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
 	if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type))
 		return NULL;
 	*part = 0;
-	return get_disk(disks[drive]);
+	return get_disk_and_module(disks[drive]);
 }
 
 static int __init do_floppy_init(void)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d5fe720cf149..87855b5123a6 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1922,7 +1922,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
 	if (err < 0)
 		kobj = NULL;
 	else
-		kobj = get_disk(lo->lo_disk);
+		kobj = get_disk_and_module(lo->lo_disk);
 	mutex_unlock(&loop_index_mutex);
 
 	*part = 0;
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 84434d3ea19b..64e066eba72e 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -799,7 +799,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
 		return NULL;
 
 	*part = 0;
-	return get_disk(swd->unit[drive].disk);
+	return get_disk_and_module(swd->unit[drive].disk);
 }
 
 static int swim_add_floppy(struct swim_priv *swd, enum drive_location location)
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 41c95c9b2ab4..8f9130ab5887 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -332,7 +332,7 @@ static const struct block_device_operations z2_fops =
 static struct kobject *z2_find(dev_t dev, int *part, void *data)
 {
 	*part = 0;
-	return get_disk(z2ram_gendisk);
+	return get_disk_and_module(z2ram_gendisk);
 }
 
 static struct request_queue *z2_queue;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 17fd55af4d92..caa20eb5f26b 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -928,7 +928,7 @@ static int exact_lock(dev_t dev, void *data)
 {
 	struct gendisk *p = data;
 
-	if (!get_disk(p))
+	if (!get_disk_and_module(p))
 		return -1;
 	return 0;
 }
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 5144ebe046c9..12402912a86a 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -595,7 +595,7 @@ extern void delete_partition(struct gendisk *, int);
 extern void printk_all_partitions(void);
 
 extern struct gendisk *__alloc_disk_node(int minors, int node_id);
-extern struct kobject *get_disk(struct gendisk *disk);
+extern struct kobject *get_disk_and_module(struct gendisk *disk);
 extern void put_disk(struct gendisk *disk);
 extern void blk_register_region(dev_t devt, unsigned long range,
 			struct module *module,
-- 
2.13.6


[-- Attachment #4: 0003-genhd-Add-helper-put_disk_and_module.patch --]
[-- Type: text/x-patch, Size: 5699 bytes --]

>From e5cebe2d7a647a91cb4af3997c45b849dc6a1f6d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 11 Jan 2018 15:40:52 +0100
Subject: [PATCH 3/6] genhd: Add helper put_disk_and_module()

Add a proper counterpart to get_disk_and_module() -
put_disk_and_module(). Currently it is opencoded in several places.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 block/blk-cgroup.c    | 11 ++---------
 block/genhd.c         | 20 ++++++++++++++++----
 fs/block_dev.c        | 19 +++++--------------
 include/linux/genhd.h |  1 +
 4 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4117524ca45b..c2033a232a44 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -812,7 +812,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	struct gendisk *disk;
 	struct request_queue *q;
 	struct blkcg_gq *blkg;
-	struct module *owner;
 	unsigned int major, minor;
 	int key_len, part, ret;
 	char *body;
@@ -904,9 +903,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	spin_unlock_irq(q->queue_lock);
 	rcu_read_unlock();
 fail:
-	owner = disk->fops->owner;
-	put_disk(disk);
-	module_put(owner);
+	put_disk_and_module(disk);
 	/*
 	 * If queue was bypassing, we should retry.  Do so after a
 	 * short msleep().  It isn't strictly necessary but queue
@@ -931,13 +928,9 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
 	__releases(ctx->disk->queue->queue_lock) __releases(rcu)
 {
-	struct module *owner;
-
 	spin_unlock_irq(ctx->disk->queue->queue_lock);
 	rcu_read_unlock();
-	owner = ctx->disk->fops->owner;
-	put_disk(ctx->disk);
-	module_put(owner);
+	put_disk_and_module(ctx->disk);
 }
 EXPORT_SYMBOL_GPL(blkg_conf_finish);
 
diff --git a/block/genhd.c b/block/genhd.c
index 058cf91f8d32..64c323549a22 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -802,10 +802,7 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
 	}
 
 	if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) {
-		struct module *owner = disk->fops->owner;
-
-		put_disk(disk);
-		module_put(owner);
+		put_disk_and_module(disk);
 		disk = NULL;
 	}
 	return disk;
@@ -1468,6 +1465,21 @@ void put_disk(struct gendisk *disk)
 }
 EXPORT_SYMBOL(put_disk);
 
+/*
+ * This is a counterpart of get_disk_and_module() and thus also of
+ * get_gendisk().
+ */
+void put_disk_and_module(struct gendisk *disk)
+{
+	if (disk) {
+		struct module *owner = disk->fops->owner;
+
+		put_disk(disk);
+		module_put(owner);
+	}
+}
+EXPORT_SYMBOL(put_disk_and_module);
+
 static void set_disk_ro_uevent(struct gendisk *gd, int ro)
 {
 	char event[] = "DISK_RO=1";
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4a181fcb5175..1dbbf847911a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1111,8 +1111,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
 	else
 		whole = bdgrab(bdev);
 
-	module_put(disk->fops->owner);
-	put_disk(disk);
+	put_disk_and_module(disk);
 	if (!whole)
 		return ERR_PTR(-ENOMEM);
 
@@ -1407,7 +1406,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 {
 	struct gendisk *disk;
-	struct module *owner;
 	int ret;
 	int partno;
 	int perm = 0;
@@ -1433,7 +1431,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	disk = get_gendisk(bdev->bd_dev, &partno);
 	if (!disk)
 		goto out;
-	owner = disk->fops->owner;
 
 	disk_block_events(disk);
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
@@ -1463,8 +1460,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					bdev->bd_queue = NULL;
 					mutex_unlock(&bdev->bd_mutex);
 					disk_unblock_events(disk);
-					put_disk(disk);
-					module_put(owner);
+					put_disk_and_module(disk);
 					goto restart;
 				}
 			}
@@ -1525,8 +1521,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				goto out_unlock_bdev;
 		}
 		/* only one opener holds refs to the module and disk */
-		put_disk(disk);
-		module_put(owner);
+		put_disk_and_module(disk);
 	}
 	bdev->bd_openers++;
 	if (for_part)
@@ -1546,8 +1541,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
  out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
 	disk_unblock_events(disk);
-	put_disk(disk);
-	module_put(owner);
+	put_disk_and_module(disk);
  out:
 	bdput(bdev);
 
@@ -1770,8 +1764,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 			disk->fops->release(disk, mode);
 	}
 	if (!bdev->bd_openers) {
-		struct module *owner = disk->fops->owner;
-
 		disk_put_part(bdev->bd_part);
 		bdev->bd_part = NULL;
 		bdev->bd_disk = NULL;
@@ -1779,8 +1771,7 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;
 
-		put_disk(disk);
-		module_put(owner);
+		put_disk_and_module(disk);
 	}
 	mutex_unlock(&bdev->bd_mutex);
 	bdput(bdev);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 12402912a86a..07b715cdeb93 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -597,6 +597,7 @@ extern void printk_all_partitions(void);
 extern struct gendisk *__alloc_disk_node(int minors, int node_id);
 extern struct kobject *get_disk_and_module(struct gendisk *disk);
 extern void put_disk(struct gendisk *disk);
+extern void put_disk_and_module(struct gendisk *disk);
 extern void blk_register_region(dev_t devt, unsigned long range,
 			struct module *module,
 			struct kobject *(*probe)(dev_t, int *, void *),
-- 
2.13.6


[-- Attachment #5: 0004-genhd-Fix-use-after-free-in-__blkdev_get.patch --]
[-- Type: text/x-patch, Size: 2606 bytes --]

>From 9509768ff6680e280f84e47f7f8438c24aa25c48 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 11 Jan 2018 15:48:46 +0100
Subject: [PATCH 4/6] genhd: Fix use after free in __blkdev_get()

When two blkdev_open() calls race with device removal and recreation,
__blkdev_get() can use looked up gendisk after it is freed:

CPU0				CPU1			CPU2
							del_gendisk(disk);
							  bdev_unhash_inode(inode);
blkdev_open()			blkdev_open()
  bdev = bd_acquire(inode);
    - creates and returns new inode
				  bdev = bd_acquire(inode);
				    - returns the same inode
  __blkdev_get(devt)		  __blkdev_get(devt)
    disk = get_gendisk(devt);
      - got structure of device going away
							<finish device removal>
							<new device gets
							 created under the same
							 device number>
				  disk = get_gendisk(devt);
				    - got new device structure
				  if (!bdev->bd_openers) {
				    does the first open
				  }
    if (!bdev->bd_openers)
      - false
    } else {
      put_disk_and_module(disk)
        - remember this was old device - this was last ref and disk is
          now freed
    }
    disk_unblock_events(disk); -> oops

Fix the problem by making sure we drop reference to disk in
__blkdev_get() only after we are really done with it.

Reported-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/block_dev.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1dbbf847911a..fe41a76769fa 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1409,6 +1409,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	int ret;
 	int partno;
 	int perm = 0;
+	bool first_open = false;
 
 	if (mode & FMODE_READ)
 		perm |= MAY_READ;
@@ -1435,6 +1436,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	disk_block_events(disk);
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
+		first_open = true;
 		bdev->bd_disk = disk;
 		bdev->bd_queue = disk->queue;
 		bdev->bd_contains = bdev;
@@ -1520,14 +1522,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			if (ret)
 				goto out_unlock_bdev;
 		}
-		/* only one opener holds refs to the module and disk */
-		put_disk_and_module(disk);
 	}
 	bdev->bd_openers++;
 	if (for_part)
 		bdev->bd_part_count++;
 	mutex_unlock(&bdev->bd_mutex);
 	disk_unblock_events(disk);
+	/* only one opener holds refs to the module and disk */
+	if (!first_open)
+		put_disk_and_module(disk);
 	return 0;
 
  out_clear:
-- 
2.13.6


[-- Attachment #6: 0005-genhd-Fix-BUG-in-blkdev_open.patch --]
[-- Type: text/x-patch, Size: 3990 bytes --]

>From 020dd3a183c4fb804a376d53a011f9fb67383866 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 11 Jan 2018 16:49:58 +0100
Subject: [PATCH 5/6] genhd: Fix BUG in blkdev_open()

When two blkdev_open() calls for a partition race with device removal
and recreation, we can hit BUG_ON(!bd_may_claim(bdev, whole, holder)) in
blkdev_open(). The race can happen as follows:

CPU0				CPU1			CPU2
							del_gendisk()
							  bdev_unhash_inode(part1);

blkdev_open(part1, O_EXCL)	blkdev_open(part1, O_EXCL)
  bdev = bd_acquire()		  bdev = bd_acquire()
  blkdev_get(bdev)
    bd_start_claiming(bdev)
      - finds old inode 'whole'
      bd_prepare_to_claim() -> 0
							  bdev_unhash_inode(whole);
							<device removed>
							<new device under same
							 number created>
				  blkdev_get(bdev);
				    bd_start_claiming(bdev)
				      - finds new inode 'whole'
				      bd_prepare_to_claim()
					- this also succeeds as we have
					  different 'whole' here...
					- bad things happen now as we
					  have two exclusive openers of
					  the same bdev

The problem here is that block device opens can see various intermediate
states while gendisk is shutting down and then being recreated.

We fix the problem by introducing new lookup_sem in gendisk that
synchronizes gendisk deletion with get_gendisk() and furthermore by
making sure that get_gendisk() does not return gendisk that is being (or
has been) deleted. This makes sure that once we ever manage to look up
newly created bdev inode, we are also guaranteed that following
get_gendisk() will either return failure (and we fail open) or it
returns gendisk for the new device and following bdget_disk() will
return new bdev inode (i.e., blkdev_open() follows the path as if it is
completely run after new device is created).

Reported-and-analyzed-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 block/genhd.c         | 21 ++++++++++++++++++++-
 include/linux/genhd.h |  1 +
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/block/genhd.c b/block/genhd.c
index 64c323549a22..c6f68c332bfe 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -703,6 +703,11 @@ void del_gendisk(struct gendisk *disk)
 	blk_integrity_del(disk);
 	disk_del_events(disk);
 
+	/*
+	 * Block lookups of the disk until all bdevs are unhashed and the
+	 * disk is marked as dead (GENHD_FL_UP cleared).
+	 */
+	down_write(&disk->lookup_sem);
 	/* invalidate stuff */
 	disk_part_iter_init(&piter, disk,
 			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
@@ -717,6 +722,7 @@ void del_gendisk(struct gendisk *disk)
 	bdev_unhash_inode(disk_devt(disk));
 	set_capacity(disk, 0);
 	disk->flags &= ~GENHD_FL_UP;
+	up_write(&disk->lookup_sem);
 
 	if (!(disk->flags & GENHD_FL_HIDDEN))
 		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
@@ -801,9 +807,21 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
 		spin_unlock_bh(&ext_devt_lock);
 	}
 
-	if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) {
+	if (!disk)
+		return NULL;
+
+	/*
+	 * Synchronize with del_gendisk() to not return disk that is being
+	 * destroyed.
+	 */
+	down_read(&disk->lookup_sem);
+	if (unlikely((disk->flags & GENHD_FL_HIDDEN) ||
+		     !(disk->flags & GENHD_FL_UP))) {
+		up_read(&disk->lookup_sem);
 		put_disk_and_module(disk);
 		disk = NULL;
+	} else {
+		up_read(&disk->lookup_sem);
 	}
 	return disk;
 }
@@ -1403,6 +1421,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 			kfree(disk);
 			return NULL;
 		}
+		init_rwsem(&disk->lookup_sem);
 		disk->node_id = node_id;
 		if (disk_expand_part_tbl(disk, 0)) {
 			free_part_stats(&disk->part0);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 07b715cdeb93..7b548253eaef 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -198,6 +198,7 @@ struct gendisk {
 	void *private_data;
 
 	int flags;
+	struct rw_semaphore lookup_sem;
 	struct kobject *slave_dir;
 
 	struct timer_rand_state *random;
-- 
2.13.6


[-- Attachment #7: 0006-blockdev-Avoid-two-active-bdev-inodes-for-one-device.patch --]
[-- Type: text/x-patch, Size: 2764 bytes --]

>From f560a27dd0938300a6bb5b16d9164e29b5abf980 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 11 Jan 2018 17:29:03 +0100
Subject: [PATCH 6/6] blockdev: Avoid two active bdev inodes for one device

When blkdev_open() races with device removal and creation it can happen
that unhashed bdev inode gets associated with newly created gendisk
like:

CPU0					CPU1
blkdev_open()
  bdev = bd_acquire()
					del_gendisk()
					  bdev_unhash_inode(bdev);
					remove device
					create new device with the same number
  __blkdev_get()
    disk = get_gendisk()
      - gets reference to gendisk of the new device

Now another blkdev_open() will not find original 'bdev' as it got
unhashed, create a new one and associate it with the same 'disk' at
which point problems start as we have two independent page caches for
one device.

Fix the problem by verifying that the bdev inode didn't get unhashed
before we acquired gendisk reference. That way we make sure gendisk can
get associated only with visible bdev inodes.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/block_dev.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index fe41a76769fa..fe09ef9c21f3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1058,6 +1058,27 @@ static int bd_prepare_to_claim(struct block_device *bdev,
 	return 0;
 }
 
+static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
+{
+	struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
+
+	if (!disk)
+		return NULL;
+	/*
+	 * Now that we hold gendisk reference we make sure bdev we looked up is
+	 * not stale. If it is, it means device got removed and created before
+	 * we looked up gendisk and we fail open in such case. Associating
+	 * unhashed bdev with newly created gendisk could lead to two bdevs
+	 * (and thus two independent caches) being associated with one device
+	 * which is bad.
+	 */
+	if (inode_unhashed(bdev->bd_inode)) {
+		put_disk_and_module(disk);
+		return NULL;
+	}
+	return disk;
+}
+
 /**
  * bd_start_claiming - start claiming a block device
  * @bdev: block device of interest
@@ -1094,7 +1115,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
 	 * @bdev might not have been initialized properly yet, look up
 	 * and grab the outer block device the hard way.
 	 */
-	disk = get_gendisk(bdev->bd_dev, &partno);
+	disk = bdev_get_gendisk(bdev, &partno);
 	if (!disk)
 		return ERR_PTR(-ENXIO);
 
@@ -1429,7 +1450,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
  restart:
 
 	ret = -ENXIO;
-	disk = get_gendisk(bdev->bd_dev, &partno);
+	disk = bdev_get_gendisk(bdev, &partno);
 	if (!disk)
 		goto out;
 
-- 
2.13.6


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: blkdev loop UAF
  2018-01-11 17:00     ` Jan Kara
@ 2018-02-02  3:58       ` Eric Biggers
  2018-02-05  8:56         ` Jan Kara
  0 siblings, 1 reply; 5+ messages in thread
From: Eric Biggers @ 2018-02-02  3:58 UTC (permalink / raw)
  To: Jan Kara
  Cc: Hou Tao, Foy, Jens Axboe, Dan Carpenter, Paolo Valente, security,
	syzkaller, linux-block

On Thu, Jan 11, 2018 at 06:00:08PM +0100, Jan Kara wrote:
> On Thu 11-01-18 19:22:39, Hou Tao wrote:
> > Hi,
> > 
> > On 2018/1/11 16:24, Dan Carpenter wrote:
> > > Thanks for your report and the patch.  I am sending it to the
> > > linux-block devs since it's already public.
> > > 
> > > regards,
> > > dan carpenter
> > 
> > The User-after-free problem is not specific for loop device, it can also
> > be reproduced on scsi device, and there are more race problems caused by
> > the race between bdev open and gendisk shutdown [1].
> > 
> > The cause of the UAF problem is that there are two instances of gendisk which share
> > the same bdev. After the process owning the new gendisk increases bdev->bd_openers,
> > the other process which owns the older gendisk will find bdev->bd_openers is not zero
> > and will put the last reference of the older gendisk and cause User-after-free.
> > 
> > I had proposed a patch for the problem, but it's still an incomplete fix for the race
> > between gendisk shutdown and bdev opening.
> > 
> > diff --git a/fs/block_dev.c b/fs/block_dev.c
> > index 4a181fc..5ecdb9f 100644
> > --- a/fs/block_dev.c
> > +++ b/fs/block_dev.c
> > @@ -1510,6 +1510,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
> >                 if (bdev->bd_bdi == &noop_backing_dev_info)
> >                         bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
> >         } else {
> > +               if (bdev->bd_disk != disk) {
> > +                       ret = -ENXIO;
> > +                       goto out_unlock_bdev;
> > +               }
> > +
> >                 if (bdev->bd_contains == bdev) {
> >                         ret = 0;
> >                         if (bdev->bd_disk->fops->open)
> > 
> > 
> > As far as I know, Jan Kara is working on these problems. So, Jan, any suggestions ?
> 
> Yeah, thanks for the ping. I was working today to get this fixed. I have
> about 6 patches (attached) which should fix all the outstanding issues I'm
> aware of.  So far they are only boot tested. I will give them more
> stress-testing next week and then post them officially but if you have time,
> feel free to try them out as well. Thanks!
> 

Jan, are you still planning to fix this?  This was also reported by syzbot here:
https://groups.google.com/d/msg/syzkaller-bugs/9wXx4YULITQ/0SK8vqxhAgAJ.  Note
that the C reproducer attached to that report doesn't work for me anymore
following ae6650163c6 ("loop: fix concurrent lo_open/lo_release").  However,
syzbot has still hit this on more recent kernels.  Here's a C reproducer that
still works for me on Linus' tree as of today, though it can take 5-10 seconds:

#include <fcntl.h>
#include <linux/loop.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main()
{
	int fd = open("/dev/loop-control", 0);

	if (!fork()) {
		for (;;)
			ioctl(fd, LOOP_CTL_ADD, 0);
	}

	if (!fork()) {
		for (;;)
			ioctl(fd, LOOP_CTL_REMOVE, 0);
	}

	fork();
	for (;;) {
		fd = open("/dev/loop0", O_EXCL);
		close(fd);
	}
}

This is the KASAN splat I get from the above:

BUG: KASAN: use-after-free in disk_unblock_events+0x3e/0x40 block/genhd.c:1672
Read of size 8 at addr ffff880030743670 by task systemd-udevd/165

CPU: 1 PID: 165 Comm: systemd-udevd Not tainted 4.15.0-08279-gfe53d1443a146 #38
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014
Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0xb3/0x145 lib/dump_stack.c:53
perf: interrupt took too long (7334 > 4816), lowering kernel.perf_event_max_sample_rate to 27000
 print_address_description+0x73/0x290 mm/kasan/report.c:252
 kasan_report_error mm/kasan/report.c:351 [inline]
 kasan_report+0x235/0x350 mm/kasan/report.c:409
 disk_unblock_events+0x3e/0x40 block/genhd.c:1672
 __blkdev_get+0x41b/0xd30 fs/block_dev.c:1535
 blkdev_get+0x283/0x980 fs/block_dev.c:1591
 do_dentry_open.isra.1+0x6c0/0xaf0 fs/open.c:752
 do_last fs/namei.c:3378 [inline]
 path_openat+0x587/0x28b0 fs/namei.c:3518
 do_filp_open+0x231/0x3a0 fs/namei.c:3553
 do_sys_open+0x399/0x610 fs/open.c:1059
 do_syscall_64+0x213/0x740 arch/x86/entry/common.c:285
 entry_SYSCALL64_slow_path+0x25/0x25
RIP: 0033:0x7f51d23262ce
RSP: 002b:00007ffdf1844310 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
RAX: ffffffffffffffda RBX: 0000562878961d30 RCX: 00007f51d23262ce
RDX: 00000000000a0800 RSI: 0000562878974860 RDI: ffffffffffffff9c
RBP: 0000562878965150 R08: 00007f51d1c9d914 R09: ffffffffffffffb0
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000e
R13: 0000562878965150 R14: 0000000000000004 R15: 0000000000000003

Allocated by task 160:
 kmem_cache_alloc_node_trace include/linux/slab.h:413 [inline]
 kmalloc_node include/linux/slab.h:537 [inline]
 kzalloc_node include/linux/slab.h:699 [inline]
 __alloc_disk_node+0xb6/0x450 block/genhd.c:1415
 loop_add+0x3fd/0x9e0 drivers/block/loop.c:1814
 loop_probe+0x152/0x180 drivers/block/loop.c:1921
 kobj_lookup+0x176/0x310 drivers/base/map.c:124
 get_gendisk+0x28/0x290 block/genhd.c:804
 __blkdev_get+0x2ec/0xd30 fs/block_dev.c:1433
 blkdev_get+0x485/0x980 fs/block_dev.c:1591
 do_dentry_open.isra.1+0x6c0/0xaf0 fs/open.c:752
 do_last fs/namei.c:3378 [inline]
 path_openat+0x587/0x28b0 fs/namei.c:3518
 do_filp_open+0x231/0x3a0 fs/namei.c:3553
 do_sys_open+0x399/0x610 fs/open.c:1059
 entry_SYSCALL_64_fastpath+0x1e/0x8b

Freed by task 165:
 __cache_free mm/slab.c:3484 [inline]
 kfree+0x8a/0xd0 mm/slab.c:3799
 disk_release+0x2c3/0x380 block/genhd.c:1264
 device_release+0x6e/0x1d0 drivers/base/core.c:811
 kobject_cleanup lib/kobject.c:646 [inline]
 kobject_release lib/kobject.c:675 [inline]
 kref_put include/linux/kref.h:70 [inline]
 kobject_put+0x14c/0x400 lib/kobject.c:692
 __blkdev_get+0x39f/0xd30 fs/block_dev.c:1528
 blkdev_get+0x283/0x980 fs/block_dev.c:1591
 do_dentry_open.isra.1+0x6c0/0xaf0 fs/open.c:752
 do_last fs/namei.c:3378 [inline]
 path_openat+0x587/0x28b0 fs/namei.c:3518
 do_filp_open+0x231/0x3a0 fs/namei.c:3553
 do_sys_open+0x399/0x610 fs/open.c:1059
 do_syscall_64+0x213/0x740 arch/x86/entry/common.c:285
 return_from_SYSCALL_64+0x0/0x75

The buggy address belongs to the object at ffff880030743280
 which belongs to the cache kmalloc-2048 of size 2048
The buggy address is located 1008 bytes inside of
 2048-byte region [ffff880030743280, ffff880030743a80)
The buggy address belongs to the page:
page:ffffea0000a99670 count:1 mapcount:0 mapping:ffff880030742180 index:0x0 compound_mapcount: 0
flags: 0x100000000008100(slab|head)
raw: 0100000000008100 ffff880030742180 0000000000000000 0000000100000003
raw: ffffea0000b895a0 ffffea0000b91720 ffff880036000800
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff880030743500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff880030743580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>ffff880030743600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                                             ^
 ffff880030743680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff880030743700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: blkdev loop UAF
  2018-02-02  3:58       ` Eric Biggers
@ 2018-02-05  8:56         ` Jan Kara
  0 siblings, 0 replies; 5+ messages in thread
From: Jan Kara @ 2018-02-05  8:56 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Jan Kara, Hou Tao, Foy, Jens Axboe, Dan Carpenter, Paolo Valente,
	security, syzkaller, linux-block

On Thu 01-02-18 19:58:45, Eric Biggers wrote:
> On Thu, Jan 11, 2018 at 06:00:08PM +0100, Jan Kara wrote:
> > On Thu 11-01-18 19:22:39, Hou Tao wrote:
> > > Hi,
> > > 
> > > On 2018/1/11 16:24, Dan Carpenter wrote:
> > > > Thanks for your report and the patch.  I am sending it to the
> > > > linux-block devs since it's already public.
> > > > 
> > > > regards,
> > > > dan carpenter
> > > 
> > > The User-after-free problem is not specific for loop device, it can also
> > > be reproduced on scsi device, and there are more race problems caused by
> > > the race between bdev open and gendisk shutdown [1].
> > > 
> > > The cause of the UAF problem is that there are two instances of gendisk which share
> > > the same bdev. After the process owning the new gendisk increases bdev->bd_openers,
> > > the other process which owns the older gendisk will find bdev->bd_openers is not zero
> > > and will put the last reference of the older gendisk and cause User-after-free.
> > > 
> > > I had proposed a patch for the problem, but it's still an incomplete fix for the race
> > > between gendisk shutdown and bdev opening.
> > > 
> > > diff --git a/fs/block_dev.c b/fs/block_dev.c
> > > index 4a181fc..5ecdb9f 100644
> > > --- a/fs/block_dev.c
> > > +++ b/fs/block_dev.c
> > > @@ -1510,6 +1510,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
> > >                 if (bdev->bd_bdi == &noop_backing_dev_info)
> > >                         bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
> > >         } else {
> > > +               if (bdev->bd_disk != disk) {
> > > +                       ret = -ENXIO;
> > > +                       goto out_unlock_bdev;
> > > +               }
> > > +
> > >                 if (bdev->bd_contains == bdev) {
> > >                         ret = 0;
> > >                         if (bdev->bd_disk->fops->open)
> > > 
> > > 
> > > As far as I know, Jan Kara is working on these problems. So, Jan, any suggestions ?
> > 
> > Yeah, thanks for the ping. I was working today to get this fixed. I have
> > about 6 patches (attached) which should fix all the outstanding issues I'm
> > aware of.  So far they are only boot tested. I will give them more
> > stress-testing next week and then post them officially but if you have time,
> > feel free to try them out as well. Thanks!
> > 
> 
> Jan, are you still planning to fix this?  This was also reported by syzbot here:
> https://groups.google.com/d/msg/syzkaller-bugs/9wXx4YULITQ/0SK8vqxhAgAJ.  Note
> that the C reproducer attached to that report doesn't work for me anymore
> following ae6650163c6 ("loop: fix concurrent lo_open/lo_release").  However,
> syzbot has still hit this on more recent kernels.  Here's a C reproducer that
> still works for me on Linus' tree as of today, though it can take 5-10 seconds:

Yes, I do plan to post the patches. I was just struggling with getting the
original failure reproduced (plus I had quite some other work, vacation) so
I couldn't really test whether my patches fix the reported problem. So
thanks for your reproducer, I'll try it out and see whether my patches fix
the problem.

								Honza

> #include <fcntl.h>
> #include <linux/loop.h>
> #include <sys/ioctl.h>
> #include <unistd.h>
> 
> int main()
> {
> 	int fd = open("/dev/loop-control", 0);
> 
> 	if (!fork()) {
> 		for (;;)
> 			ioctl(fd, LOOP_CTL_ADD, 0);
> 	}
> 
> 	if (!fork()) {
> 		for (;;)
> 			ioctl(fd, LOOP_CTL_REMOVE, 0);
> 	}
> 
> 	fork();
> 	for (;;) {
> 		fd = open("/dev/loop0", O_EXCL);
> 		close(fd);
> 	}
> }
> 
> This is the KASAN splat I get from the above:
> 
> BUG: KASAN: use-after-free in disk_unblock_events+0x3e/0x40 block/genhd.c:1672
> Read of size 8 at addr ffff880030743670 by task systemd-udevd/165
> 
> CPU: 1 PID: 165 Comm: systemd-udevd Not tainted 4.15.0-08279-gfe53d1443a146 #38
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014
> Call Trace:
>  __dump_stack lib/dump_stack.c:17 [inline]
>  dump_stack+0xb3/0x145 lib/dump_stack.c:53
> perf: interrupt took too long (7334 > 4816), lowering kernel.perf_event_max_sample_rate to 27000
>  print_address_description+0x73/0x290 mm/kasan/report.c:252
>  kasan_report_error mm/kasan/report.c:351 [inline]
>  kasan_report+0x235/0x350 mm/kasan/report.c:409
>  disk_unblock_events+0x3e/0x40 block/genhd.c:1672
>  __blkdev_get+0x41b/0xd30 fs/block_dev.c:1535
>  blkdev_get+0x283/0x980 fs/block_dev.c:1591
>  do_dentry_open.isra.1+0x6c0/0xaf0 fs/open.c:752
>  do_last fs/namei.c:3378 [inline]
>  path_openat+0x587/0x28b0 fs/namei.c:3518
>  do_filp_open+0x231/0x3a0 fs/namei.c:3553
>  do_sys_open+0x399/0x610 fs/open.c:1059
>  do_syscall_64+0x213/0x740 arch/x86/entry/common.c:285
>  entry_SYSCALL64_slow_path+0x25/0x25
> RIP: 0033:0x7f51d23262ce
> RSP: 002b:00007ffdf1844310 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
> RAX: ffffffffffffffda RBX: 0000562878961d30 RCX: 00007f51d23262ce
> RDX: 00000000000a0800 RSI: 0000562878974860 RDI: ffffffffffffff9c
> RBP: 0000562878965150 R08: 00007f51d1c9d914 R09: ffffffffffffffb0
> R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000e
> R13: 0000562878965150 R14: 0000000000000004 R15: 0000000000000003
> 
> Allocated by task 160:
>  kmem_cache_alloc_node_trace include/linux/slab.h:413 [inline]
>  kmalloc_node include/linux/slab.h:537 [inline]
>  kzalloc_node include/linux/slab.h:699 [inline]
>  __alloc_disk_node+0xb6/0x450 block/genhd.c:1415
>  loop_add+0x3fd/0x9e0 drivers/block/loop.c:1814
>  loop_probe+0x152/0x180 drivers/block/loop.c:1921
>  kobj_lookup+0x176/0x310 drivers/base/map.c:124
>  get_gendisk+0x28/0x290 block/genhd.c:804
>  __blkdev_get+0x2ec/0xd30 fs/block_dev.c:1433
>  blkdev_get+0x485/0x980 fs/block_dev.c:1591
>  do_dentry_open.isra.1+0x6c0/0xaf0 fs/open.c:752
>  do_last fs/namei.c:3378 [inline]
>  path_openat+0x587/0x28b0 fs/namei.c:3518
>  do_filp_open+0x231/0x3a0 fs/namei.c:3553
>  do_sys_open+0x399/0x610 fs/open.c:1059
>  entry_SYSCALL_64_fastpath+0x1e/0x8b
> 
> Freed by task 165:
>  __cache_free mm/slab.c:3484 [inline]
>  kfree+0x8a/0xd0 mm/slab.c:3799
>  disk_release+0x2c3/0x380 block/genhd.c:1264
>  device_release+0x6e/0x1d0 drivers/base/core.c:811
>  kobject_cleanup lib/kobject.c:646 [inline]
>  kobject_release lib/kobject.c:675 [inline]
>  kref_put include/linux/kref.h:70 [inline]
>  kobject_put+0x14c/0x400 lib/kobject.c:692
>  __blkdev_get+0x39f/0xd30 fs/block_dev.c:1528
>  blkdev_get+0x283/0x980 fs/block_dev.c:1591
>  do_dentry_open.isra.1+0x6c0/0xaf0 fs/open.c:752
>  do_last fs/namei.c:3378 [inline]
>  path_openat+0x587/0x28b0 fs/namei.c:3518
>  do_filp_open+0x231/0x3a0 fs/namei.c:3553
>  do_sys_open+0x399/0x610 fs/open.c:1059
>  do_syscall_64+0x213/0x740 arch/x86/entry/common.c:285
>  return_from_SYSCALL_64+0x0/0x75
> 
> The buggy address belongs to the object at ffff880030743280
>  which belongs to the cache kmalloc-2048 of size 2048
> The buggy address is located 1008 bytes inside of
>  2048-byte region [ffff880030743280, ffff880030743a80)
> The buggy address belongs to the page:
> page:ffffea0000a99670 count:1 mapcount:0 mapping:ffff880030742180 index:0x0 compound_mapcount: 0
> flags: 0x100000000008100(slab|head)
> raw: 0100000000008100 ffff880030742180 0000000000000000 0000000100000003
> raw: ffffea0000b895a0 ffffea0000b91720 ffff880036000800
> page dumped because: kasan: bad access detected
> 
> Memory state around the buggy address:
>  ffff880030743500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>  ffff880030743580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> >ffff880030743600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>                                                              ^
>  ffff880030743680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>  ffff880030743700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-02-05  8:56 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <654967ea.6cce.160e43379f6.Coremail.long7573@126.com>
2018-01-11  8:24 ` blkdev loop UAF Dan Carpenter
2018-01-11 11:22   ` Hou Tao
2018-01-11 17:00     ` Jan Kara
2018-02-02  3:58       ` Eric Biggers
2018-02-05  8:56         ` Jan Kara

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.