> From 73471e01733dd1d998ff3cd41edebb4c78793193 Mon Sep 17 00:00:00 2001 > From: Peter Maydell > Date: Mon, 20 Apr 2020 11:54:22 +0100 > Subject: [RFC] linux-user: Use new F_SET_FILE_32BIT_FS fcntl for 32-bit guests > > If the guest is 32 bit then there is a potential problem if the > host gives us back a 64-bit sized value that we can't fit into > the ABI the guest requires. This is a theoretical issue for many > syscalls, but a real issue for directory reads where the host > is using ext3 or ext4. There the 'offset' values returned via > the getdents syscall are hashes, and on a 64-bit system they > will always fill the full 64 bits. > > Use the F_SET_FILE_32BIT_FS fcntl to tell the kernel to stick > to 32-bit sized hashes for fds used by the guest. > > Signed-off-by: Peter Maydell Another question I had here is whether the filesystem needs to provide 32-bit values for other syscalls, such as stat() and statfs()? For ext4, stat() is not going to return a 64-bit inode number, but other filesystems might (e.g. Lustre has a mode to do this). Similarly, should statfs() scale up f_bsize until it can return a 32-bit f_blocks value? We also had to do this ages ago for Lustre when 32-bit clients couldn't handle > 16TB filesystems, but that is a single disk today. Should that be added into F_SET_FILE_32BIT_FS also? Cheers, Andreas > --- > RFC patch because it depends on the kernel patch to provide > F_SET_FILE_32BIT_FS, which is still under discussion. All this > patch does is call the fcntl for every fd the guest opens. 
> > linux-user/syscall.c | 27 +++++++++++++++++++++++++++ > 1 file changed, 27 insertions(+) > > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > index 674f70e70a5..8966d4881bd 100644 > --- a/linux-user/syscall.c > +++ b/linux-user/syscall.c > @@ -884,6 +884,28 @@ static inline int host_to_target_sock_type(int host_type) > return target_type; > } > > +/* > + * If the guest is using a 32 bit ABI then we should try to ask the kernel > + * to provide 32-bit offsets in getdents syscalls, as otherwise some > + * filesystems will return 64-bit hash values which we can't fit into > + * the field sizes the guest ABI mandates. > + */ > +#ifndef F_SET_FILE_32BIT_FS > +#define F_SET_FILE_32BIT_FS (1024 + 15) > +#endif > + > +static inline void request_32bit_fs(int fd) > +{ > +#if HOST_LONG_BITS > TARGET_ABI_BITS > + /* > + * Ignore errors, which are likely due to the host kernel being too > + * old to support this fcntl. We'll try anyway, which might or might > + * not work, depending on the guest code and on the host filesystem. 
> + */ > + fcntl(fd, F_SET_FILE_32BIT_FS); > +#endif > +} > + > static abi_ulong target_brk; > static abi_ulong target_original_brk; > static abi_ulong brk_page; > @@ -7704,6 +7726,7 @@ static abi_long do_syscall1(void *cpu_env, int > num, abi_long arg1, > target_to_host_bitmask(arg2, > fcntl_flags_tbl), > arg3)); > fd_trans_unregister(ret); > + request_32bit_fs(ret); > unlock_user(p, arg1, 0); > return ret; > #endif > @@ -7714,6 +7737,7 @@ static abi_long do_syscall1(void *cpu_env, int > num, abi_long arg1, > target_to_host_bitmask(arg3, > fcntl_flags_tbl), > arg4)); > fd_trans_unregister(ret); > + request_32bit_fs(ret); > unlock_user(p, arg2, 0); > return ret; > #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) > @@ -7725,6 +7749,7 @@ static abi_long do_syscall1(void *cpu_env, int > num, abi_long arg1, > case TARGET_NR_open_by_handle_at: > ret = do_open_by_handle_at(arg1, arg2, arg3); > fd_trans_unregister(ret); > + request_32bit_fs(ret); > return ret; > #endif > case TARGET_NR_close: > @@ -7769,6 +7794,7 @@ static abi_long do_syscall1(void *cpu_env, int > num, abi_long arg1, > return -TARGET_EFAULT; > ret = get_errno(creat(p, arg2)); > fd_trans_unregister(ret); > + request_32bit_fs(ret); > unlock_user(p, arg1, 0); > return ret; > #endif > @@ -12393,6 +12419,7 @@ static abi_long do_syscall1(void *cpu_env, int > num, abi_long arg1, > } > ret = get_errno(memfd_create(p, arg2)); > fd_trans_unregister(ret); > + request_32bit_fs(ret); > unlock_user(p, arg1, 0); > return ret; > #endif > -- > 2.20.1 Cheers, Andreas