Stable Archive on lore.kernel.org
 help / Atom feed
* [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
@ 2019-02-01 12:16 Gao Xiang
  2019-02-12  5:05 ` Gao Xiang
  2019-02-15  7:02 ` Chao Yu
  0 siblings, 2 replies; 14+ messages in thread
From: Gao Xiang @ 2019-02-01 12:16 UTC (permalink / raw)
  To: Chao Yu, Al Viro, Greg Kroah-Hartman, devel
  Cc: LKML, linux-erofs, Chao Yu, Miao Xie, weidu.du, Fang Wei,
	Gao Xiang, stable

As Al pointed out, "
... and while we are at it, what happens to
	unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
	unsigned int matched = min(startprfx, endprfx);

	struct qstr dname = QSTR_INIT(data + nameoff,
		unlikely(mid >= ndirents - 1) ?
			maxsize - nameoff :
			le16_to_cpu(de[mid + 1].nameoff) - nameoff);

	/* string comparison without already matched prefix */
	int ret = dirnamecmp(name, &dname, &matched);
if le16_to_cpu(de[...].nameoff) is not monotonically increasing?  I.e.
what's to prevent e.g. (unsigned)-1 ending up in dname.len?

Corrupted fs image shouldn't oops the kernel.. "

Revisit the related lookup flow to address the issue.

Fixes: d72d1ce60174 ("staging: erofs: add namei functions")
Cc: <stable@vger.kernel.org> # 4.19+
Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
[ It should be better get reviewed first and for linux-next... ]

change log v2:
 - fix the missing "kunmap_atomic" pointed out by Dan;
 - minor cleanup;

 drivers/staging/erofs/namei.c | 187 ++++++++++++++++++++++--------------------
 1 file changed, 99 insertions(+), 88 deletions(-)

diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
index 5596c52e246d..321c881d720f 100644
--- a/drivers/staging/erofs/namei.c
+++ b/drivers/staging/erofs/namei.c
@@ -15,74 +15,77 @@
 
 #include <trace/events/erofs.h>
 
-/* based on the value of qn->len is accurate */
-static inline int dirnamecmp(struct qstr *qn,
-	struct qstr *qd, unsigned int *matched)
+struct erofs_qstr {
+	const unsigned char *name;
+	const unsigned char *end;
+};
+
+/* based on the end of qn is accurate and it must have the trailing '\0' */
+static inline int dirnamecmp(const struct erofs_qstr *qn,
+			     const struct erofs_qstr *qd,
+			     unsigned int *matched)
 {
-	unsigned int i = *matched, len = min(qn->len, qd->len);
-loop:
-	if (unlikely(i >= len)) {
-		*matched = i;
-		if (qn->len < qd->len) {
-			/*
-			 * actually (qn->len == qd->len)
-			 * when qd->name[i] == '\0'
-			 */
-			return qd->name[i] == '\0' ? 0 : -1;
+	unsigned int i = *matched;
+
+	/*
+	 * on-disk error, let's only BUG_ON in the debugging mode.
+	 * otherwise, it will return 1 to just skip the invalid name
+	 * and go on (in consideration of the lookup performance).
+	 */
+	DBG_BUGON(qd->name > qd->end);
+
+	/* qd could not have trailing '\0' */
+	/* However it is absolutely safe if < qd->end */
+	while (qd->name + i < qd->end && qd->name[i] != '\0') {
+		if (qn->name[i] != qd->name[i]) {
+			*matched = i;
+			return qn->name[i] > qd->name[i] ? 1 : -1;
 		}
-		return (qn->len > qd->len);
+		++i;
 	}
-
-	if (qn->name[i] != qd->name[i]) {
-		*matched = i;
-		return qn->name[i] > qd->name[i] ? 1 : -1;
-	}
-
-	++i;
-	goto loop;
+	*matched = i;
+	/* See comments in __d_alloc on the terminating NUL character */
+	return qn->name[i] == '\0' ? 0 : 1;
 }
 
-static struct erofs_dirent *find_target_dirent(
-	struct qstr *name,
-	u8 *data, int maxsize)
+#define nameoff_from_disk(off, sz)	(le16_to_cpu(off) & ((sz) - 1))
+
+static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
+					       u8 *data,
+					       unsigned int dirblksize,
+					       const int ndirents)
 {
-	unsigned int ndirents, head, back;
+	int head, back;
 	unsigned int startprfx, endprfx;
 	struct erofs_dirent *const de = (struct erofs_dirent *)data;
 
-	/* make sure that maxsize is valid */
-	BUG_ON(maxsize < sizeof(struct erofs_dirent));
-
-	ndirents = le16_to_cpu(de->nameoff) / sizeof(*de);
-
-	/* corrupted dir (may be unnecessary...) */
-	BUG_ON(!ndirents);
-
-	head = 0;
+	/* since the 1st dirent has been evaluated previously */
+	head = 1;
 	back = ndirents - 1;
 	startprfx = endprfx = 0;
 
 	while (head <= back) {
-		unsigned int mid = head + (back - head) / 2;
-		unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
+		const int mid = head + (back - head) / 2;
+		const int nameoff = nameoff_from_disk(de[mid].nameoff,
+						      dirblksize);
 		unsigned int matched = min(startprfx, endprfx);
-
-		struct qstr dname = QSTR_INIT(data + nameoff,
-			unlikely(mid >= ndirents - 1) ?
-				maxsize - nameoff :
-				le16_to_cpu(de[mid + 1].nameoff) - nameoff);
+		struct erofs_qstr dname = {
+			.name = data + nameoff,
+			.end = unlikely(mid >= ndirents - 1) ?
+				data + dirblksize :
+				data + nameoff_from_disk(de[mid + 1].nameoff,
+							 dirblksize)
+		};
 
 		/* string comparison without already matched prefix */
 		int ret = dirnamecmp(name, &dname, &matched);
 
-		if (unlikely(!ret))
+		if (unlikely(!ret)) {
 			return de + mid;
-		else if (ret > 0) {
+		} else if (ret > 0) {
 			head = mid + 1;
 			startprfx = matched;
-		} else if (unlikely(mid < 1))	/* fix "mid" overflow */
-			break;
-		else {
+		} else {
 			back = mid - 1;
 			endprfx = matched;
 		}
@@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent(
 	return ERR_PTR(-ENOENT);
 }
 
-static struct page *find_target_block_classic(
-	struct inode *dir,
-	struct qstr *name, int *_diff)
+static struct page *find_target_block_classic(struct inode *dir,
+					      struct erofs_qstr *name,
+					      int *_ndirents)
 {
 	unsigned int startprfx, endprfx;
-	unsigned int head, back;
+	int head, back;
 	struct address_space *const mapping = dir->i_mapping;
 	struct page *candidate = ERR_PTR(-ENOENT);
 
@@ -105,41 +108,43 @@ static struct page *find_target_block_classic(
 	back = inode_datablocks(dir) - 1;
 
 	while (head <= back) {
-		unsigned int mid = head + (back - head) / 2;
+		const int mid = head + (back - head) / 2;
 		struct page *page = read_mapping_page(mapping, mid, NULL);
 
-		if (IS_ERR(page)) {
-exact_out:
-			if (!IS_ERR(candidate)) /* valid candidate */
-				put_page(candidate);
-			return page;
-		} else {
-			int diff;
-			unsigned int ndirents, matched;
-			struct qstr dname;
+		if (!IS_ERR(page)) {
 			struct erofs_dirent *de = kmap_atomic(page);
-			unsigned int nameoff = le16_to_cpu(de->nameoff);
-
-			ndirents = nameoff / sizeof(*de);
+			const int nameoff = nameoff_from_disk(de->nameoff,
+							      EROFS_BLKSIZ);
+			const int ndirents = nameoff / sizeof(*de);
+			int diff;
+			unsigned int matched;
+			struct erofs_qstr dname;
 
-			/* corrupted dir (should have one entry at least) */
-			BUG_ON(!ndirents || nameoff > PAGE_SIZE);
+			if (unlikely(!ndirents)) {
+				DBG_BUGON(1);
+				kunmap_atomic(de);
+				put_page(page);
+				page = ERR_PTR(-EIO);
+				goto out;
+			}
 
 			matched = min(startprfx, endprfx);
 
 			dname.name = (u8 *)de + nameoff;
-			dname.len = ndirents == 1 ?
-				/* since the rest of the last page is 0 */
-				EROFS_BLKSIZ - nameoff
-				: le16_to_cpu(de[1].nameoff) - nameoff;
+			if (ndirents == 1)
+				dname.end = (u8 *)de + EROFS_BLKSIZ;
+			else
+				dname.end = (u8 *)de +
+					nameoff_from_disk(de[1].nameoff,
+							  EROFS_BLKSIZ);
 
 			/* string comparison without already matched prefix */
 			diff = dirnamecmp(name, &dname, &matched);
 			kunmap_atomic(de);
 
 			if (unlikely(!diff)) {
-				*_diff = 0;
-				goto exact_out;
+				*_ndirents = 0;
+				goto out;
 			} else if (diff > 0) {
 				head = mid + 1;
 				startprfx = matched;
@@ -147,47 +152,53 @@ static struct page *find_target_block_classic(
 				if (likely(!IS_ERR(candidate)))
 					put_page(candidate);
 				candidate = page;
+				*_ndirents = ndirents;
 			} else {
 				put_page(page);
 
-				if (unlikely(mid < 1))	/* fix "mid" overflow */
-					break;
-
 				back = mid - 1;
 				endprfx = matched;
 			}
+			continue;
 		}
+out:		/* free if the candidate is valid */
+		if (!IS_ERR(candidate))
+			put_page(candidate);
+		return page;
 	}
-	*_diff = 1;
 	return candidate;
 }
 
 int erofs_namei(struct inode *dir,
-	struct qstr *name,
-	erofs_nid_t *nid, unsigned int *d_type)
+		struct qstr *name,
+		erofs_nid_t *nid, unsigned int *d_type)
 {
-	int diff;
+	int ndirents;
 	struct page *page;
-	u8 *data;
+	void *data;
 	struct erofs_dirent *de;
+	struct erofs_qstr qn;
 
 	if (unlikely(!dir->i_size))
 		return -ENOENT;
 
-	diff = 1;
-	page = find_target_block_classic(dir, name, &diff);
+	qn.name = name->name;
+	qn.end = name->name + name->len;
+
+	ndirents = 0;
+	page = find_target_block_classic(dir, &qn, &ndirents);
 
-	if (unlikely(IS_ERR(page)))
+	if (IS_ERR(page))
 		return PTR_ERR(page);
 
 	data = kmap_atomic(page);
 	/* the target page has been mapped */
-	de = likely(diff) ?
-		/* since the rest of the last page is 0 */
-		find_target_dirent(name, data, EROFS_BLKSIZ) :
-		(struct erofs_dirent *)data;
+	if (ndirents)
+		de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
+	else
+		de = (struct erofs_dirent *)data;
 
-	if (likely(!IS_ERR(de))) {
+	if (!IS_ERR(de)) {
 		*nid = le64_to_cpu(de->nid);
 		*d_type = de->file_type;
 	}
-- 
2.14.4


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-01 12:16 [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei() Gao Xiang
@ 2019-02-12  5:05 ` Gao Xiang
  2019-02-15  7:02 ` Chao Yu
  1 sibling, 0 replies; 14+ messages in thread
From: Gao Xiang @ 2019-02-12  5:05 UTC (permalink / raw)
  To: Chao Yu, Al Viro, Dan Carpenter
  Cc: Gao Xiang, Greg Kroah-Hartman, devel, linux-erofs, LKML, stable,
	weidu.du, Miao Xie

kindly ping... some ideas about this patch v2? Thanks,

On 2019/2/1 20:16, Gao Xiang wrote:
> As Al pointed out, "
> ... and while we are at it, what happens to
> 	unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
> 	unsigned int matched = min(startprfx, endprfx);
>
> 	struct qstr dname = QSTR_INIT(data + nameoff,
> 		unlikely(mid >= ndirents - 1) ?
> 			maxsize - nameoff :
> 			le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>
> 	/* string comparison without already matched prefix */
> 	int ret = dirnamecmp(name, &dname, &matched);
> if le16_to_cpu(de[...].nameoff) is not monotonically increasing?  I.e.
> what's to prevent e.g. (unsigned)-1 ending up in dname.len?
>
> Corrupted fs image shouldn't oops the kernel.. "
>
> Revisit the related lookup flow to address the issue.
>
> Fixes: d72d1ce60174 ("staging: erofs: add namei functions")
> Cc: <stable@vger.kernel.org> # 4.19+
> Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
> ---
> [ It should be better get reviewed first and for linux-next... ]
>
> change log v2:
>  - fix the missing "kunmap_atomic" pointed out by Dan;
>  - minor cleanup;
>
>  drivers/staging/erofs/namei.c | 187 ++++++++++++++++++++++--------------------
>  1 file changed, 99 insertions(+), 88 deletions(-)
>
> diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
> index 5596c52e246d..321c881d720f 100644
> --- a/drivers/staging/erofs/namei.c
> +++ b/drivers/staging/erofs/namei.c
> @@ -15,74 +15,77 @@
>  
>  #include <trace/events/erofs.h>
>  
> -/* based on the value of qn->len is accurate */
> -static inline int dirnamecmp(struct qstr *qn,
> -	struct qstr *qd, unsigned int *matched)
> +struct erofs_qstr {
> +	const unsigned char *name;
> +	const unsigned char *end;
> +};
> +
> +/* based on the end of qn is accurate and it must have the trailing '\0' */
> +static inline int dirnamecmp(const struct erofs_qstr *qn,
> +			     const struct erofs_qstr *qd,
> +			     unsigned int *matched)
>  {
> -	unsigned int i = *matched, len = min(qn->len, qd->len);
> -loop:
> -	if (unlikely(i >= len)) {
> -		*matched = i;
> -		if (qn->len < qd->len) {
> -			/*
> -			 * actually (qn->len == qd->len)
> -			 * when qd->name[i] == '\0'
> -			 */
> -			return qd->name[i] == '\0' ? 0 : -1;
> +	unsigned int i = *matched;
> +
> +	/*
> +	 * on-disk error, let's only BUG_ON in the debugging mode.
> +	 * otherwise, it will return 1 to just skip the invalid name
> +	 * and go on (in consideration of the lookup performance).
> +	 */
> +	DBG_BUGON(qd->name > qd->end);
> +
> +	/* qd could not have trailing '\0' */
> +	/* However it is absolutely safe if < qd->end */
> +	while (qd->name + i < qd->end && qd->name[i] != '\0') {
> +		if (qn->name[i] != qd->name[i]) {
> +			*matched = i;
> +			return qn->name[i] > qd->name[i] ? 1 : -1;
>  		}
> -		return (qn->len > qd->len);
> +		++i;
>  	}
> -
> -	if (qn->name[i] != qd->name[i]) {
> -		*matched = i;
> -		return qn->name[i] > qd->name[i] ? 1 : -1;
> -	}
> -
> -	++i;
> -	goto loop;
> +	*matched = i;
> +	/* See comments in __d_alloc on the terminating NUL character */
> +	return qn->name[i] == '\0' ? 0 : 1;
>  }
>  
> -static struct erofs_dirent *find_target_dirent(
> -	struct qstr *name,
> -	u8 *data, int maxsize)
> +#define nameoff_from_disk(off, sz)	(le16_to_cpu(off) & ((sz) - 1))
> +
> +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
> +					       u8 *data,
> +					       unsigned int dirblksize,
> +					       const int ndirents)
>  {
> -	unsigned int ndirents, head, back;
> +	int head, back;
>  	unsigned int startprfx, endprfx;
>  	struct erofs_dirent *const de = (struct erofs_dirent *)data;
>  
> -	/* make sure that maxsize is valid */
> -	BUG_ON(maxsize < sizeof(struct erofs_dirent));
> -
> -	ndirents = le16_to_cpu(de->nameoff) / sizeof(*de);
> -
> -	/* corrupted dir (may be unnecessary...) */
> -	BUG_ON(!ndirents);
> -
> -	head = 0;
> +	/* since the 1st dirent has been evaluated previously */
> +	head = 1;
>  	back = ndirents - 1;
>  	startprfx = endprfx = 0;
>  
>  	while (head <= back) {
> -		unsigned int mid = head + (back - head) / 2;
> -		unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
> +		const int mid = head + (back - head) / 2;
> +		const int nameoff = nameoff_from_disk(de[mid].nameoff,
> +						      dirblksize);
>  		unsigned int matched = min(startprfx, endprfx);
> -
> -		struct qstr dname = QSTR_INIT(data + nameoff,
> -			unlikely(mid >= ndirents - 1) ?
> -				maxsize - nameoff :
> -				le16_to_cpu(de[mid + 1].nameoff) - nameoff);
> +		struct erofs_qstr dname = {
> +			.name = data + nameoff,
> +			.end = unlikely(mid >= ndirents - 1) ?
> +				data + dirblksize :
> +				data + nameoff_from_disk(de[mid + 1].nameoff,
> +							 dirblksize)
> +		};
>  
>  		/* string comparison without already matched prefix */
>  		int ret = dirnamecmp(name, &dname, &matched);
>  
> -		if (unlikely(!ret))
> +		if (unlikely(!ret)) {
>  			return de + mid;
> -		else if (ret > 0) {
> +		} else if (ret > 0) {
>  			head = mid + 1;
>  			startprfx = matched;
> -		} else if (unlikely(mid < 1))	/* fix "mid" overflow */
> -			break;
> -		else {
> +		} else {
>  			back = mid - 1;
>  			endprfx = matched;
>  		}
> @@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent(
>  	return ERR_PTR(-ENOENT);
>  }
>  
> -static struct page *find_target_block_classic(
> -	struct inode *dir,
> -	struct qstr *name, int *_diff)
> +static struct page *find_target_block_classic(struct inode *dir,
> +					      struct erofs_qstr *name,
> +					      int *_ndirents)
>  {
>  	unsigned int startprfx, endprfx;
> -	unsigned int head, back;
> +	int head, back;
>  	struct address_space *const mapping = dir->i_mapping;
>  	struct page *candidate = ERR_PTR(-ENOENT);
>  
> @@ -105,41 +108,43 @@ static struct page *find_target_block_classic(
>  	back = inode_datablocks(dir) - 1;
>  
>  	while (head <= back) {
> -		unsigned int mid = head + (back - head) / 2;
> +		const int mid = head + (back - head) / 2;
>  		struct page *page = read_mapping_page(mapping, mid, NULL);
>  
> -		if (IS_ERR(page)) {
> -exact_out:
> -			if (!IS_ERR(candidate)) /* valid candidate */
> -				put_page(candidate);
> -			return page;
> -		} else {
> -			int diff;
> -			unsigned int ndirents, matched;
> -			struct qstr dname;
> +		if (!IS_ERR(page)) {
>  			struct erofs_dirent *de = kmap_atomic(page);
> -			unsigned int nameoff = le16_to_cpu(de->nameoff);
> -
> -			ndirents = nameoff / sizeof(*de);
> +			const int nameoff = nameoff_from_disk(de->nameoff,
> +							      EROFS_BLKSIZ);
> +			const int ndirents = nameoff / sizeof(*de);
> +			int diff;
> +			unsigned int matched;
> +			struct erofs_qstr dname;
>  
> -			/* corrupted dir (should have one entry at least) */
> -			BUG_ON(!ndirents || nameoff > PAGE_SIZE);
> +			if (unlikely(!ndirents)) {
> +				DBG_BUGON(1);
> +				kunmap_atomic(de);
> +				put_page(page);
> +				page = ERR_PTR(-EIO);
> +				goto out;
> +			}
>  
>  			matched = min(startprfx, endprfx);
>  
>  			dname.name = (u8 *)de + nameoff;
> -			dname.len = ndirents == 1 ?
> -				/* since the rest of the last page is 0 */
> -				EROFS_BLKSIZ - nameoff
> -				: le16_to_cpu(de[1].nameoff) - nameoff;
> +			if (ndirents == 1)
> +				dname.end = (u8 *)de + EROFS_BLKSIZ;
> +			else
> +				dname.end = (u8 *)de +
> +					nameoff_from_disk(de[1].nameoff,
> +							  EROFS_BLKSIZ);
>  
>  			/* string comparison without already matched prefix */
>  			diff = dirnamecmp(name, &dname, &matched);
>  			kunmap_atomic(de);
>  
>  			if (unlikely(!diff)) {
> -				*_diff = 0;
> -				goto exact_out;
> +				*_ndirents = 0;
> +				goto out;
>  			} else if (diff > 0) {
>  				head = mid + 1;
>  				startprfx = matched;
> @@ -147,47 +152,53 @@ static struct page *find_target_block_classic(
>  				if (likely(!IS_ERR(candidate)))
>  					put_page(candidate);
>  				candidate = page;
> +				*_ndirents = ndirents;
>  			} else {
>  				put_page(page);
>  
> -				if (unlikely(mid < 1))	/* fix "mid" overflow */
> -					break;
> -
>  				back = mid - 1;
>  				endprfx = matched;
>  			}
> +			continue;
>  		}
> +out:		/* free if the candidate is valid */
> +		if (!IS_ERR(candidate))
> +			put_page(candidate);
> +		return page;
>  	}
> -	*_diff = 1;
>  	return candidate;
>  }
>  
>  int erofs_namei(struct inode *dir,
> -	struct qstr *name,
> -	erofs_nid_t *nid, unsigned int *d_type)
> +		struct qstr *name,
> +		erofs_nid_t *nid, unsigned int *d_type)
>  {
> -	int diff;
> +	int ndirents;
>  	struct page *page;
> -	u8 *data;
> +	void *data;
>  	struct erofs_dirent *de;
> +	struct erofs_qstr qn;
>  
>  	if (unlikely(!dir->i_size))
>  		return -ENOENT;
>  
> -	diff = 1;
> -	page = find_target_block_classic(dir, name, &diff);
> +	qn.name = name->name;
> +	qn.end = name->name + name->len;
> +
> +	ndirents = 0;
> +	page = find_target_block_classic(dir, &qn, &ndirents);
>  
> -	if (unlikely(IS_ERR(page)))
> +	if (IS_ERR(page))
>  		return PTR_ERR(page);
>  
>  	data = kmap_atomic(page);
>  	/* the target page has been mapped */
> -	de = likely(diff) ?
> -		/* since the rest of the last page is 0 */
> -		find_target_dirent(name, data, EROFS_BLKSIZ) :
> -		(struct erofs_dirent *)data;
> +	if (ndirents)
> +		de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
> +	else
> +		de = (struct erofs_dirent *)data;
>  
> -	if (likely(!IS_ERR(de))) {
> +	if (!IS_ERR(de)) {
>  		*nid = le64_to_cpu(de->nid);
>  		*d_type = de->file_type;
>  	}

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-01 12:16 [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei() Gao Xiang
  2019-02-12  5:05 ` Gao Xiang
@ 2019-02-15  7:02 ` Chao Yu
  2019-02-15  7:57   ` Dan Carpenter
  2019-02-15  8:58   ` Gao Xiang
  1 sibling, 2 replies; 14+ messages in thread
From: Chao Yu @ 2019-02-15  7:02 UTC (permalink / raw)
  To: Gao Xiang, Al Viro, Greg Kroah-Hartman, devel
  Cc: LKML, linux-erofs, Chao Yu, Miao Xie, weidu.du, Fang Wei, stable

On 2019/2/1 20:16, Gao Xiang wrote:
> As Al pointed out, "
> ... and while we are at it, what happens to
> 	unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
> 	unsigned int matched = min(startprfx, endprfx);
> 
> 	struct qstr dname = QSTR_INIT(data + nameoff,
> 		unlikely(mid >= ndirents - 1) ?
> 			maxsize - nameoff :
> 			le16_to_cpu(de[mid + 1].nameoff) - nameoff);
> 
> 	/* string comparison without already matched prefix */
> 	int ret = dirnamecmp(name, &dname, &matched);
> if le16_to_cpu(de[...].nameoff) is not monotonically increasing?  I.e.
> what's to prevent e.g. (unsigned)-1 ending up in dname.len?
> 
> Corrupted fs image shouldn't oops the kernel.. "
> 
> Revisit the related lookup flow to address the issue.
> 
> Fixes: d72d1ce60174 ("staging: erofs: add namei functions")
> Cc: <stable@vger.kernel.org> # 4.19+
> Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
> ---
> [ It should be better get reviewed first and for linux-next... ]
> 
> change log v2:
>  - fix the missing "kunmap_atomic" pointed out by Dan;
>  - minor cleanup;
> 
>  drivers/staging/erofs/namei.c | 187 ++++++++++++++++++++++--------------------
>  1 file changed, 99 insertions(+), 88 deletions(-)
> 
> diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
> index 5596c52e246d..321c881d720f 100644
> --- a/drivers/staging/erofs/namei.c
> +++ b/drivers/staging/erofs/namei.c
> @@ -15,74 +15,77 @@
>  
>  #include <trace/events/erofs.h>
>  
> -/* based on the value of qn->len is accurate */
> -static inline int dirnamecmp(struct qstr *qn,
> -	struct qstr *qd, unsigned int *matched)
> +struct erofs_qstr {
> +	const unsigned char *name;
> +	const unsigned char *end;
> +};
> +
> +/* based on the end of qn is accurate and it must have the trailing '\0' */
> +static inline int dirnamecmp(const struct erofs_qstr *qn,
> +			     const struct erofs_qstr *qd,
> +			     unsigned int *matched)
>  {
> -	unsigned int i = *matched, len = min(qn->len, qd->len);
> -loop:
> -	if (unlikely(i >= len)) {
> -		*matched = i;
> -		if (qn->len < qd->len) {
> -			/*
> -			 * actually (qn->len == qd->len)
> -			 * when qd->name[i] == '\0'
> -			 */
> -			return qd->name[i] == '\0' ? 0 : -1;
> +	unsigned int i = *matched;
> +
> +	/*
> +	 * on-disk error, let's only BUG_ON in the debugging mode.
> +	 * otherwise, it will return 1 to just skip the invalid name
> +	 * and go on (in consideration of the lookup performance).
> +	 */
> +	DBG_BUGON(qd->name > qd->end);

qd->name == qd->end is not allowed as well?

So will it be better to return directly here?

	if (unlikely(qd->name >= qd->end)) {
		DBG_BUGON(1);
		return 1;
	}

> +
> +	/* qd could not have trailing '\0' */
> +	/* However it is absolutely safe if < qd->end */
> +	while (qd->name + i < qd->end && qd->name[i] != '\0') {
> +		if (qn->name[i] != qd->name[i]) {
> +			*matched = i;
> +			return qn->name[i] > qd->name[i] ? 1 : -1;
>  		}
> -		return (qn->len > qd->len);
> +		++i;
>  	}
> -
> -	if (qn->name[i] != qd->name[i]) {
> -		*matched = i;
> -		return qn->name[i] > qd->name[i] ? 1 : -1;
> -	}
> -
> -	++i;
> -	goto loop;
> +	*matched = i;
> +	/* See comments in __d_alloc on the terminating NUL character */
> +	return qn->name[i] == '\0' ? 0 : 1;
>  }
>  
> -static struct erofs_dirent *find_target_dirent(
> -	struct qstr *name,
> -	u8 *data, int maxsize)
> +#define nameoff_from_disk(off, sz)	(le16_to_cpu(off) & ((sz) - 1))
> +
> +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
> +					       u8 *data,
> +					       unsigned int dirblksize,
> +					       const int ndirents)
>  {
> -	unsigned int ndirents, head, back;
> +	int head, back;
>  	unsigned int startprfx, endprfx;
>  	struct erofs_dirent *const de = (struct erofs_dirent *)data;
>  
> -	/* make sure that maxsize is valid */
> -	BUG_ON(maxsize < sizeof(struct erofs_dirent));
> -
> -	ndirents = le16_to_cpu(de->nameoff) / sizeof(*de);
> -
> -	/* corrupted dir (may be unnecessary...) */
> -	BUG_ON(!ndirents);
> -
> -	head = 0;
> +	/* since the 1st dirent has been evaluated previously */
> +	head = 1;
>  	back = ndirents - 1;
>  	startprfx = endprfx = 0;
>  
>  	while (head <= back) {
> -		unsigned int mid = head + (back - head) / 2;
> -		unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
> +		const int mid = head + (back - head) / 2;
> +		const int nameoff = nameoff_from_disk(de[mid].nameoff,
> +						      dirblksize);
>  		unsigned int matched = min(startprfx, endprfx);
> -
> -		struct qstr dname = QSTR_INIT(data + nameoff,
> -			unlikely(mid >= ndirents - 1) ?
> -				maxsize - nameoff :
> -				le16_to_cpu(de[mid + 1].nameoff) - nameoff);
> +		struct erofs_qstr dname = {
> +			.name = data + nameoff,
> +			.end = unlikely(mid >= ndirents - 1) ?
> +				data + dirblksize :
> +				data + nameoff_from_disk(de[mid + 1].nameoff,
> +							 dirblksize)
> +		};
>  
>  		/* string comparison without already matched prefix */
>  		int ret = dirnamecmp(name, &dname, &matched);
>  
> -		if (unlikely(!ret))
> +		if (unlikely(!ret)) {
>  			return de + mid;
> -		else if (ret > 0) {
> +		} else if (ret > 0) {
>  			head = mid + 1;
>  			startprfx = matched;
> -		} else if (unlikely(mid < 1))	/* fix "mid" overflow */
> -			break;
> -		else {
> +		} else {
>  			back = mid - 1;
>  			endprfx = matched;
>  		}
> @@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent(
>  	return ERR_PTR(-ENOENT);
>  }
>  
> -static struct page *find_target_block_classic(
> -	struct inode *dir,
> -	struct qstr *name, int *_diff)
> +static struct page *find_target_block_classic(struct inode *dir,
> +					      struct erofs_qstr *name,
> +					      int *_ndirents)
>  {
>  	unsigned int startprfx, endprfx;
> -	unsigned int head, back;
> +	int head, back;
>  	struct address_space *const mapping = dir->i_mapping;
>  	struct page *candidate = ERR_PTR(-ENOENT);
>  
> @@ -105,41 +108,43 @@ static struct page *find_target_block_classic(
>  	back = inode_datablocks(dir) - 1;
>  
>  	while (head <= back) {
> -		unsigned int mid = head + (back - head) / 2;
> +		const int mid = head + (back - head) / 2;
>  		struct page *page = read_mapping_page(mapping, mid, NULL);
>  
> -		if (IS_ERR(page)) {
> -exact_out:
> -			if (!IS_ERR(candidate)) /* valid candidate */
> -				put_page(candidate);
> -			return page;
> -		} else {
> -			int diff;
> -			unsigned int ndirents, matched;
> -			struct qstr dname;
> +		if (!IS_ERR(page)) {
>  			struct erofs_dirent *de = kmap_atomic(page);
> -			unsigned int nameoff = le16_to_cpu(de->nameoff);
> -
> -			ndirents = nameoff / sizeof(*de);
> +			const int nameoff = nameoff_from_disk(de->nameoff,
> +							      EROFS_BLKSIZ);
> +			const int ndirents = nameoff / sizeof(*de);
> +			int diff;
> +			unsigned int matched;
> +			struct erofs_qstr dname;
>  
> -			/* corrupted dir (should have one entry at least) */
> -			BUG_ON(!ndirents || nameoff > PAGE_SIZE);
> +			if (unlikely(!ndirents)) {
> +				DBG_BUGON(1);
> +				kunmap_atomic(de);
> +				put_page(page);
> +				page = ERR_PTR(-EIO);
> +				goto out;
> +			}
>  
>  			matched = min(startprfx, endprfx);
>  
>  			dname.name = (u8 *)de + nameoff;
> -			dname.len = ndirents == 1 ?
> -				/* since the rest of the last page is 0 */
> -				EROFS_BLKSIZ - nameoff
> -				: le16_to_cpu(de[1].nameoff) - nameoff;
> +			if (ndirents == 1)
> +				dname.end = (u8 *)de + EROFS_BLKSIZ;
> +			else
> +				dname.end = (u8 *)de +
> +					nameoff_from_disk(de[1].nameoff,
> +							  EROFS_BLKSIZ);
>  
>  			/* string comparison without already matched prefix */
>  			diff = dirnamecmp(name, &dname, &matched);
>  			kunmap_atomic(de);
>  
>  			if (unlikely(!diff)) {
> -				*_diff = 0;
> -				goto exact_out;
> +				*_ndirents = 0;
> +				goto out;
>  			} else if (diff > 0) {
>  				head = mid + 1;
>  				startprfx = matched;
> @@ -147,47 +152,53 @@ static struct page *find_target_block_classic(
>  				if (likely(!IS_ERR(candidate)))
>  					put_page(candidate);
>  				candidate = page;
> +				*_ndirents = ndirents;
>  			} else {
>  				put_page(page);
>  
> -				if (unlikely(mid < 1))	/* fix "mid" overflow */
> -					break;
> -
>  				back = mid - 1;
>  				endprfx = matched;
>  			}
> +			continue;
>  		}
> +out:		/* free if the candidate is valid */
> +		if (!IS_ERR(candidate))
> +			put_page(candidate);
> +		return page;
>  	}
> -	*_diff = 1;
>  	return candidate;
>  }
>  
>  int erofs_namei(struct inode *dir,
> -	struct qstr *name,
> -	erofs_nid_t *nid, unsigned int *d_type)
> +		struct qstr *name,
> +		erofs_nid_t *nid, unsigned int *d_type)
>  {
> -	int diff;
> +	int ndirents;
>  	struct page *page;
> -	u8 *data;
> +	void *data;
>  	struct erofs_dirent *de;
> +	struct erofs_qstr qn;
>  
>  	if (unlikely(!dir->i_size))
>  		return -ENOENT;
>  
> -	diff = 1;
> -	page = find_target_block_classic(dir, name, &diff);
> +	qn.name = name->name;
> +	qn.end = name->name + name->len;
> +
> +	ndirents = 0;
> +	page = find_target_block_classic(dir, &qn, &ndirents);
>  
> -	if (unlikely(IS_ERR(page)))
> +	if (IS_ERR(page))
>  		return PTR_ERR(page);
>  
>  	data = kmap_atomic(page);
>  	/* the target page has been mapped */
> -	de = likely(diff) ?
> -		/* since the rest of the last page is 0 */
> -		find_target_dirent(name, data, EROFS_BLKSIZ) :
> -		(struct erofs_dirent *)data;
> +	if (ndirents)
> +		de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);

If we want to support different blksize during dirent lookup, how about
adding parameter for find_target_block_classic() as find_target_dirent()?

Thanks,

> +	else
> +		de = (struct erofs_dirent *)data;
>  
> -	if (likely(!IS_ERR(de))) {
> +	if (!IS_ERR(de)) {
>  		*nid = le64_to_cpu(de->nid);
>  		*d_type = de->file_type;
>  	}
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-15  7:02 ` Chao Yu
@ 2019-02-15  7:57   ` Dan Carpenter
  2019-02-15  9:04     ` Gao Xiang
  2019-02-15  9:32     ` Chao Yu
  2019-02-15  8:58   ` Gao Xiang
  1 sibling, 2 replies; 14+ messages in thread
From: Dan Carpenter @ 2019-02-15  7:57 UTC (permalink / raw)
  To: Chao Yu
  Cc: Gao Xiang, Al Viro, Greg Kroah-Hartman, devel, linux-erofs,
	Chao Yu, LKML, stable, weidu.du, Fang Wei, Miao Xie

On Fri, Feb 15, 2019 at 03:02:25PM +0800, Chao Yu wrote:
> On 2019/2/1 20:16, Gao Xiang wrote:
> > +	/*
> > +	 * on-disk error, let's only BUG_ON in the debugging mode.
> > +	 * otherwise, it will return 1 to just skip the invalid name
> > +	 * and go on (in consideration of the lookup performance).
> > +	 */
> > +	DBG_BUGON(qd->name > qd->end);
> 
> qd->name == qd->end is not allowed as well?
> 
> So will it be better to return directly here?
> 
> 	if (unlikely(qd->name >= qd->end)) {
> 		DBG_BUGON(1);
> 		return 1;
> 	}

Please don't add likely/unlikely() annotations unless you have
benchmarked it and it makes a difference.

regards,
dan carpenter



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-15  7:02 ` Chao Yu
  2019-02-15  7:57   ` Dan Carpenter
@ 2019-02-15  8:58   ` Gao Xiang
  2019-02-18  1:39     ` Chao Yu
  1 sibling, 1 reply; 14+ messages in thread
From: Gao Xiang @ 2019-02-15  8:58 UTC (permalink / raw)
  To: Chao Yu
  Cc: Al Viro, Greg Kroah-Hartman, devel, LKML, linux-erofs, Chao Yu,
	Miao Xie, weidu.du, Fang Wei, stable

Hi Chao,

On 2019/2/15 15:02, Chao Yu wrote:
> On 2019/2/1 20:16, Gao Xiang wrote:
>> As Al pointed out, "
>> ... and while we are at it, what happens to
>> 	unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
>> 	unsigned int matched = min(startprfx, endprfx);
>>
>> 	struct qstr dname = QSTR_INIT(data + nameoff,
>> 		unlikely(mid >= ndirents - 1) ?
>> 			maxsize - nameoff :
>> 			le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>>
>> 	/* string comparison without already matched prefix */
>> 	int ret = dirnamecmp(name, &dname, &matched);
>> if le16_to_cpu(de[...].nameoff) is not monotonically increasing?  I.e.
>> what's to prevent e.g. (unsigned)-1 ending up in dname.len?
>>
>> Corrupted fs image shouldn't oops the kernel.. "
>>
>> Revisit the related lookup flow to address the issue.
>>
>> Fixes: d72d1ce60174 ("staging: erofs: add namei functions")
>> Cc: <stable@vger.kernel.org> # 4.19+
>> Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
>> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
>> ---
>> [ It should be better get reviewed first and for linux-next... ]
>>
>> change log v2:
>>  - fix the missing "kunmap_atomic" pointed out by Dan;
>>  - minor cleanup;
>>
>>  drivers/staging/erofs/namei.c | 187 ++++++++++++++++++++++--------------------
>>  1 file changed, 99 insertions(+), 88 deletions(-)
>>
>> diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
>> index 5596c52e246d..321c881d720f 100644
>> --- a/drivers/staging/erofs/namei.c
>> +++ b/drivers/staging/erofs/namei.c
>> @@ -15,74 +15,77 @@
>>  
>>  #include <trace/events/erofs.h>
>>  
>> -/* based on the value of qn->len is accurate */
>> -static inline int dirnamecmp(struct qstr *qn,
>> -	struct qstr *qd, unsigned int *matched)
>> +struct erofs_qstr {
>> +	const unsigned char *name;
>> +	const unsigned char *end;
>> +};
>> +
>> +/* based on the end of qn is accurate and it must have the trailing '\0' */
>> +static inline int dirnamecmp(const struct erofs_qstr *qn,
>> +			     const struct erofs_qstr *qd,
>> +			     unsigned int *matched)
>>  {
>> -	unsigned int i = *matched, len = min(qn->len, qd->len);
>> -loop:
>> -	if (unlikely(i >= len)) {
>> -		*matched = i;
>> -		if (qn->len < qd->len) {
>> -			/*
>> -			 * actually (qn->len == qd->len)
>> -			 * when qd->name[i] == '\0'
>> -			 */
>> -			return qd->name[i] == '\0' ? 0 : -1;
>> +	unsigned int i = *matched;
>> +
>> +	/*
>> +	 * on-disk error, let's only BUG_ON in the debugging mode.
>> +	 * otherwise, it will return 1 to just skip the invalid name
>> +	 * and go on (in consideration of the lookup performance).
>> +	 */
>> +	DBG_BUGON(qd->name > qd->end);
> 
> qd->name == qd->end is not allowed as well?

Make sense, it is only used for debugging mode, let me send another patch later...

> 
> So will it be better to return directly here?
> 
> 	if (unlikely(qd->name >= qd->end)) {
> 		DBG_BUGON(1);
> 		return 1;
> 	}

Corrupted image is rare happened in normal systems, I tend to only use
DBG_BUGON here, and  qn->name[i] is of course not '\0', so it will return 1;

> 
>> +
>> +	/* qd could not have trailing '\0' */
>> +	/* However it is absolutely safe if < qd->end */
>> +	while (qd->name + i < qd->end && qd->name[i] != '\0') {
>> +		if (qn->name[i] != qd->name[i]) {
>> +			*matched = i;
>> +			return qn->name[i] > qd->name[i] ? 1 : -1;
>>  		}
>> -		return (qn->len > qd->len);
>> +		++i;
>>  	}
>> -
>> -	if (qn->name[i] != qd->name[i]) {
>> -		*matched = i;
>> -		return qn->name[i] > qd->name[i] ? 1 : -1;
>> -	}
>> -
>> -	++i;
>> -	goto loop;
>> +	*matched = i;
>> +	/* See comments in __d_alloc on the terminating NUL character */
>> +	return qn->name[i] == '\0' ? 0 : 1;
>>  }
>>  
>> -static struct erofs_dirent *find_target_dirent(
>> -	struct qstr *name,
>> -	u8 *data, int maxsize)
>> +#define nameoff_from_disk(off, sz)	(le16_to_cpu(off) & ((sz) - 1))
>> +
>> +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
>> +					       u8 *data,
>> +					       unsigned int dirblksize,
>> +					       const int ndirents)
>>  {
>> -	unsigned int ndirents, head, back;
>> +	int head, back;
>>  	unsigned int startprfx, endprfx;
>>  	struct erofs_dirent *const de = (struct erofs_dirent *)data;
>>  
>> -	/* make sure that maxsize is valid */
>> -	BUG_ON(maxsize < sizeof(struct erofs_dirent));
>> -
>> -	ndirents = le16_to_cpu(de->nameoff) / sizeof(*de);
>> -
>> -	/* corrupted dir (may be unnecessary...) */
>> -	BUG_ON(!ndirents);
>> -
>> -	head = 0;
>> +	/* since the 1st dirent has been evaluated previously */
>> +	head = 1;
>>  	back = ndirents - 1;
>>  	startprfx = endprfx = 0;
>>  
>>  	while (head <= back) {
>> -		unsigned int mid = head + (back - head) / 2;
>> -		unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
>> +		const int mid = head + (back - head) / 2;
>> +		const int nameoff = nameoff_from_disk(de[mid].nameoff,
>> +						      dirblksize);
>>  		unsigned int matched = min(startprfx, endprfx);
>> -
>> -		struct qstr dname = QSTR_INIT(data + nameoff,
>> -			unlikely(mid >= ndirents - 1) ?
>> -				maxsize - nameoff :
>> -				le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>> +		struct erofs_qstr dname = {
>> +			.name = data + nameoff,
>> +			.end = unlikely(mid >= ndirents - 1) ?
>> +				data + dirblksize :
>> +				data + nameoff_from_disk(de[mid + 1].nameoff,
>> +							 dirblksize)
>> +		};
>>  
>>  		/* string comparison without already matched prefix */
>>  		int ret = dirnamecmp(name, &dname, &matched);
>>  
>> -		if (unlikely(!ret))
>> +		if (unlikely(!ret)) {
>>  			return de + mid;
>> -		else if (ret > 0) {
>> +		} else if (ret > 0) {
>>  			head = mid + 1;
>>  			startprfx = matched;
>> -		} else if (unlikely(mid < 1))	/* fix "mid" overflow */
>> -			break;
>> -		else {
>> +		} else {
>>  			back = mid - 1;
>>  			endprfx = matched;
>>  		}
>> @@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent(
>>  	return ERR_PTR(-ENOENT);
>>  }
>>  
>> -static struct page *find_target_block_classic(
>> -	struct inode *dir,
>> -	struct qstr *name, int *_diff)
>> +static struct page *find_target_block_classic(struct inode *dir,
>> +					      struct erofs_qstr *name,
>> +					      int *_ndirents)
>>  {
>>  	unsigned int startprfx, endprfx;
>> -	unsigned int head, back;
>> +	int head, back;
>>  	struct address_space *const mapping = dir->i_mapping;
>>  	struct page *candidate = ERR_PTR(-ENOENT);
>>  
>> @@ -105,41 +108,43 @@ static struct page *find_target_block_classic(
>>  	back = inode_datablocks(dir) - 1;
>>  
>>  	while (head <= back) {
>> -		unsigned int mid = head + (back - head) / 2;
>> +		const int mid = head + (back - head) / 2;
>>  		struct page *page = read_mapping_page(mapping, mid, NULL);
>>  
>> -		if (IS_ERR(page)) {
>> -exact_out:
>> -			if (!IS_ERR(candidate)) /* valid candidate */
>> -				put_page(candidate);
>> -			return page;
>> -		} else {
>> -			int diff;
>> -			unsigned int ndirents, matched;
>> -			struct qstr dname;
>> +		if (!IS_ERR(page)) {
>>  			struct erofs_dirent *de = kmap_atomic(page);
>> -			unsigned int nameoff = le16_to_cpu(de->nameoff);
>> -
>> -			ndirents = nameoff / sizeof(*de);
>> +			const int nameoff = nameoff_from_disk(de->nameoff,
>> +							      EROFS_BLKSIZ);
>> +			const int ndirents = nameoff / sizeof(*de);
>> +			int diff;
>> +			unsigned int matched;
>> +			struct erofs_qstr dname;
>>  
>> -			/* corrupted dir (should have one entry at least) */
>> -			BUG_ON(!ndirents || nameoff > PAGE_SIZE);
>> +			if (unlikely(!ndirents)) {
>> +				DBG_BUGON(1);
>> +				kunmap_atomic(de);
>> +				put_page(page);
>> +				page = ERR_PTR(-EIO);
>> +				goto out;
>> +			}
>>  
>>  			matched = min(startprfx, endprfx);
>>  
>>  			dname.name = (u8 *)de + nameoff;
>> -			dname.len = ndirents == 1 ?
>> -				/* since the rest of the last page is 0 */
>> -				EROFS_BLKSIZ - nameoff
>> -				: le16_to_cpu(de[1].nameoff) - nameoff;
>> +			if (ndirents == 1)
>> +				dname.end = (u8 *)de + EROFS_BLKSIZ;
>> +			else
>> +				dname.end = (u8 *)de +
>> +					nameoff_from_disk(de[1].nameoff,
>> +							  EROFS_BLKSIZ);
>>  
>>  			/* string comparison without already matched prefix */
>>  			diff = dirnamecmp(name, &dname, &matched);
>>  			kunmap_atomic(de);
>>  
>>  			if (unlikely(!diff)) {
>> -				*_diff = 0;
>> -				goto exact_out;
>> +				*_ndirents = 0;
>> +				goto out;
>>  			} else if (diff > 0) {
>>  				head = mid + 1;
>>  				startprfx = matched;
>> @@ -147,47 +152,53 @@ static struct page *find_target_block_classic(
>>  				if (likely(!IS_ERR(candidate)))
>>  					put_page(candidate);
>>  				candidate = page;
>> +				*_ndirents = ndirents;
>>  			} else {
>>  				put_page(page);
>>  
>> -				if (unlikely(mid < 1))	/* fix "mid" overflow */
>> -					break;
>> -
>>  				back = mid - 1;
>>  				endprfx = matched;
>>  			}
>> +			continue;
>>  		}
>> +out:		/* free if the candidate is valid */
>> +		if (!IS_ERR(candidate))
>> +			put_page(candidate);
>> +		return page;
>>  	}
>> -	*_diff = 1;
>>  	return candidate;
>>  }
>>  
>>  int erofs_namei(struct inode *dir,
>> -	struct qstr *name,
>> -	erofs_nid_t *nid, unsigned int *d_type)
>> +		struct qstr *name,
>> +		erofs_nid_t *nid, unsigned int *d_type)
>>  {
>> -	int diff;
>> +	int ndirents;
>>  	struct page *page;
>> -	u8 *data;
>> +	void *data;
>>  	struct erofs_dirent *de;
>> +	struct erofs_qstr qn;
>>  
>>  	if (unlikely(!dir->i_size))
>>  		return -ENOENT;
>>  
>> -	diff = 1;
>> -	page = find_target_block_classic(dir, name, &diff);
>> +	qn.name = name->name;
>> +	qn.end = name->name + name->len;
>> +
>> +	ndirents = 0;
>> +	page = find_target_block_classic(dir, &qn, &ndirents);
>>  
>> -	if (unlikely(IS_ERR(page)))
>> +	if (IS_ERR(page))
>>  		return PTR_ERR(page);
>>  
>>  	data = kmap_atomic(page);
>>  	/* the target page has been mapped */
>> -	de = likely(diff) ?
>> -		/* since the rest of the last page is 0 */
>> -		find_target_dirent(name, data, EROFS_BLKSIZ) :
>> -		(struct erofs_dirent *)data;
>> +	if (ndirents)
>> +		de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
> 
> If we want to support different blksize during dirent lookup, how about
> adding parameter for find_target_block_classic() as find_target_dirent()?

Yes, you are right. However, find_target_dirent is now totally designed in page unit
because of get_meta_page usage, but find_target_dirent doesn't.

I think if we support sub-page feature later (by using iomap or someelse), it
could be added later.

Thanks,
Gao Xiang


> 
> Thanks,
> 
>> +	else
>> +		de = (struct erofs_dirent *)data;
>>  
>> -	if (likely(!IS_ERR(de))) {
>> +	if (!IS_ERR(de)) {
>>  		*nid = le64_to_cpu(de->nid);
>>  		*d_type = de->file_type;
>>  	}
>>
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-15  7:57   ` Dan Carpenter
@ 2019-02-15  9:04     ` Gao Xiang
  2019-02-15  9:32     ` Chao Yu
  1 sibling, 0 replies; 14+ messages in thread
From: Gao Xiang @ 2019-02-15  9:04 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Chao Yu, Al Viro, Greg Kroah-Hartman, devel, linux-erofs,
	Chao Yu, LKML, stable, weidu.du, Fang Wei, Miao Xie

Hi Dan,

On 2019/2/15 15:57, Dan Carpenter wrote:
> On Fri, Feb 15, 2019 at 03:02:25PM +0800, Chao Yu wrote:
>> On 2019/2/1 20:16, Gao Xiang wrote:
>>> +	/*
>>> +	 * on-disk error, let's only BUG_ON in the debugging mode.
>>> +	 * otherwise, it will return 1 to just skip the invalid name
>>> +	 * and go on (in consideration of the lookup performance).
>>> +	 */
>>> +	DBG_BUGON(qd->name > qd->end);
>>
>> qd->name == qd->end is not allowed as well?
>>
>> So will it be better to return directly here?
>>
>> 	if (unlikely(qd->name >= qd->end)) {
>> 		DBG_BUGON(1);
>> 		return 1;
>> 	}
> 
> Please don't add likely/unlikely() annotations unless you have
> benchmarked it and it makes a difference.

I tend not to add this branch above since the current logic can handle
qd->name >= qd->end (it only happens in corrupted images) and
it will return 1;

Let's just leave DBG_BUGON(qd->name > qd->end); here for debugging use
(to detect corrupted image in some degree earlier).

Thanks,
Gao Xiang

> 
> regards,
> dan carpenter
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-15  7:57   ` Dan Carpenter
  2019-02-15  9:04     ` Gao Xiang
@ 2019-02-15  9:32     ` Chao Yu
  2019-02-15  9:35       ` Dan Carpenter
  1 sibling, 1 reply; 14+ messages in thread
From: Chao Yu @ 2019-02-15  9:32 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Gao Xiang, Al Viro, Greg Kroah-Hartman, devel, linux-erofs,
	Chao Yu, LKML, stable, weidu.du, Fang Wei, Miao Xie

On 2019/2/15 15:57, Dan Carpenter wrote:
> On Fri, Feb 15, 2019 at 03:02:25PM +0800, Chao Yu wrote:
>> On 2019/2/1 20:16, Gao Xiang wrote:
>>> +	/*
>>> +	 * on-disk error, let's only BUG_ON in the debugging mode.
>>> +	 * otherwise, it will return 1 to just skip the invalid name
>>> +	 * and go on (in consideration of the lookup performance).
>>> +	 */
>>> +	DBG_BUGON(qd->name > qd->end);
>>
>> qd->name == qd->end is not allowed as well?
>>
>> So will it be better to return directly here?
>>
>> 	if (unlikely(qd->name >= qd->end)) {
>> 		DBG_BUGON(1);
>> 		return 1;
>> 	}
> 
> Please don't add likely/unlikely() annotations unless you have
> benchmarked it and it makes a difference.

Well, it only occur for corrupted image, since the image is readonly, so it
is really rare.

Thanks,

> 
> regards,
> dan carpenter
> 
> 
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-15  9:32     ` Chao Yu
@ 2019-02-15  9:35       ` Dan Carpenter
  2019-02-15 10:33         ` Gao Xiang
  2019-02-18  2:41         ` Chao Yu
  0 siblings, 2 replies; 14+ messages in thread
From: Dan Carpenter @ 2019-02-15  9:35 UTC (permalink / raw)
  To: Chao Yu
  Cc: devel, Greg Kroah-Hartman, Miao Xie, Chao Yu, LKML, stable,
	weidu.du, Al Viro, linux-erofs, Fang Wei

On Fri, Feb 15, 2019 at 05:32:33PM +0800, Chao Yu wrote:
> On 2019/2/15 15:57, Dan Carpenter wrote:
> > On Fri, Feb 15, 2019 at 03:02:25PM +0800, Chao Yu wrote:
> >> On 2019/2/1 20:16, Gao Xiang wrote:
> >>> +	/*
> >>> +	 * on-disk error, let's only BUG_ON in the debugging mode.
> >>> +	 * otherwise, it will return 1 to just skip the invalid name
> >>> +	 * and go on (in consideration of the lookup performance).
> >>> +	 */
> >>> +	DBG_BUGON(qd->name > qd->end);
> >>
> >> qd->name == qd->end is not allowed as well?
> >>
> >> So will it be better to return directly here?
> >>
> >> 	if (unlikely(qd->name >= qd->end)) {
> >> 		DBG_BUGON(1);
> >> 		return 1;
> >> 	}
> > 
> > Please don't add likely/unlikely() annotations unless you have
> > benchmarked it and it makes a difference.
> 
> Well, it only occur for corrupted image, since the image is readonly, so it
> is really rare.

The likely/unlikely() annotations make the code harder to read.  It's
only worth it if it's is a speedup on a fast path.

regards,
dan carpenter


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-15  9:35       ` Dan Carpenter
@ 2019-02-15 10:33         ` Gao Xiang
  2019-02-18  2:41         ` Chao Yu
  1 sibling, 0 replies; 14+ messages in thread
From: Gao Xiang @ 2019-02-15 10:33 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Chao Yu, devel, Greg Kroah-Hartman, Chao Yu, linux-erofs, LKML,
	stable, weidu.du, Al Viro, Miao Xie, Fang Wei

Hi Dan,

On 2019/2/15 17:35, Dan Carpenter wrote:
> On Fri, Feb 15, 2019 at 05:32:33PM +0800, Chao Yu wrote:
>> On 2019/2/15 15:57, Dan Carpenter wrote:
>>> On Fri, Feb 15, 2019 at 03:02:25PM +0800, Chao Yu wrote:
>>>> On 2019/2/1 20:16, Gao Xiang wrote:
>>>>> +	/*
>>>>> +	 * on-disk error, let's only BUG_ON in the debugging mode.
>>>>> +	 * otherwise, it will return 1 to just skip the invalid name
>>>>> +	 * and go on (in consideration of the lookup performance).
>>>>> +	 */
>>>>> +	DBG_BUGON(qd->name > qd->end);
>>>>
>>>> qd->name == qd->end is not allowed as well?
>>>>
>>>> So will it be better to return directly here?
>>>>
>>>> 	if (unlikely(qd->name >= qd->end)) {
>>>> 		DBG_BUGON(1);
>>>> 		return 1;
>>>> 	}
>>>
>>> Please don't add likely/unlikely() annotations unless you have
>>> benchmarked it and it makes a difference.
>>
>> Well, it only occur for corrupted image, since the image is readonly, so it
>> is really rare.
> 
> The likely/unlikely() annotations make the code harder to read.  It's
> only worth it if it's is a speedup on a fast path.

Yes, I think abuse of using likely/unlikely() should be avoided (I agree that
some odd likely/unlikely() exists in the current code, that should be cleaned up).

However, likely/unlikely()s are also clearly highlight critical/corner paths).
I personally think it should be used in case-by-case basis rather than a unified
conclusion ("that makes the code harder to read").

Thanks,
Gao Xiang

> 
> regards,
> dan carpenter
> 
> _______________________________________________
> devel mailing list
> devel@linuxdriverproject.org
> http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-15  8:58   ` Gao Xiang
@ 2019-02-18  1:39     ` Chao Yu
  2019-02-18  2:17       ` Gao Xiang
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2019-02-18  1:39 UTC (permalink / raw)
  To: Gao Xiang
  Cc: Al Viro, Greg Kroah-Hartman, devel, LKML, linux-erofs, Chao Yu,
	Miao Xie, weidu.du, Fang Wei, stable

Hi xiang,

On 2019/2/15 16:58, Gao Xiang wrote:
> Hi Chao,
> 
> On 2019/2/15 15:02, Chao Yu wrote:
>> On 2019/2/1 20:16, Gao Xiang wrote:
>>> As Al pointed out, "
>>> ... and while we are at it, what happens to
>>> 	unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
>>> 	unsigned int matched = min(startprfx, endprfx);
>>>
>>> 	struct qstr dname = QSTR_INIT(data + nameoff,
>>> 		unlikely(mid >= ndirents - 1) ?
>>> 			maxsize - nameoff :
>>> 			le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>>>
>>> 	/* string comparison without already matched prefix */
>>> 	int ret = dirnamecmp(name, &dname, &matched);
>>> if le16_to_cpu(de[...].nameoff) is not monotonically increasing?  I.e.
>>> what's to prevent e.g. (unsigned)-1 ending up in dname.len?
>>>
>>> Corrupted fs image shouldn't oops the kernel.. "
>>>
>>> Revisit the related lookup flow to address the issue.
>>>
>>> Fixes: d72d1ce60174 ("staging: erofs: add namei functions")
>>> Cc: <stable@vger.kernel.org> # 4.19+
>>> Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
>>> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
>>> ---
>>> [ It should be better get reviewed first and for linux-next... ]
>>>
>>> change log v2:
>>>  - fix the missing "kunmap_atomic" pointed out by Dan;
>>>  - minor cleanup;
>>>
>>>  drivers/staging/erofs/namei.c | 187 ++++++++++++++++++++++--------------------
>>>  1 file changed, 99 insertions(+), 88 deletions(-)
>>>
>>> diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
>>> index 5596c52e246d..321c881d720f 100644
>>> --- a/drivers/staging/erofs/namei.c
>>> +++ b/drivers/staging/erofs/namei.c
>>> @@ -15,74 +15,77 @@
>>>  
>>>  #include <trace/events/erofs.h>
>>>  
>>> -/* based on the value of qn->len is accurate */
>>> -static inline int dirnamecmp(struct qstr *qn,
>>> -	struct qstr *qd, unsigned int *matched)
>>> +struct erofs_qstr {
>>> +	const unsigned char *name;
>>> +	const unsigned char *end;
>>> +};
>>> +
>>> +/* based on the end of qn is accurate and it must have the trailing '\0' */
>>> +static inline int dirnamecmp(const struct erofs_qstr *qn,
>>> +			     const struct erofs_qstr *qd,
>>> +			     unsigned int *matched)
>>>  {
>>> -	unsigned int i = *matched, len = min(qn->len, qd->len);
>>> -loop:
>>> -	if (unlikely(i >= len)) {
>>> -		*matched = i;
>>> -		if (qn->len < qd->len) {
>>> -			/*
>>> -			 * actually (qn->len == qd->len)
>>> -			 * when qd->name[i] == '\0'
>>> -			 */
>>> -			return qd->name[i] == '\0' ? 0 : -1;
>>> +	unsigned int i = *matched;
>>> +
>>> +	/*
>>> +	 * on-disk error, let's only BUG_ON in the debugging mode.
>>> +	 * otherwise, it will return 1 to just skip the invalid name
>>> +	 * and go on (in consideration of the lookup performance).
>>> +	 */
>>> +	DBG_BUGON(qd->name > qd->end);
>>
>> qd->name == qd->end is not allowed as well?
> 
> Make sense, it is only used for debugging mode, let me send another patch later...
> 
>>
>> So will it be better to return directly here?
>>
>> 	if (unlikely(qd->name >= qd->end)) {
>> 		DBG_BUGON(1);
>> 		return 1;
>> 	}
> 
> Corrupted image is rare happened in normal systems, I tend to only use
> DBG_BUGON here, and  qn->name[i] is of course not '\0', so it will return 1;

If the image is corrupted, qn->name[i] may be anything, as you commented
above DBG_BUGON(), we really don't need to go through any later codes, it
can avoid potentially encoutnering wrong condition.

* otherwise, it will return 1 to just skip the invalid name

> 
>>
>>> +
>>> +	/* qd could not have trailing '\0' */
>>> +	/* However it is absolutely safe if < qd->end */
>>> +	while (qd->name + i < qd->end && qd->name[i] != '\0') {
>>> +		if (qn->name[i] != qd->name[i]) {
>>> +			*matched = i;
>>> +			return qn->name[i] > qd->name[i] ? 1 : -1;
>>>  		}
>>> -		return (qn->len > qd->len);
>>> +		++i;
>>>  	}
>>> -
>>> -	if (qn->name[i] != qd->name[i]) {
>>> -		*matched = i;
>>> -		return qn->name[i] > qd->name[i] ? 1 : -1;
>>> -	}
>>> -
>>> -	++i;
>>> -	goto loop;
>>> +	*matched = i;
>>> +	/* See comments in __d_alloc on the terminating NUL character */
>>> +	return qn->name[i] == '\0' ? 0 : 1;
>>>  }
>>>  
>>> -static struct erofs_dirent *find_target_dirent(
>>> -	struct qstr *name,
>>> -	u8 *data, int maxsize)
>>> +#define nameoff_from_disk(off, sz)	(le16_to_cpu(off) & ((sz) - 1))
>>> +
>>> +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
>>> +					       u8 *data,
>>> +					       unsigned int dirblksize,
>>> +					       const int ndirents)
>>>  {
>>> -	unsigned int ndirents, head, back;
>>> +	int head, back;
>>>  	unsigned int startprfx, endprfx;
>>>  	struct erofs_dirent *const de = (struct erofs_dirent *)data;
>>>  
>>> -	/* make sure that maxsize is valid */
>>> -	BUG_ON(maxsize < sizeof(struct erofs_dirent));
>>> -
>>> -	ndirents = le16_to_cpu(de->nameoff) / sizeof(*de);
>>> -
>>> -	/* corrupted dir (may be unnecessary...) */
>>> -	BUG_ON(!ndirents);
>>> -
>>> -	head = 0;
>>> +	/* since the 1st dirent has been evaluated previously */
>>> +	head = 1;
>>>  	back = ndirents - 1;
>>>  	startprfx = endprfx = 0;
>>>  
>>>  	while (head <= back) {
>>> -		unsigned int mid = head + (back - head) / 2;
>>> -		unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
>>> +		const int mid = head + (back - head) / 2;
>>> +		const int nameoff = nameoff_from_disk(de[mid].nameoff,
>>> +						      dirblksize);
>>>  		unsigned int matched = min(startprfx, endprfx);
>>> -
>>> -		struct qstr dname = QSTR_INIT(data + nameoff,
>>> -			unlikely(mid >= ndirents - 1) ?
>>> -				maxsize - nameoff :
>>> -				le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>>> +		struct erofs_qstr dname = {
>>> +			.name = data + nameoff,
>>> +			.end = unlikely(mid >= ndirents - 1) ?
>>> +				data + dirblksize :
>>> +				data + nameoff_from_disk(de[mid + 1].nameoff,
>>> +							 dirblksize)
>>> +		};
>>>  
>>>  		/* string comparison without already matched prefix */
>>>  		int ret = dirnamecmp(name, &dname, &matched);
>>>  
>>> -		if (unlikely(!ret))
>>> +		if (unlikely(!ret)) {
>>>  			return de + mid;
>>> -		else if (ret > 0) {
>>> +		} else if (ret > 0) {
>>>  			head = mid + 1;
>>>  			startprfx = matched;
>>> -		} else if (unlikely(mid < 1))	/* fix "mid" overflow */
>>> -			break;
>>> -		else {
>>> +		} else {
>>>  			back = mid - 1;
>>>  			endprfx = matched;
>>>  		}
>>> @@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent(
>>>  	return ERR_PTR(-ENOENT);
>>>  }
>>>  
>>> -static struct page *find_target_block_classic(
>>> -	struct inode *dir,
>>> -	struct qstr *name, int *_diff)
>>> +static struct page *find_target_block_classic(struct inode *dir,
>>> +					      struct erofs_qstr *name,
>>> +					      int *_ndirents)
>>>  {
>>>  	unsigned int startprfx, endprfx;
>>> -	unsigned int head, back;
>>> +	int head, back;
>>>  	struct address_space *const mapping = dir->i_mapping;
>>>  	struct page *candidate = ERR_PTR(-ENOENT);
>>>  
>>> @@ -105,41 +108,43 @@ static struct page *find_target_block_classic(
>>>  	back = inode_datablocks(dir) - 1;
>>>  
>>>  	while (head <= back) {
>>> -		unsigned int mid = head + (back - head) / 2;
>>> +		const int mid = head + (back - head) / 2;
>>>  		struct page *page = read_mapping_page(mapping, mid, NULL);
>>>  
>>> -		if (IS_ERR(page)) {
>>> -exact_out:
>>> -			if (!IS_ERR(candidate)) /* valid candidate */
>>> -				put_page(candidate);
>>> -			return page;
>>> -		} else {
>>> -			int diff;
>>> -			unsigned int ndirents, matched;
>>> -			struct qstr dname;
>>> +		if (!IS_ERR(page)) {
>>>  			struct erofs_dirent *de = kmap_atomic(page);
>>> -			unsigned int nameoff = le16_to_cpu(de->nameoff);
>>> -
>>> -			ndirents = nameoff / sizeof(*de);
>>> +			const int nameoff = nameoff_from_disk(de->nameoff,
>>> +							      EROFS_BLKSIZ);
>>> +			const int ndirents = nameoff / sizeof(*de);
>>> +			int diff;
>>> +			unsigned int matched;
>>> +			struct erofs_qstr dname;
>>>  
>>> -			/* corrupted dir (should have one entry at least) */
>>> -			BUG_ON(!ndirents || nameoff > PAGE_SIZE);
>>> +			if (unlikely(!ndirents)) {
>>> +				DBG_BUGON(1);
>>> +				kunmap_atomic(de);
>>> +				put_page(page);
>>> +				page = ERR_PTR(-EIO);
>>> +				goto out;
>>> +			}
>>>  
>>>  			matched = min(startprfx, endprfx);
>>>  
>>>  			dname.name = (u8 *)de + nameoff;
>>> -			dname.len = ndirents == 1 ?
>>> -				/* since the rest of the last page is 0 */
>>> -				EROFS_BLKSIZ - nameoff
>>> -				: le16_to_cpu(de[1].nameoff) - nameoff;
>>> +			if (ndirents == 1)
>>> +				dname.end = (u8 *)de + EROFS_BLKSIZ;
>>> +			else
>>> +				dname.end = (u8 *)de +
>>> +					nameoff_from_disk(de[1].nameoff,
>>> +							  EROFS_BLKSIZ);
>>>  
>>>  			/* string comparison without already matched prefix */
>>>  			diff = dirnamecmp(name, &dname, &matched);
>>>  			kunmap_atomic(de);
>>>  
>>>  			if (unlikely(!diff)) {
>>> -				*_diff = 0;
>>> -				goto exact_out;
>>> +				*_ndirents = 0;
>>> +				goto out;
>>>  			} else if (diff > 0) {
>>>  				head = mid + 1;
>>>  				startprfx = matched;
>>> @@ -147,47 +152,53 @@ static struct page *find_target_block_classic(
>>>  				if (likely(!IS_ERR(candidate)))
>>>  					put_page(candidate);
>>>  				candidate = page;
>>> +				*_ndirents = ndirents;
>>>  			} else {
>>>  				put_page(page);
>>>  
>>> -				if (unlikely(mid < 1))	/* fix "mid" overflow */
>>> -					break;
>>> -
>>>  				back = mid - 1;
>>>  				endprfx = matched;
>>>  			}
>>> +			continue;
>>>  		}
>>> +out:		/* free if the candidate is valid */
>>> +		if (!IS_ERR(candidate))
>>> +			put_page(candidate);
>>> +		return page;
>>>  	}
>>> -	*_diff = 1;
>>>  	return candidate;
>>>  }
>>>  
>>>  int erofs_namei(struct inode *dir,
>>> -	struct qstr *name,
>>> -	erofs_nid_t *nid, unsigned int *d_type)
>>> +		struct qstr *name,
>>> +		erofs_nid_t *nid, unsigned int *d_type)
>>>  {
>>> -	int diff;
>>> +	int ndirents;
>>>  	struct page *page;
>>> -	u8 *data;
>>> +	void *data;
>>>  	struct erofs_dirent *de;
>>> +	struct erofs_qstr qn;
>>>  
>>>  	if (unlikely(!dir->i_size))
>>>  		return -ENOENT;
>>>  
>>> -	diff = 1;
>>> -	page = find_target_block_classic(dir, name, &diff);
>>> +	qn.name = name->name;
>>> +	qn.end = name->name + name->len;
>>> +
>>> +	ndirents = 0;
>>> +	page = find_target_block_classic(dir, &qn, &ndirents);
>>>  
>>> -	if (unlikely(IS_ERR(page)))
>>> +	if (IS_ERR(page))
>>>  		return PTR_ERR(page);
>>>  
>>>  	data = kmap_atomic(page);
>>>  	/* the target page has been mapped */
>>> -	de = likely(diff) ?
>>> -		/* since the rest of the last page is 0 */
>>> -		find_target_dirent(name, data, EROFS_BLKSIZ) :
>>> -		(struct erofs_dirent *)data;
>>> +	if (ndirents)
>>> +		de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
>>
>> If we want to support different blksize during dirent lookup, how about
>> adding parameter for find_target_block_classic() as find_target_dirent()?
> 
> Yes, you are right. However, find_target_dirent is now totally designed in page unit
> because of get_meta_page usage, but find_target_dirent doesn't.
> 
> I think if we support sub-page feature later (by using iomap or someelse), it
> could be added later.

No problem, it's not a big deal. :)

Thanks,

> 
> Thanks,
> Gao Xiang
> 
> 
>>
>> Thanks,
>>
>>> +	else
>>> +		de = (struct erofs_dirent *)data;
>>>  
>>> -	if (likely(!IS_ERR(de))) {
>>> +	if (!IS_ERR(de)) {
>>>  		*nid = le64_to_cpu(de->nid);
>>>  		*d_type = de->file_type;
>>>  	}
>>>
>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-18  1:39     ` Chao Yu
@ 2019-02-18  2:17       ` Gao Xiang
  2019-02-18  2:50         ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Gao Xiang @ 2019-02-18  2:17 UTC (permalink / raw)
  To: Chao Yu
  Cc: Al Viro, Greg Kroah-Hartman, devel, LKML, linux-erofs, Chao Yu,
	Miao Xie, weidu.du, Fang Wei, stable

Hi Chao,

On 2019/2/18 9:39, Chao Yu wrote:
> If the image is corrupted, qn->name[i] may be anything, as you commented
> above DBG_BUGON(), we really don't need to go through any later codes, it
> can avoid potentially encoutnering wrong condition.
> 
> * otherwise, it will return 1 to just skip the invalid name
> 

Just I commented in the following source code, qn is actually the user requested
name allocated in __d_alloc, which can be guaranteed with the trailing '\0' and
it is a valid string.

Thanks,
Gao Xiang

>>>> +
>>>> +	/* qd could not have trailing '\0' */
>>>> +	/* However it is absolutely safe if < qd->end */
>>>> +	while (qd->name + i < qd->end && qd->name[i] != '\0') {
>>>> +		if (qn->name[i] != qd->name[i]) {
>>>> +			*matched = i;
>>>> +			return qn->name[i] > qd->name[i] ? 1 : -1;
>>>>  		}
>>>> -		return (qn->len > qd->len);
>>>> +		++i;
>>>>  	}
>>>> -
>>>> -	if (qn->name[i] != qd->name[i]) {
>>>> -		*matched = i;
>>>> -		return qn->name[i] > qd->name[i] ? 1 : -1;
>>>> -	}
>>>> -
>>>> -	++i;
>>>> -	goto loop;
>>>> +	*matched = i;
>>>> +	/* See comments in __d_alloc on the terminating NUL character */
>>>> +	return qn->name[i] == '\0' ? 0 : 1;
>>>>  }

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-15  9:35       ` Dan Carpenter
  2019-02-15 10:33         ` Gao Xiang
@ 2019-02-18  2:41         ` Chao Yu
  2019-02-18  6:16           ` Dan Carpenter
  1 sibling, 1 reply; 14+ messages in thread
From: Chao Yu @ 2019-02-18  2:41 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: devel, Greg Kroah-Hartman, Miao Xie, Chao Yu, LKML, stable,
	weidu.du, Al Viro, linux-erofs, Fang Wei

On 2019/2/15 17:35, Dan Carpenter wrote:
> On Fri, Feb 15, 2019 at 05:32:33PM +0800, Chao Yu wrote:
>> On 2019/2/15 15:57, Dan Carpenter wrote:
>>> On Fri, Feb 15, 2019 at 03:02:25PM +0800, Chao Yu wrote:
>>>> On 2019/2/1 20:16, Gao Xiang wrote:
>>>>> +	/*
>>>>> +	 * on-disk error, let's only BUG_ON in the debugging mode.
>>>>> +	 * otherwise, it will return 1 to just skip the invalid name
>>>>> +	 * and go on (in consideration of the lookup performance).
>>>>> +	 */
>>>>> +	DBG_BUGON(qd->name > qd->end);
>>>>
>>>> qd->name == qd->end is not allowed as well?
>>>>
>>>> So will it be better to return directly here?
>>>>
>>>> 	if (unlikely(qd->name >= qd->end)) {
>>>> 		DBG_BUGON(1);
>>>> 		return 1;
>>>> 	}
>>>
>>> Please don't add likely/unlikely() annotations unless you have
>>> benchmarked it and it makes a difference.
>>
>> Well, it only occur for corrupted image, since the image is readonly, so it
>> is really rare.
> 
> The likely/unlikely() annotations make the code harder to read.  It's

Well, I think unlikely here can imply this is a rare case which may help to
read...

> only worth it if it's is a speedup on a fast path.

I guess unlikely here can help pipeline to load/execute right branch codes
instead of that rare branch one with BUGON(), is that right?

Thanks,

> 
> regards,
> dan carpenter
> 
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-18  2:17       ` Gao Xiang
@ 2019-02-18  2:50         ` Chao Yu
  0 siblings, 0 replies; 14+ messages in thread
From: Chao Yu @ 2019-02-18  2:50 UTC (permalink / raw)
  To: Gao Xiang
  Cc: Al Viro, Greg Kroah-Hartman, devel, LKML, linux-erofs, Chao Yu,
	Miao Xie, weidu.du, Fang Wei, stable

Hi Xiang,

On 2019/2/18 10:17, Gao Xiang wrote:
> Hi Chao,
> 
> On 2019/2/18 9:39, Chao Yu wrote:
>> If the image is corrupted, qn->name[i] may be anything, as you commented
>> above DBG_BUGON(), we really don't need to go through any later codes, it
>> can avoid potentially encoutnering wrong condition.
>>
>> * otherwise, it will return 1 to just skip the invalid name
>>
> 
> Just I commented in the following source code, qn is actually the user requested
> name allocated in __d_alloc, which can be guaranteed with the trailing '\0' and
> it is a valid string.

Alright, I agreed below codes can guarantee that. :)

Thanks,

> 
> Thanks,
> Gao Xiang
> 
>>>>> +
>>>>> +	/* qd could not have trailing '\0' */
>>>>> +	/* However it is absolutely safe if < qd->end */
>>>>> +	while (qd->name + i < qd->end && qd->name[i] != '\0') {
>>>>> +		if (qn->name[i] != qd->name[i]) {
>>>>> +			*matched = i;
>>>>> +			return qn->name[i] > qd->name[i] ? 1 : -1;
>>>>>  		}
>>>>> -		return (qn->len > qd->len);
>>>>> +		++i;
>>>>>  	}
>>>>> -
>>>>> -	if (qn->name[i] != qd->name[i]) {
>>>>> -		*matched = i;
>>>>> -		return qn->name[i] > qd->name[i] ? 1 : -1;
>>>>> -	}
>>>>> -
>>>>> -	++i;
>>>>> -	goto loop;
>>>>> +	*matched = i;
>>>>> +	/* See comments in __d_alloc on the terminating NUL character */
>>>>> +	return qn->name[i] == '\0' ? 0 : 1;
>>>>>  }
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()
  2019-02-18  2:41         ` Chao Yu
@ 2019-02-18  6:16           ` Dan Carpenter
  0 siblings, 0 replies; 14+ messages in thread
From: Dan Carpenter @ 2019-02-18  6:16 UTC (permalink / raw)
  To: Chao Yu
  Cc: devel, Greg Kroah-Hartman, Miao Xie, Chao Yu, LKML, stable,
	weidu.du, Al Viro, linux-erofs, Fang Wei

On Mon, Feb 18, 2019 at 10:41:36AM +0800, Chao Yu wrote:
> On 2019/2/15 17:35, Dan Carpenter wrote:
> > On Fri, Feb 15, 2019 at 05:32:33PM +0800, Chao Yu wrote:
> >> On 2019/2/15 15:57, Dan Carpenter wrote:
> >>> On Fri, Feb 15, 2019 at 03:02:25PM +0800, Chao Yu wrote:
> >>>> On 2019/2/1 20:16, Gao Xiang wrote:
> >>>>> +	/*
> >>>>> +	 * on-disk error, let's only BUG_ON in the debugging mode.
> >>>>> +	 * otherwise, it will return 1 to just skip the invalid name
> >>>>> +	 * and go on (in consideration of the lookup performance).
> >>>>> +	 */
> >>>>> +	DBG_BUGON(qd->name > qd->end);
> >>>>
> >>>> qd->name == qd->end is not allowed as well?
> >>>>
> >>>> So will it be better to return directly here?
> >>>>
> >>>> 	if (unlikely(qd->name >= qd->end)) {
> >>>> 		DBG_BUGON(1);
> >>>> 		return 1;
> >>>> 	}
> >>>
> >>> Please don't add likely/unlikely() annotations unless you have
> >>> benchmarked it and it makes a difference.
> >>
> >> Well, it only occur for corrupted image, since the image is readonly, so it
> >> is really rare.
> > 
> > The likely/unlikely() annotations make the code harder to read.  It's
> 
> Well, I think unlikely here can imply this is a rare case which may help to
> read...
> 
> > only worth it if it's is a speedup on a fast path.
> 
> I guess unlikely here can help pipeline to load/execute right branch codes
> instead of that rare branch one with BUGON(), is that right?
> 

Correct.  If you really think the likely/unlikely on this line will lead
to a performance improvement which will show up on a benchmark then you
should use it.  (But there is no way that it really show on benchmarks,
let's not pretend).

If it doesn't show up on benchmarking, then we're just discussing style.
Kernel style tends to be minimalist.

regards,
dan carpenter


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, back to index

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-01 12:16 [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei() Gao Xiang
2019-02-12  5:05 ` Gao Xiang
2019-02-15  7:02 ` Chao Yu
2019-02-15  7:57   ` Dan Carpenter
2019-02-15  9:04     ` Gao Xiang
2019-02-15  9:32     ` Chao Yu
2019-02-15  9:35       ` Dan Carpenter
2019-02-15 10:33         ` Gao Xiang
2019-02-18  2:41         ` Chao Yu
2019-02-18  6:16           ` Dan Carpenter
2019-02-15  8:58   ` Gao Xiang
2019-02-18  1:39     ` Chao Yu
2019-02-18  2:17       ` Gao Xiang
2019-02-18  2:50         ` Chao Yu

Stable Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/stable/0 stable/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 stable stable/ https://lore.kernel.org/stable \
		stable@vger.kernel.org stable@archiver.kernel.org
	public-inbox-index stable


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.stable


AGPL code for this site: git clone https://public-inbox.org/ public-inbox