All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH] lvm2: limit accesses to broken devices (v2)
@ 2010-06-29  7:26 Takahiro Yasui
  2010-06-30 12:54 ` Petr Rockai
  2010-07-02 13:32 ` Alasdair G Kergon
  0 siblings, 2 replies; 6+ messages in thread
From: Takahiro Yasui @ 2010-06-29  7:26 UTC (permalink / raw)
  To: lvm-devel

Hi,

This is a updated patch (v2) to limit accesses to broken devices.

* v2 changes:
  - reset error count whenever lvm command is executed
  - change default value from -1 to 0

Appreciate your review and comments.

Thanks,
Taka


Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com>
---
 doc/example.conf.in        |    5 +++++
 lib/commands/toolcontext.c |    4 ++++
 lib/config/defaults.h      |    2 ++
 lib/device/dev-cache.c     |   21 +++++++++++++++++++++
 lib/device/dev-cache.h     |    2 ++
 lib/device/dev-io.c        |   34 ++++++++++++++++++++++++++++++++--
 lib/device/device.h        |    2 ++
 lib/misc/lvm-globals.c     |   11 +++++++++++
 lib/misc/lvm-globals.h     |    4 ++++
 man/lvm.conf.5.in          |    5 +++++
 tools/lvmcmdline.c         |    3 +++
 11 files changed, 91 insertions(+), 2 deletions(-)

Index: LVM2-2.02.68/doc/example.conf.in
===================================================================
--- LVM2-2.02.68.orig/doc/example.conf.in
+++ LVM2-2.02.68/doc/example.conf.in
@@ -130,6 +130,11 @@ devices {
     # Set this to 1 to skip such devices.  This should only be needed
     # in recovery situations.
     ignore_suspended_devices = 0
+
+    # Maximum number of error counts per device before disabling the device.
+    # This option prevents a broken device from being accessed repeatedly.
+    # Set to 0 to disable the error number control.
+    dev_max_error_count = 0
 }
 
 # This section that allows you to configure the nature of the
Index: LVM2-2.02.68/lib/commands/toolcontext.c
===================================================================
--- LVM2-2.02.68.orig/lib/commands/toolcontext.c
+++ LVM2-2.02.68/lib/commands/toolcontext.c
@@ -558,6 +558,10 @@ static int _init_dev_cache(struct cmd_co
 	const struct config_node *cn;
 	struct config_value *cv;
 
+	init_dev_max_error_count(
+		find_config_tree_int(cmd, "devices/dev_max_error_count",
+				     DEFAULT_MAX_ERROR_COUNT));
+
 	if (!dev_cache_init(cmd))
 		return_0;
 
Index: LVM2-2.02.68/lib/config/defaults.h
===================================================================
--- LVM2-2.02.68.orig/lib/config/defaults.h
+++ LVM2-2.02.68/lib/config/defaults.h
@@ -109,6 +109,8 @@
 #  define DEFAULT_MAX_HISTORY 100
 #endif
 
+#define DEFAULT_MAX_ERROR_COUNT	NO_DEV_ERROR_COUNT_LIMIT
+
 #define DEFAULT_REP_ALIGNED 1
 #define DEFAULT_REP_BUFFERED 1
 #define DEFAULT_REP_COLUMNS_AS_ROWS 0
Index: LVM2-2.02.68/lib/device/dev-cache.c
===================================================================
--- LVM2-2.02.68.orig/lib/device/dev-cache.c
+++ LVM2-2.02.68/lib/device/dev-cache.c
@@ -104,6 +104,8 @@ struct device *dev_create_file(const cha
 	dev->dev = 0;
 	dev->fd = -1;
 	dev->open_count = 0;
+	dev->error_count = 0;
+	dev->max_error_count = NO_DEV_ERROR_COUNT_LIMIT;
 	dev->block_size = -1;
 	dev->read_ahead = -1;
 	memset(dev->pvid, 0, sizeof(dev->pvid));
@@ -125,6 +127,7 @@ static struct device *_dev_create(dev_t 
 	dev->dev = d;
 	dev->fd = -1;
 	dev->open_count = 0;
+	dev->max_error_count = dev_max_error_count();
 	dev->block_size = -1;
 	dev->read_ahead = -1;
 	dev->end = UINT64_C(0);
@@ -791,6 +794,24 @@ struct device *dev_iter_get(struct dev_i
 	return NULL;
 }
 
+int dev_reset_error_count(struct cmd_context *cmd)
+{
+	struct dev_iter *iter;
+	struct device *dev;
+
+	if (!(iter = dev_iter_create(cmd->filter, 0))) {
+		log_error("dev_iter_create failed");
+		return 0;
+	}
+
+	for (dev = dev_iter_get(iter); dev; dev = dev_iter_get(iter))
+		dev->error_count = 0;
+
+	dev_iter_destroy(iter);
+
+	return 1;
+}
+
 int dev_fd(struct device *dev)
 {
 	return dev->fd;
Index: LVM2-2.02.68/lib/device/dev-cache.h
===================================================================
--- LVM2-2.02.68.orig/lib/device/dev-cache.h
+++ LVM2-2.02.68/lib/device/dev-cache.h
@@ -52,4 +52,6 @@ struct dev_iter *dev_iter_create(struct 
 void dev_iter_destroy(struct dev_iter *iter);
 struct device *dev_iter_get(struct dev_iter *iter);
 
+int dev_reset_error_count(struct cmd_context *cmd);
+
 #endif
Index: LVM2-2.02.68/lib/device/dev-io.c
===================================================================
--- LVM2-2.02.68.orig/lib/device/dev-io.c
+++ LVM2-2.02.68/lib/device/dev-io.c
@@ -595,18 +595,40 @@ void dev_close_all(void)
 	}
 }
 
+static inline int _dev_is_valid(struct device *dev)
+{
+	return (dev->max_error_count == NO_DEV_ERROR_COUNT_LIMIT ||
+		dev->error_count < dev->max_error_count);
+}
+
+static void _dev_inc_error_count(struct device *dev)
+{
+	if (++dev->error_count == dev->max_error_count)
+		log_warn("WARNING: Error counts exceeded limit of %d. "
+			 "Device %s was disabled",
+			 dev->max_error_count, dev_name(dev));
+}
+
 int dev_read(struct device *dev, uint64_t offset, size_t len, void *buffer)
 {
 	struct device_area where;
+	int ret;
 
 	if (!dev->open_count)
 		return_0;
 
+	if (!_dev_is_valid(dev))
+		return 0;
+
 	where.dev = dev;
 	where.start = offset;
 	where.size = len;
 
-	return _aligned_io(&where, buffer, 0);
+	ret = _aligned_io(&where, buffer, 0);
+	if (!ret)
+		_dev_inc_error_count(dev);
+
+	return ret;
 }
 
 /*
@@ -662,17 +684,25 @@ int dev_append(struct device *dev, size_
 int dev_write(struct device *dev, uint64_t offset, size_t len, void *buffer)
 {
 	struct device_area where;
+	int ret;
 
 	if (!dev->open_count)
 		return_0;
 
+	if (!_dev_is_valid(dev))
+		return 0;
+
 	where.dev = dev;
 	where.start = offset;
 	where.size = len;
 
 	dev->flags |= DEV_ACCESSED_W;
 
-	return _aligned_io(&where, buffer, 1);
+	ret = _aligned_io(&where, buffer, 1);
+	if (!ret)
+		_dev_inc_error_count(dev);
+
+	return ret;
 }
 
 int dev_set(struct device *dev, uint64_t offset, size_t len, int value)
Index: LVM2-2.02.68/lib/device/device.h
===================================================================
--- LVM2-2.02.68.orig/lib/device/device.h
+++ LVM2-2.02.68/lib/device/device.h
@@ -39,6 +39,8 @@ struct device {
 	/* private */
 	int fd;
 	int open_count;
+	int error_count;
+	int max_error_count;
 	int block_size;
 	int read_ahead;
 	uint32_t flags;
Index: LVM2-2.02.68/lib/misc/lvm-globals.c
===================================================================
--- LVM2-2.02.68.orig/lib/misc/lvm-globals.c
+++ LVM2-2.02.68/lib/misc/lvm-globals.c
@@ -40,6 +40,7 @@ static int _ignore_suspended_devices = 0
 static int _error_message_produced = 0;
 static unsigned _is_static = 0;
 static int _udev_checking = 1;
+static int _dev_max_error_count = DEFAULT_MAX_ERROR_COUNT;
 
 void init_verbose(int level)
 {
@@ -121,6 +122,11 @@ void init_udev_checking(int checking)
 		log_debug("LVM udev checking disabled");
 }
 
+void init_dev_max_error_count(int value)
+{
+	_dev_max_error_count = value;
+}
+
 void set_cmd_name(const char *cmd)
 {
 	strncpy(_cmd_name, cmd, sizeof(_cmd_name));
@@ -224,3 +230,8 @@ int udev_checking(void)
 {
 	return _udev_checking;
 }
+
+int dev_max_error_count(void)
+{
+	return _dev_max_error_count;
+}
Index: LVM2-2.02.68/lib/misc/lvm-globals.h
===================================================================
--- LVM2-2.02.68.orig/lib/misc/lvm-globals.h
+++ LVM2-2.02.68/lib/misc/lvm-globals.h
@@ -37,6 +37,7 @@ void init_ignore_suspended_devices(int i
 void init_error_message_produced(int produced);
 void init_is_static(unsigned value);
 void init_udev_checking(int checking);
+void init_dev_max_error_count(int value);
 
 void set_cmd_name(const char *cmd_name);
 
@@ -56,8 +57,11 @@ int ignore_suspended_devices(void);
 const char *log_command_name(void);
 unsigned is_static(void);
 int udev_checking(void);
+int dev_max_error_count(void);
 
 #define DMEVENTD_MONITOR_IGNORE -1
 int dmeventd_monitor_mode(void);
 
+#define NO_DEV_ERROR_COUNT_LIMIT 0
+
 #endif
Index: LVM2-2.02.68/man/lvm.conf.5.in
===================================================================
--- LVM2-2.02.68.orig/man/lvm.conf.5.in
+++ LVM2-2.02.68/man/lvm.conf.5.in
@@ -165,6 +165,11 @@ use \fBpvs -o +pe_start\fP .  It will be
 \fBdata_alignment\fP plus the alignment_offset from
 \fBdata_alignment_offset_detection\fP (if enabled) or the pvcreate
 commandline.
+.IP
+\fBdev_max_error_count\fP \(em Maximum number of error counts per device
+before disabling devices. This option prevents a broken device from
+being accessed repeatedly. If set to 0, no access control to devices is
+done.
 .TP
 \fBlog\fP \(em Default log settings
 .IP
Index: LVM2-2.02.68/tools/lvmcmdline.c
===================================================================
--- LVM2-2.02.68.orig/tools/lvmcmdline.c
+++ LVM2-2.02.68/tools/lvmcmdline.c
@@ -1020,6 +1020,9 @@ int lvm_run_command(struct cmd_context *
 			goto_out;
 		}
 
+	if (!dev_reset_error_count(cmd))
+		goto_out;
+
 	if (arg_count(cmd, config_ARG) || !cmd->config_valid || config_files_changed(cmd)) {
 		/* Reinitialise various settings inc. logging, filters */
 		if (!refresh_toolcontext(cmd)) {

-- 
Takahiro Yasui
Hitachi Data Systems



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC][PATCH] lvm2: limit accesses to broken devices (v2)
  2010-06-29  7:26 [RFC][PATCH] lvm2: limit accesses to broken devices (v2) Takahiro Yasui
@ 2010-06-30 12:54 ` Petr Rockai
  2010-06-30 21:20   ` Takahiro Yasui
  2010-07-02 13:32 ` Alasdair G Kergon
  1 sibling, 1 reply; 6+ messages in thread
From: Petr Rockai @ 2010-06-30 12:54 UTC (permalink / raw)
  To: lvm-devel

Hello Taka,

Takahiro Yasui <takahiro.yasui@hds.com> writes:
>  int dev_read(struct device *dev, uint64_t offset, size_t len, void *buffer)
>  {
>  	struct device_area where;
> +	int ret;
>  
>  	if (!dev->open_count)
>  		return_0;
>  
> +	if (!_dev_is_valid(dev))
> +		return 0;
Maybe return_0 here?

> +
>  	where.dev = dev;
>  	where.start = offset;
>  	where.size = len;
>  
> -	return _aligned_io(&where, buffer, 0);
> +	ret = _aligned_io(&where, buffer, 0);
> +	if (!ret)
> +		_dev_inc_error_count(dev);
> +
> +	return ret;
>  }
>  
>  /*
> @@ -662,17 +684,25 @@ int dev_append(struct device *dev, size_
>  int dev_write(struct device *dev, uint64_t offset, size_t len, void *buffer)
>  {
>  	struct device_area where;
> +	int ret;
>  
>  	if (!dev->open_count)
>  		return_0;
>  
> +	if (!_dev_is_valid(dev))
> +		return 0;
(and here)

> +
>  	where.dev = dev;
>  	where.start = offset;
>  	where.size = len;
>  
>  	dev->flags |= DEV_ACCESSED_W;
>  
> -	return _aligned_io(&where, buffer, 1);
> +	ret = _aligned_io(&where, buffer, 1);
> +	if (!ret)
> +		_dev_inc_error_count(dev);
> +
> +	return ret;
Hmm. Should write errors count towards this? What happens if the device
just rejects writes -- could this break code by yanking the device
completely? This means, IIUIC, that any subsequent rescan that may
happen won't see the device even if the failure was due to failing
writes and the device reads just fine.

I guess this is a corner case, but I would be surprised if there were
any significant use-cases for tracking write errors for this purpose:
the primary reason for the code is repeated read errors, I believe?

Of course, not writing to the device that cannot be read is certainly
prudent -- do not remove the check near the top of dev_write if you
decide to not track write errors as per above.

Other than this, I think the patch looks OK.

Yours,
   Petr.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC][PATCH] lvm2: limit accesses to broken devices (v2)
  2010-06-30 12:54 ` Petr Rockai
@ 2010-06-30 21:20   ` Takahiro Yasui
  2010-07-01 10:07     ` Petr Rockai
  0 siblings, 1 reply; 6+ messages in thread
From: Takahiro Yasui @ 2010-06-30 21:20 UTC (permalink / raw)
  To: lvm-devel

Hello Petr,

Thank you for reviewing the patch.

On 06/30/10 08:54, Petr Rockai wrote:
> Takahiro Yasui <takahiro.yasui@hds.com> writes:
>>  int dev_read(struct device *dev, uint64_t offset, size_t len, void *buffer)
>>  {
>>  	struct device_area where;
>> +	int ret;
>>  
>>  	if (!dev->open_count)
>>  		return_0;
>>  
>> +	if (!_dev_is_valid(dev))
>> +		return 0;
> Maybe return_0 here?

As far as I understand, return_0 is for debugging of unexpected errors.
This is not a logical error, so I didn't used "return_0" here.

>> @@ -662,17 +684,25 @@ int dev_append(struct device *dev, size_
>>  int dev_write(struct device *dev, uint64_t offset, size_t len, void *buffer)
>>  {
>>  	struct device_area where;
>> +	int ret;
>>  
>>  	if (!dev->open_count)
>>  		return_0;
>>  
>> +	if (!_dev_is_valid(dev))
>> +		return 0;
> (and here)

Same here.

>>  	dev->flags |= DEV_ACCESSED_W;
>>  
>> -	return _aligned_io(&where, buffer, 1);
>> +	ret = _aligned_io(&where, buffer, 1);
>> +	if (!ret)
>> +		_dev_inc_error_count(dev);
>> +
>> +	return ret;
> Hmm. Should write errors count towards this? What happens if the device
> just rejects writes -- could this break code by yanking the device
> completely? This means, IIUIC, that any subsequent rescan that may
> happen won't see the device even if the failure was due to failing
> writes and the device reads just fine.

I haven't seen the case that a device accepts read I/Os but rejects
write I/Os, but the opposite case could happen. I heard that some
devices re-allocated a bad sector when a write I/O came to devices.

However, as far as I understand, writes aren't issued to devices
when reads don't succeed. Writes are for updating lvm header or
metadata and I think that lvm commands consider those missing
devices as failed devices and no writes are issued to those devices.

With the reason above, I think that the current implementation is
reasonable.

> I guess this is a corner case, but I would be surprised if there were
> any significant use-cases for tracking write errors for this purpose:
> the primary reason for the code is repeated read errors, I believe?

Yes, the primary reason for this code is to suppress repeated read
errors, but it is better if this patch can prevents repeated write
I/Os, as well.

> Of course, not writing to the device that cannot be read is certainly
> prudent -- do not remove the check near the top of dev_write if you
> decide to not track write errors as per above.
> 
> Other than this, I think the patch looks OK.

Again, thank you for reviewing this patch, Petr.
I appreciate your comments.

Thanks,
Taka



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC][PATCH] lvm2: limit accesses to broken devices (v2)
  2010-06-30 21:20   ` Takahiro Yasui
@ 2010-07-01 10:07     ` Petr Rockai
  0 siblings, 0 replies; 6+ messages in thread
From: Petr Rockai @ 2010-07-01 10:07 UTC (permalink / raw)
  To: lvm-devel

Takahiro Yasui <takahiro.yasui@hds.com> writes:

> As far as I understand, return_0 is for debugging of unexpected errors.
> This is not a logical error, so I didn't used "return_0" here.
Well, actually, return_0 is useful to track why something is failing. I
would say that situations may arise where someone will be wondering why
is that particular read failing (the notice about disabling the device
may be buried a long way ahead of the failing read).

>> Hmm. Should write errors count towards this? What happens if the device
>> just rejects writes -- could this break code by yanking the device
>> completely? This means, IIUIC, that any subsequent rescan that may
>> happen won't see the device even if the failure was due to failing
>> writes and the device reads just fine.
>
> I haven't seen the case that a device accepts read I/Os but rejects
> write I/Os, but the opposite case could happen. I heard that some
> devices re-allocated a bad sector when a write I/O came to devices.
You are probably right -- I was probably thinking of a device going
read-only for some reason, but if you say this does not happen in
practice, I won't oppose: you have certainly more experience with
real-life hardware.

> Again, thank you for reviewing this patch, Petr.
With the provision that the default stays to 0 (unlimited), I guess the
patch is OK to go in.

Reviewed-By: Petr Rockai <prockai@redhat.com>

Yours,
   Petr.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC][PATCH] lvm2: limit accesses to broken devices (v2)
  2010-06-29  7:26 [RFC][PATCH] lvm2: limit accesses to broken devices (v2) Takahiro Yasui
  2010-06-30 12:54 ` Petr Rockai
@ 2010-07-02 13:32 ` Alasdair G Kergon
  2010-07-22  0:58   ` Takahiro Yasui
  1 sibling, 1 reply; 6+ messages in thread
From: Alasdair G Kergon @ 2010-07-02 13:32 UTC (permalink / raw)
  To: lvm-devel

On Tue, Jun 29, 2010 at 03:26:15AM -0400, Takahiro Yasui wrote:
> This is a updated patch (v2) to limit accesses to broken devices.
 
As I mentioned on IRC when you first posted this, my main concern is whether
this introduces new metadata corruption modes (or makes existing ones more
likely to cause problems in given circumstances) if the decision about whether
or not to use a device is per-process and not global (machine and cluster-wide).

Currently the code tries to maintain the list of PVs as a global list which all
instances of LVM tools share.

What is proposed here is to start caching the failure information within a
single process.  But for how long can we safely cache it before revalidating
it?  Can there be races between processes?

For example, how do we cope when bad sectors (or transient I/O failures) cause
one LVM process to consider a device as missing while another process (using
different sectors) still thinks it's perfectly OK?

Shouldn't there be global decisions about this?

Perhaps decisions to start ignoring devices should be protected by the existing
locks: after dropping whichever lock protects the device, the state is reset and
next time the device is needed it will get retried again.  (In other words the
scope of the 'stop using this device because there were errors' flag is limited
to a single transaction - and of course within that transaction the tool could have
set the MISSING_PV flag to inform other processes to stop using the device.)

Anyway, I agree that we need to do something like this patch, but I think more
complex scenarios need to be considered first.

Alasdair



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC][PATCH] lvm2: limit accesses to broken devices (v2)
  2010-07-02 13:32 ` Alasdair G Kergon
@ 2010-07-22  0:58   ` Takahiro Yasui
  0 siblings, 0 replies; 6+ messages in thread
From: Takahiro Yasui @ 2010-07-22  0:58 UTC (permalink / raw)
  To: lvm-devel

Alasdair G Kergon wrote:
> On Tue, Jun 29, 2010 at 03:26:15AM -0400, Takahiro Yasui wrote:
> As I mentioned on IRC when you first posted this, my main concern is whether
> this introduces new metadata corruption modes (or makes existing ones more
> likely to cause problems in given circumstances) if the decision about whether
> or not to use a device is per-process and not global (machine and cluster-wide).
>
> Currently the code tries to maintain the list of PVs as a global list which all
> instances of LVM tools share.
> 
> What is proposed here is to start caching the failure information within a
> single process.  But for how long can we safely cache it before revalidating
> it? Can there be races between processes?

To make discussion point simple, let's consider failure information is cached
in the period VG lock is held.

There are two types of VG lock, WRITE and READ. WRITE lock is exclusive
to others and processes work sequentially. This patch won't introduce
races and corrupt metadata, so this patch is safe both for machine and
cluster-wide. 

As to a period of READ VG lock, several processes may run simultaneously
with a different device status. It is possible for the oritinal lvm command
in case that transient error occurs. For example, two processes, p1 and
p2 are running simultaneously and transient errors occured only for
read#1 and #2 issued by p1 but not for read#3 and #4 by p2. p1 thinks
a device is invalid, but p2 thinks the device is valid. If lvm command can't
handle this case, it is a bug of lvm and we need to fix it.

       <--------- READ VG LOCK --------->
p1       <-->                 <-->
           read#1           read#2

             <--------- READ VG LOCK --------->
p2                   <-->                  <-->
                      read#3             read#4

I can say that my patch just extends a period of transient error to a range
of VG lock and I believe that the patch doesn't introduce any new condition.

> For example, how do we cope when bad sectors (or transient I/O failures) cause
> one LVM process to consider a device as missing while another process (using
> different sectors) still thinks it's perfectly OK?
> Shouldn't there be global decisions about this?

As for bad sectors, they cause a permanent error rather than a transient
error and every reads to the sectors will fail. (They could succeed if the
sectors recovered, but we can think the case as a transient error.) Even if
a process might conder a device as missing and another process using
different sectors might think the device is void, both processes should
work properly. The patch doesn't instoruce a new condition, either.

> Perhaps decisions to start ignoring devices should be protected by the existing
> locks: after dropping whichever lock protects the device, the state is reset and
> next time the device is needed it will get retried again.  (In other words the
> scope of the 'stop using this device because there were errors' flag is limited
> to a single transaction - and of course within that transaction the tool could have
> set the MISSING_PV flag to inform other processes to stop using the device.)

It is difficult to use the MISSING_PV flag and haven't found a good solution
yet. The main reason of accessing failed devices a lot of times is scans by
lvmcache_label_scan(). lvm command calls lvmcache_label_scan() a lot of
times and it doesn't use lvm structures such as lv, vg and pv. So I believe
that the MISSING_PV flag is useful to achieve the goal.

I haven't found a bad case with the v2 yet, but limiting the period to cache
failure information to the period of VG lock makes things simple and safer.
I believe that it could be a solution.

I appreciate your comments and other perspectives we should consider.

Thanks,
Taka



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-07-22  0:58 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-29  7:26 [RFC][PATCH] lvm2: limit accesses to broken devices (v2) Takahiro Yasui
2010-06-30 12:54 ` Petr Rockai
2010-06-30 21:20   ` Takahiro Yasui
2010-07-01 10:07     ` Petr Rockai
2010-07-02 13:32 ` Alasdair G Kergon
2010-07-22  0:58   ` Takahiro Yasui

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.