From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756039Ab0FXQ3G (ORCPT ); Thu, 24 Jun 2010 12:29:06 -0400 Received: from ogre.sisk.pl ([217.79.144.158]:46368 "EHLO ogre.sisk.pl" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755750Ab0FXQ3C (ORCPT ); Thu, 24 Jun 2010 12:29:02 -0400 From: "Rafael J. Wysocki" To: Jiri Slaby Subject: Re: [PATCH 3/9] PM / Hibernate: user, implement user_ops writer Date: Thu, 24 Jun 2010 18:27:34 +0200 User-Agent: KMail/1.13.3 (Linux/2.6.35-rc3-rjw+; KDE/4.4.3; x86_64; ; ) Cc: pavel@ucw.cz, linux-pm@lists.linux-foundation.org, linux-kernel@vger.kernel.org, jirislaby@gmail.com References: <1275468768-28229-1-git-send-email-jslaby@suse.cz> <1275468768-28229-3-git-send-email-jslaby@suse.cz> In-Reply-To: <1275468768-28229-3-git-send-email-jslaby@suse.cz> MIME-Version: 1.0 Content-Type: Text/Plain; charset="iso-8859-2" Content-Transfer-Encoding: 7bit Message-Id: <201006241827.34871.rjw@sisk.pl> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Wednesday, June 02, 2010, Jiri Slaby wrote: > Switch /dev/snapshot writer to hibernate_io_ops approach so that we > can do whatever we want with snapshot processing code. All the later > code changes will be transparent and needn't care about different > readers/writers. Well. I don't want image data to undergo _any_ transformations within the kernel before going to s2disk. In fact, s2disk is supposed to do whatever it wants with the image data (that are supposed to be _raw_ image data). So, I don't think we need to "switch" /dev/snapshot to anything adding complexity in the process. > In this patch only writer is implemented -- for better bisectability > if something breaks. (The development was easier too, because one > could break only one part, not both.) > > Also, when using this interface, switch to the ops on open/release, > so they are used. > > There are explicit barriers protecting to_do_buffer, because it is > all done in a producer-consumer fashion: > PRODUCER (snapshot layer) > 1) to_do_buffer is set > 2) set TODO bit > 3) wake CONSUMER > 4) wait for TODO bit _clear_ > > CONSUMER (fops->read) > 1) wait for TODO bit > 2) pass to_do_buffer to userspace > 3) clear TODO bit > 4) wake PRODUCER > > Signed-off-by: Jiri Slaby > Cc: "Rafael J. Wysocki" > --- > kernel/power/user.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++--- > 1 files changed, 132 insertions(+), 8 deletions(-) > > diff --git a/kernel/power/user.c b/kernel/power/user.c > index e819e17..b4610c3 100644 > --- a/kernel/power/user.c > +++ b/kernel/power/user.c > @@ -23,6 +23,8 @@ > #include > #include > #include > +#include > +#include > #include > > #include > @@ -61,10 +63,93 @@ static struct snapshot_data { > char frozen; > char ready; > char platform_support; > + struct hibernate_io_ops *prev_ops; > } snapshot_state; > > atomic_t snapshot_device_available = ATOMIC_INIT(1); > > +static struct { > + void *buffer; /* buffer to pass through */ > + struct workqueue_struct *worker; /* the thread initiating read/write */ > + wait_queue_head_t wait; /* wait for buffer */ > + wait_queue_head_t done; /* read/write done? */ > + struct mutex lock; /* protection for buffer */ > + > +#define TODO_WORK 1 > +#define TODO_FINISH 2 > +#define TODO_CLOSED 3 > +#define TODO_ERROR 4 > + DECLARE_BITMAP(flags, 10); /* TODO_* flags defined above */ > +} to_do; > + > +static unsigned long user_free_space(void) > +{ > + return ~0UL; /* we have no idea, maybe we will fail later */ > +} > + > +static struct hibernate_io_handle *user_writer_start(void) > +{ > + return hib_io_handle_alloc(0) ? : ERR_PTR(-ENOMEM); > +} > + > +static int user_write_page(struct hibernate_io_handle *io_handle, void *buf, > + struct bio **bio_chain) > +{ > + int err = 0; > + > + if (test_bit(TODO_CLOSED, to_do.flags)) > + return -EIO; > + > + mutex_lock(&to_do.lock); > + to_do.buffer = buf; > + mutex_unlock(&to_do.lock); > + set_bit(TODO_WORK, to_do.flags); > + wake_up_interruptible(&to_do.wait); > + > + wait_event(to_do.done, !test_bit(TODO_WORK, to_do.flags) || > + (err = test_bit(TODO_CLOSED, to_do.flags))); > + > + return err ? -EIO : 0; > +} > + > +static int user_writer_finish(struct hibernate_io_handle *io_handle, > + unsigned int flags, int error) > +{ > + int err = 0; > + > + if (error) > + set_bit(TODO_ERROR, to_do.flags); > + set_bit(TODO_FINISH, to_do.flags); > + wake_up_interruptible(&to_do.wait); > + > + wait_event(to_do.done, !test_bit(TODO_FINISH, to_do.flags) || > + (err = test_bit(TODO_CLOSED, to_do.flags))); > + > + if (!error && err) > + error = -EIO; > + > + return error; > +} > + > +struct hibernate_io_ops user_ops = { > + .free_space = user_free_space, > + > + .writer_start = user_writer_start, > + .writer_finish = user_writer_finish, > + .write_page = user_write_page, > +}; > + > +static void snapshot_writer(struct work_struct *work) > +{ > + int ret; > + > + ret = swsusp_write(0); > + if (ret) > + printk(KERN_ERR "PM: write failed with %d\n", ret); > +} > + > +static DECLARE_WORK(snapshot_writer_w, snapshot_writer); > + > static int snapshot_open(struct inode *inode, struct file *filp) > { > struct snapshot_data *data; > @@ -115,9 +200,17 @@ static int snapshot_open(struct inode *inode, struct file *filp) > } > if (error) > atomic_inc(&snapshot_device_available); > + else { > + data->prev_ops = hibernate_io_ops; > + hibernate_io_ops = &user_ops; > + } > data->frozen = 0; > data->ready = 0; > data->platform_support = 0; > + memset(to_do.flags, 0, sizeof(*to_do.flags)); > + init_waitqueue_head(&to_do.wait); > + init_waitqueue_head(&to_do.done); > + mutex_init(&to_do.lock); > > Unlock: > mutex_unlock(&pm_mutex); > @@ -131,6 +224,10 @@ static int snapshot_release(struct inode *inode, struct file *filp) > > mutex_lock(&pm_mutex); > > + set_bit(TODO_CLOSED, to_do.flags); > + wake_up(&to_do.done); > + flush_workqueue(to_do.worker); > + > swsusp_free(); > free_basic_memory_bitmaps(); > data = filp->private_data; > @@ -139,6 +236,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) > thaw_processes(); > pm_notifier_call_chain(data->mode == O_WRONLY ? > PM_POST_HIBERNATION : PM_POST_RESTORE); > + hibernate_io_ops = data->prev_ops; > atomic_inc(&snapshot_device_available); > > mutex_unlock(&pm_mutex); > @@ -152,6 +250,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, > struct snapshot_data *data; > ssize_t res; > loff_t pg_offp = *offp & ~PAGE_MASK; > + int fin = 0; > > mutex_lock(&pm_mutex); > > @@ -161,18 +260,31 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, > goto Unlock; > } > if (!pg_offp) { /* on page boundary? */ > - res = snapshot_read_next(&data->handle); I really don't understand why you're willing to do this. In the s2disk case we have an image in memory and we only want to trasfer it to user space page-by-page, where user space decides when each page is going to be transferred. snapshot_read_next() is for that and while you can modify it however you like, I don't think it's really worth it. > - if (res <= 0) > + res = wait_event_interruptible(to_do.wait, > + test_bit(TODO_WORK, to_do.flags) || > + (fin = test_and_clear_bit(TODO_FINISH, to_do.flags))); > + if (res) > goto Unlock; > - } else { > - res = PAGE_SIZE - pg_offp; > + if (test_bit(TODO_ERROR, to_do.flags)) { > + res = -EIO; > + goto Unlock; > + } > + if (fin) > + goto wake; > } > + res = PAGE_SIZE - pg_offp; > > - res = simple_read_from_buffer(buf, count, &pg_offp, > - data_of(data->handle), res); > + mutex_lock(&to_do.lock); > + res = simple_read_from_buffer(buf, count, &pg_offp, to_do.buffer, res); > + mutex_unlock(&to_do.lock); > if (res > 0) > *offp += res; > > + if (!(pg_offp & ~PAGE_MASK)) { > + clear_bit(TODO_WORK, to_do.flags); > +wake: > + wake_up(&to_do.done); > + } > Unlock: > mutex_unlock(&pm_mutex); > > @@ -278,8 +390,11 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, > error = hibernation_snapshot(data->platform_support); > if (!error) > error = put_user(in_suspend, (int __user *)arg); > - if (!error) > + if (!error) { > + if (in_suspend) > + queue_work(to_do.worker, &snapshot_writer_w); > data->ready = 1; > + } > break; > > case SNAPSHOT_ATOMIC_RESTORE: > @@ -473,7 +588,16 @@ static struct miscdevice snapshot_device = { > > static int __init snapshot_device_init(void) > { > - return misc_register(&snapshot_device); > + int ret; > + > + to_do.worker = create_singlethread_workqueue("suspend_worker"); > + if (!to_do.worker) > + return -ENOMEM; > + > + ret = misc_register(&snapshot_device); > + if (ret) > + destroy_workqueue(to_do.worker); > + return ret; > }; > > device_initcall(snapshot_device_init); And using a special workqueue for this purpose is seriously over the top IMnshO. Why don't you just regard s2disk as a special case and don't touch it (or modify it only so much to prevent it from getting in the way)? Rafael