From: Minchan Kim <minchan@kernel.org>
To: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>,
	kernel-team <kernel-team@lge.com>
Subject: Re: [PATCH 2/2] zram: do not count duplicated pages as compressed
Date: Tue, 16 May 2017 14:26:05 +0900
Message-ID: <20170516052605.GA5429@bbox>
In-Reply-To: <20170516023615.GC10262@jagdpanzerIV.localdomain>

On Tue, May 16, 2017 at 11:36:15AM +0900, Sergey Senozhatsky wrote:
> > > > @@ -794,7 +801,15 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
> > > >  	entry = zram_dedup_find(zram, page, &checksum);
> > > >  	if (entry) {
> > > >  		comp_len = entry->len;
> > > > -		goto found_dup;
> > > > +		zram_slot_lock(zram, index);
> > > > +		zram_free_page(zram, index);
> > > > +		zram_set_flag(zram, index, ZRAM_DUP);
> > > > +		zram_set_entry(zram, index, entry);
> > > > +		zram_set_obj_size(zram, index, comp_len);
> > > > +		zram_slot_unlock(zram, index);
> > > > +		atomic64_add(comp_len, &zram->stats.dup_data_size);
> > > > +		atomic64_inc(&zram->stats.pages_stored);
> > > > +		return 0;
> > > 
> > > hm. that's quite a bit of code duplication, isn't it?
> > 
> > Yub. 3 parts: the above part, zram_same_page_write(), and the tail of __zram_bvec_write().
> 
> hmm... good question... I can hardly think of anything significantly
> better; zram object handling is now a mix of flags, entries,
> ref_counters, etc. etc. maybe we could merge some of those ops if we
> kept the slot locked through the entire __zram_bvec_write(), but
> that does not look attractive.
> 
> something ABSOLUTELY untested and incomplete. not even compile tested (!).
> 99% broken and stupid (!). but there is one thing that it has revealed,
> which is why I'm posting it even though it's incomplete. see below:
> 
> ---
> 
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index 372602c7da49..b31543c40d54 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -509,11 +509,8 @@ static bool zram_same_page_write(struct zram *zram, u32 index,
>         if (page_same_filled(mem, &element)) {
>                 kunmap_atomic(mem);
>                 /* Free memory associated with this sector now. */
> -               zram_slot_lock(zram, index);
> -               zram_free_page(zram, index);
>                 zram_set_flag(zram, index, ZRAM_SAME);
>                 zram_set_element(zram, index, element);
> -               zram_slot_unlock(zram, index);
>  
>                 atomic64_inc(&zram->stats.same_pages);
>                 return true;
> @@ -778,7 +775,7 @@ static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
>  
>  static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>  {
> -       int ret;
> +       int ret = 0;
>         struct zram_entry *entry;
>         unsigned int comp_len;
>         void *src, *dst;
> @@ -786,12 +783,20 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>         struct page *page = bvec->bv_page;
>         u32 checksum;
>  
> +       /*
> +        * Free memory associated with this sector
> +        * before overwriting unused sectors.
> +        */
> +       zram_slot_lock(zram, index);
> +       zram_free_page(zram, index);

Hmm, zram_free_page() should happen only if the write is done successfully.
Otherwise, we lose the valid data even though the write IO failed.
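
Roughly, I mean keeping the free on the success path only, something like
this (an untested sketch, reusing the names from the diff above):

	ret = zram_compress(zram, &zstrm, page, &entry, &comp_len);
	if (ret) {
		zcomp_stream_put(zram->comp);
		return ret;		/* old object stays valid on failure */
	}

	/* ... copy into the new object, zram_dedup_insert(), etc. ... */

	/* drop the old object only once the new one is ready */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_set_entry(zram, index, entry);
	zram_set_obj_size(zram, index, comp_len);
	zram_slot_unlock(zram, index);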

> +
>         if (zram_same_page_write(zram, index, page))
> -               return 0;
> +               goto out_unlock;
>  
>         entry = zram_dedup_find(zram, page, &checksum);
>         if (entry) {
>                 comp_len = entry->len;
> +               zram_set_flag(zram, index, ZRAM_DUP);

In case of a dedup hit, we shouldn't increase compr_data_size.
If we fix the above two problems, do you think it's still cleaner?
(I don't mean to push back on your suggestion; it's a genuine
 question to hear your thoughts. :)
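
For the stats, the common tail would then need something like this
(untested, assuming ZRAM_DUP gets set only on the dedup path as in
your diff):

	/* Update stats: a dedup hit is not new compressed data */
	if (zram_test_flag(zram, index, ZRAM_DUP))
		atomic64_add(comp_len, &zram->stats.dup_data_size);
	else
		atomic64_add(comp_len, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);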


>                 goto found_dup;
>         }
>  
> @@ -799,7 +804,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>         ret = zram_compress(zram, &zstrm, page, &entry, &comp_len);
>         if (ret) {
>                 zcomp_stream_put(zram->comp);
> -               return ret;
> +               goto out_unlock;
>         }
>  
>         dst = zs_map_object(zram->mem_pool,
> @@ -817,20 +822,16 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>         zram_dedup_insert(zram, entry, checksum);
>  
>  found_dup:
> -       /*
> -        * Free memory associated with this sector
> -        * before overwriting unused sectors.
> -        */
> -       zram_slot_lock(zram, index);
> -       zram_free_page(zram, index);
>         zram_set_entry(zram, index, entry);
>         zram_set_obj_size(zram, index, comp_len);
> -       zram_slot_unlock(zram, index);
>  
>         /* Update stats */
>         atomic64_add(comp_len, &zram->stats.compr_data_size);
>         atomic64_inc(&zram->stats.pages_stored);
> -       return 0;
> +
> +out_unlock:
> +       zram_slot_unlock(zram, index);
> +       return ret;
>  }
> 
> ---
> 
> 
> namely,
> that zram_compress() error return path from __zram_bvec_write().
> 
> currently, we touch the existing compressed object and overwrite it only
> when we have successfully compressed a new object. when zram_compress() fails
> we propagate the error, but never touch the old object. so all reads that
> could hit that index later will read stale data. and probably it would
> make more sense to fail those reads as well; IOW, to free the old page
> regardless of zram_compress() progress.
> 
> what do you think?
> 
> 	-ss

Thread overview:
2017-05-15  7:41 [PATCH 1/2] zram: count same page write as page_stored Minchan Kim
2017-05-15  7:41 ` [PATCH 2/2] zram: do not count duplicated pages as compressed Minchan Kim
2017-05-16  1:30   ` Sergey Senozhatsky
2017-05-16  1:59     ` Minchan Kim
2017-05-16  2:36       ` Sergey Senozhatsky
2017-05-16  5:26         ` Minchan Kim [this message]
2017-05-16  5:45           ` Sergey Senozhatsky
2017-05-16  7:16             ` Minchan Kim
2017-05-16  7:36               ` Sergey Senozhatsky
2017-05-17  8:32                 ` Minchan Kim
2017-05-17  9:14                   ` Sergey Senozhatsky
2017-05-18  4:53                     ` Minchan Kim
2017-05-21  7:04                   ` Christoph Hellwig
2017-05-21  7:15                     ` Minchan Kim
2017-05-16  1:11 ` [PATCH 1/2] zram: count same page write as page_stored Sergey Senozhatsky
