All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yehuda Sadeh <yehuda@inktank.com>
To: Sage Weil <sage@inktank.com>
Cc: ceph-devel@vger.kernel.org, caleb miles <caleb.miles@inktank.com>
Subject: Re: [PATCH 2/9] libceph: support crush tunables
Date: Tue, 24 Jul 2012 15:24:55 -0700	[thread overview]
Message-ID: <CAC-hyiGu--5SUgbexOM-8o-kzRzXLV+5Mve+P-6dYcfo-AX78w@mail.gmail.com> (raw)
In-Reply-To: <1342831308-18815-3-git-send-email-sage@inktank.com>

On Fri, Jul 20, 2012 at 5:41 PM, Sage Weil <sage@inktank.com> wrote:
> From: caleb miles <caleb.miles@inktank.com>
>
> The server side recently added support for tuning some magic
> crush variables. Decode these variables if they are present, or use the
> default values if they are not present.
>
> Corresponds to ceph.git commit 89af369c25f274fe62ef730e5e8aad0c54f1e5a5.
>
> Signed-off-by: caleb miles <caleb.miles@inktank.com>
> Reviewed-by: Sage Weil <sage@inktank.com>
> ---
>  include/linux/ceph/ceph_features.h |    4 ++-
>  include/linux/crush/crush.h        |    8 +++++++
>  net/ceph/crush/mapper.c            |   13 ++++++-----
>  net/ceph/osdmap.c                  |   39 ++++++++++++++++++++++++++++++++++++
>  4 files changed, 57 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
> index 342f93d..df25dcf 100644
> --- a/include/linux/ceph/ceph_features.h
> +++ b/include/linux/ceph/ceph_features.h
> @@ -12,12 +12,14 @@
>  #define CEPH_FEATURE_MONNAMES       (1<<5)
>  #define CEPH_FEATURE_RECONNECT_SEQ  (1<<6)
>  #define CEPH_FEATURE_DIRLAYOUTHASH  (1<<7)
> +#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)

Is there any reason why this is bit 18 and not bit 8 (the previous feature uses bit 7)?

>
>  /*
>   * Features supported.
>   */
>  #define CEPH_FEATURES_SUPPORTED_DEFAULT  \
> -       (CEPH_FEATURE_NOSRCADDR)
> +       (CEPH_FEATURE_NOSRCADDR |        \
> +        CEPH_FEATURE_CRUSH_TUNABLES)
>
>  #define CEPH_FEATURES_REQUIRED_DEFAULT   \
>         (CEPH_FEATURE_NOSRCADDR)
> diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
> index 7c47508..25baa28 100644
> --- a/include/linux/crush/crush.h
> +++ b/include/linux/crush/crush.h
> @@ -154,6 +154,14 @@ struct crush_map {
>         __s32 max_buckets;
>         __u32 max_rules;
>         __s32 max_devices;
> +
> +       /* choose local retries before re-descent */
> +       __u32 choose_local_tries;
> +       /* choose local attempts using a fallback permutation before
> +        * re-descent */
> +       __u32 choose_local_fallback_tries;
> +       /* choose attempts before giving up */
> +       __u32 choose_total_tries;
>  };
>
>
> diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
> index d7edc24..35fce75 100644
> --- a/net/ceph/crush/mapper.c
> +++ b/net/ceph/crush/mapper.c
> @@ -306,7 +306,6 @@ static int crush_choose(const struct crush_map *map,
>         int item = 0;
>         int itemtype;
>         int collide, reject;
> -       const unsigned int orig_tries = 5; /* attempts before we fall back to search */
>
>         dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
>                 bucket->id, x, outpos, numrep);
> @@ -351,8 +350,9 @@ static int crush_choose(const struct crush_map *map,
>                                         reject = 1;
>                                         goto reject;
>                                 }
> -                               if (flocal >= (in->size>>1) &&
> -                                   flocal > orig_tries)
> +                               if (map->choose_local_fallback_tries > 0 &&
> +                                   flocal >= (in->size>>1) &&
> +                                   flocal > map->choose_local_fallback_tries)

Is flocal right here, or should it be ftotal?

>                                         item = bucket_perm_choose(in, x, r);
>                                 else
>                                         item = crush_bucket_choose(in, x, r);
> @@ -422,13 +422,14 @@ reject:
>                                         ftotal++;
>                                         flocal++;
>
> -                                       if (collide && flocal < 3)
> +                                       if (collide && flocal <= map->choose_local_tries)
>                                                 /* retry locally a few times */
>                                                 retry_bucket = 1;
> -                                       else if (flocal <= in->size + orig_tries)
> +                                       else if (map->choose_local_fallback_tries > 0 &&
> +                                                flocal <= in->size + map->choose_local_fallback_tries)
>                                                 /* exhaustive bucket search */
>                                                 retry_bucket = 1;
> -                                       else if (ftotal < 20)
> +                                       else if (ftotal <= map->choose_total_tries)
>                                                 /* then retry descent */
>                                                 retry_descent = 1;
>                                         else
> diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
> index 9600674..3124b71 100644
> --- a/net/ceph/osdmap.c
> +++ b/net/ceph/osdmap.c
> @@ -135,6 +135,21 @@ bad:
>         return -EINVAL;
>  }
>
> +static int skip_name_map(void **p, void *end)
> +{
> +        int len;
> +        ceph_decode_32_safe(p, end, len ,bad);
> +        while (len--) {
> +                int strlen;
Use u32 for strlen here: it is decoded as a 32-bit value and then added to *p, so it should not be a signed int.

> +                *p += sizeof(u32);
> +                ceph_decode_32_safe(p, end, strlen, bad);
> +                *p += strlen;
> +}
> +        return 0;
> +bad:
> +        return -EINVAL;
> +}
> +
>  static struct crush_map *crush_decode(void *pbyval, void *end)
>  {
>         struct crush_map *c;
> @@ -143,6 +158,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
>         void **p = &pbyval;
>         void *start = pbyval;
>         u32 magic;
> +       u32 num_name_maps;
>
>         dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
>
> @@ -150,6 +166,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
>         if (c == NULL)
>                 return ERR_PTR(-ENOMEM);
>
> +        /* set tunables to default values */
> +        c->choose_local_tries = 2;
> +        c->choose_local_fallback_tries = 5;
> +        c->choose_total_tries = 19;
> +
>         ceph_decode_need(p, end, 4*sizeof(u32), bad);
>         magic = ceph_decode_32(p);
>         if (magic != CRUSH_MAGIC) {
> @@ -297,7 +318,25 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
>         }
>
>         /* ignore trailing name maps. */
> +        for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
> +                err = skip_name_map(p, end);
> +                if (err < 0)
> +                        goto done;
> +        }
> +
> +        /* tunables */
> +        ceph_decode_need(p, end, 3*sizeof(u32), done);
> +        c->choose_local_tries = ceph_decode_32(p);
> +        c->choose_local_fallback_tries =  ceph_decode_32(p);
> +        c->choose_total_tries = ceph_decode_32(p);
> +        dout("crush decode tunable choose_local_tries = %d",
> +             c->choose_local_tries);
> +        dout("crush decode tunable choose_local_fallback_tries = %d",
> +             c->choose_local_fallback_tries);
> +        dout("crush decode tunable choose_total_tries = %d",
> +             c->choose_total_tries);
>
> +done:
>         dout("crush_decode success\n");
>         return c;
>
> --
> 1.7.9
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2012-07-24 22:24 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-21  0:41 [PATCH 0/9] messenger fixups, batch #1 Sage Weil
2012-07-21  0:41 ` [PATCH 1/9] libceph: move feature bits to separate header Sage Weil
2012-07-24 22:14   ` Yehuda Sadeh
2012-07-30 18:29   ` Alex Elder
2012-07-21  0:41 ` [PATCH 2/9] libceph: support crush tunables Sage Weil
2012-07-24 22:24   ` Yehuda Sadeh [this message]
2012-07-30 23:14     ` Sage Weil
2012-07-30 23:45       ` Yehuda Sadeh
2012-07-30 18:36   ` Alex Elder
2012-07-21  0:41 ` [PATCH 3/9] libceph: report socket read/write error message Sage Weil
2012-07-24 22:26   ` Yehuda Sadeh
2012-07-30 18:37   ` Alex Elder
2012-07-21  0:41 ` [PATCH 4/9] libceph: fix mutex coverage for ceph_con_close Sage Weil
2012-07-24 22:29   ` Yehuda Sadeh
2012-07-30 18:43   ` Alex Elder
2012-07-21  0:41 ` [PATCH 5/9] libceph: resubmit linger ops when pg mapping changes Sage Weil
2012-07-24 22:51   ` Yehuda Sadeh
2012-07-30 22:40   ` Alex Elder
2012-07-30 23:03     ` Sage Weil
2012-07-21  0:41 ` [PATCH 6/9] libceph: (re)initialize bio_iter on start of message receive Sage Weil
2012-07-24 22:55   ` Yehuda Sadeh
2012-07-30 19:04   ` Alex Elder
2012-07-21  0:41 ` [PATCH 7/9] ceph: close old con before reopening on mds reconnect Sage Weil
2012-07-24 22:56   ` Yehuda Sadeh
2012-07-30 23:11     ` Sage Weil
2012-07-21  0:41 ` [PATCH 8/9] libceph: protect ceph_con_open() with mutex Sage Weil
2012-07-24 22:58   ` Yehuda Sadeh
2012-07-30 19:06   ` Alex Elder
2012-07-21  0:41 ` [PATCH 9/9] libceph: reset connection retry on successfully negotiation Sage Weil
2012-07-24 23:00   ` Yehuda Sadeh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAC-hyiGu--5SUgbexOM-8o-kzRzXLV+5Mve+P-6dYcfo-AX78w@mail.gmail.com \
    --to=yehuda@inktank.com \
    --cc=caleb.miles@inktank.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=sage@inktank.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.