> On 22 Mar 2019, at 22.48, Igor Konopko wrote: > > Currently there is only one copy of smeta stored per line in pblk. This > is risky, because in case of read error on such a chunk, we are losing > all the data from the whole line, which leads to silent data corruption. > > This patch changes this behaviour and allows storing more than one > copy of the smeta (specified by module parameter) in order to provide > higher reliability by storing mirrored copies of the smeta struct and > providing the possibility to fail over to another copy of that struct in > case of read error. Such an approach ensures that copies of these > critical structures will be stored on different dies and thus the predicted > UBER is multiple times higher. > > Signed-off-by: Igor Konopko > --- > drivers/lightnvm/pblk-core.c | 124 ++++++++++++++++++++++++++++++++------- > drivers/lightnvm/pblk-init.c | 23 ++++++-- > drivers/lightnvm/pblk-recovery.c | 14 +++-- > drivers/lightnvm/pblk-rl.c | 3 +- > drivers/lightnvm/pblk.h | 1 + > 5 files changed, 132 insertions(+), 33 deletions(-) > > diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c > index 06ac3f0..9d9a9e2 100644 > --- a/drivers/lightnvm/pblk-core.c > +++ b/drivers/lightnvm/pblk-core.c > @@ -720,13 +720,14 @@ u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) > return bit * geo->ws_opt; > } > > -int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) > +static int pblk_line_smeta_read_copy(struct pblk *pblk, > + struct pblk_line *line, u64 paddr) > { > struct nvm_tgt_dev *dev = pblk->dev; > + struct nvm_geo *geo = &dev->geo; > struct pblk_line_meta *lm = &pblk->lm; > struct bio *bio; > struct nvm_rq rqd; > - u64 paddr = pblk_line_smeta_start(pblk, line); > int i, ret; > > memset(&rqd, 0, sizeof(struct nvm_rq)); > @@ -749,8 +750,20 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) > rqd.nr_ppas = lm->smeta_sec; > rqd.is_seq = 1; > > - for (i = 0; i < lm->smeta_sec; i++, paddr++) > - 
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); > + for (i = 0; i < rqd.nr_ppas; i++, paddr++) { > + struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line->id); > + int pos = pblk_ppa_to_pos(geo, ppa); > + > + while (test_bit(pos, line->blk_bitmap)) { > + paddr += pblk->min_write_pgs; > + ppa = addr_to_gen_ppa(pblk, paddr, line->id); > + pos = pblk_ppa_to_pos(geo, ppa); > + } > + > + rqd.ppa_list[i] = ppa; > + pblk_get_meta(pblk, rqd.meta_list, i)->lba = > + cpu_to_le64(ADDR_EMPTY); > + } > > ret = pblk_submit_io_sync(pblk, &rqd); > if (ret) { > @@ -771,16 +784,63 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) > return ret; > } > > -static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, > - u64 paddr) > +int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) > +{ > + struct pblk_line_meta *lm = &pblk->lm; > + int i, ret = 0; > + u64 paddr = pblk_line_smeta_start(pblk, line); > + > + for (i = 0; i < lm->smeta_copies; i++) { > + ret = pblk_line_smeta_read_copy(pblk, line, > + paddr + (i * lm->smeta_sec)); > + if (!ret) { > + /* > + * Just one successfully read copy of smeta is > + * enough for us for recovery, don't need to > + * read another one. > + */ > + return ret; > + } > + } > + return ret; > +} > + > +static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line) > { > struct nvm_tgt_dev *dev = pblk->dev; > + struct nvm_geo *geo = &dev->geo; > struct pblk_line_meta *lm = &pblk->lm; > struct bio *bio; > struct nvm_rq rqd; > __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); > __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); > - int i, ret; > + u64 paddr = 0; > + int smeta_wr_len = lm->smeta_len; > + int smeta_wr_sec = lm->smeta_sec; > + int i, ret, rq_writes; > + > + /* > + * Check if we can write all the smeta copies with > + * a single write command. > + * If yes -> copy smeta sector into multiple copies > + * in buffer to write. 
> + * If no -> issue writes one by one using the same > + * buffer space. > + * Only if all the copies are written correctly > + * we are treating this line as valid for proper > + * UBER reliability. > + */ > + if (lm->smeta_sec * lm->smeta_copies > pblk->max_write_pgs) { > + rq_writes = lm->smeta_copies; > + } else { > + rq_writes = 1; > + for (i = 1; i < lm->smeta_copies; i++) { > + memcpy(line->smeta + i * lm->smeta_len, > + line->smeta, lm->smeta_len); > + } > + smeta_wr_len *= lm->smeta_copies; > + smeta_wr_sec *= lm->smeta_copies; > + } > > memset(&rqd, 0, sizeof(struct nvm_rq)); > > @@ -788,7 +848,8 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, > if (ret) > return ret; > > - bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL); > +next_rq: > + bio = bio_map_kern(dev->q, line->smeta, smeta_wr_len, GFP_KERNEL); > if (IS_ERR(bio)) { > ret = PTR_ERR(bio); > goto clear_rqd; > @@ -799,15 +860,23 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, > > rqd.bio = bio; > rqd.opcode = NVM_OP_PWRITE; > - rqd.nr_ppas = lm->smeta_sec; > + rqd.nr_ppas = smeta_wr_sec; > rqd.is_seq = 1; > > - for (i = 0; i < lm->smeta_sec; i++, paddr++) { > - struct pblk_sec_meta *meta = pblk_get_meta(pblk, > - rqd.meta_list, i); > + for (i = 0; i < rqd.nr_ppas; i++, paddr++) { > + void *meta_list = rqd.meta_list; > + struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line->id); > + int pos = pblk_ppa_to_pos(geo, ppa); > > - rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); > - meta->lba = lba_list[paddr] = addr_empty; > + while (test_bit(pos, line->blk_bitmap)) { > + paddr += pblk->min_write_pgs; > + ppa = addr_to_gen_ppa(pblk, paddr, line->id); > + pos = pblk_ppa_to_pos(geo, ppa); > + } > + > + rqd.ppa_list[i] = ppa; > + pblk_get_meta(pblk, meta_list, i)->lba = addr_empty; > + lba_list[paddr] = addr_empty; > } > > ret = pblk_submit_io_sync_sem(pblk, &rqd); > @@ -822,8 +891,13 @@ static int 
pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, > if (rqd.error) { > pblk_log_write_err(pblk, &rqd); > ret = -EIO; > + goto clear_rqd; > } > > + rq_writes--; > + if (rq_writes > 0) > + goto next_rq; > + > clear_rqd: > pblk_free_rqd_meta(pblk, &rqd); > return ret; > @@ -1020,7 +1094,7 @@ static void pblk_line_setup_metadata(struct pblk_line *line, > line->smeta = l_mg->sline_meta[meta_line]; > line->emeta = l_mg->eline_meta[meta_line]; > > - memset(line->smeta, 0, lm->smeta_len); > + memset(line->smeta, 0, lm->smeta_len * lm->smeta_copies); > memset(line->emeta->buf, 0, lm->emeta_len[0]); > > line->emeta->mem = 0; > @@ -1147,7 +1221,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, > struct pblk_line_mgmt *l_mg = &pblk->l_mg; > u64 off; > int bit = -1; > - int emeta_secs; > + int emeta_secs, smeta_secs; > > line->sec_in_line = lm->sec_per_line; > > @@ -1163,13 +1237,19 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, > } > > /* Mark smeta metadata sectors as bad sectors */ > - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); > - off = bit * geo->ws_opt; > - bitmap_set(line->map_bitmap, off, lm->smeta_sec); > - line->sec_in_line -= lm->smeta_sec; > - line->cur_sec = off + lm->smeta_sec; > + smeta_secs = lm->smeta_sec * lm->smeta_copies; > + bit = -1; > + while (smeta_secs) { > + bit = find_next_zero_bit(line->blk_bitmap, lm->blk_per_line, > + bit + 1); > + off = bit * geo->ws_opt; > + bitmap_set(line->map_bitmap, off, geo->ws_opt); > + line->cur_sec = off + geo->ws_opt; > + smeta_secs -= lm->smeta_sec; > + } > + line->sec_in_line -= (lm->smeta_sec * lm->smeta_copies); > > - if (init && pblk_line_smeta_write(pblk, line, off)) { > + if (init && pblk_line_smeta_write(pblk, line)) { > pblk_debug(pblk, "line smeta I/O failed. 
Retry\n"); > return 0; > } > diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c > index 4211cd1..5717ad4 100644 > --- a/drivers/lightnvm/pblk-init.c > +++ b/drivers/lightnvm/pblk-init.c > @@ -27,6 +27,11 @@ static unsigned int write_buffer_size; > module_param(write_buffer_size, uint, 0644); > MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer"); > > +static unsigned int smeta_copies = 1; > + > +module_param(smeta_copies, int, 0644); > +MODULE_PARM_DESC(smeta_copies, "number of smeta copies"); > + > struct pblk_global_caches { > struct kmem_cache *ws; > struct kmem_cache *rec; > @@ -867,7 +872,8 @@ static int pblk_line_mg_init(struct pblk *pblk) > * emeta depends on the number of LUNs allocated to the pblk instance > */ > for (i = 0; i < PBLK_DATA_LINES; i++) { > - l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL); > + l_mg->sline_meta[i] = kmalloc(lm->smeta_len > + * lm->smeta_copies, GFP_KERNEL); > if (!l_mg->sline_meta[i]) > goto fail_free_smeta; > } > @@ -967,6 +973,12 @@ static int pblk_line_meta_init(struct pblk *pblk) > lm->mid_thrs = lm->sec_per_line / 2; > lm->high_thrs = lm->sec_per_line / 4; > lm->meta_distance = (geo->all_luns / 2) * pblk->min_write_pgs; > + lm->smeta_copies = smeta_copies; > + > + if (lm->smeta_copies < 1 || lm->smeta_copies > geo->all_luns) { > + pblk_err(pblk, "unsupported smeta copies parameter\n"); > + return -EINVAL; > + } > > /* Calculate necessary pages for smeta. See comment over struct > * line_smeta definition > @@ -998,10 +1010,11 @@ static int pblk_line_meta_init(struct pblk *pblk) > > lm->emeta_bb = geo->all_luns > i ? 
geo->all_luns - i : 0; > > - lm->min_blk_line = 1; > - if (geo->all_luns > 1) > - lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec + > - lm->emeta_sec[0], geo->clba); > + lm->min_blk_line = lm->smeta_copies; > + if (geo->all_luns > lm->smeta_copies) { > + lm->min_blk_line += DIV_ROUND_UP((lm->smeta_sec > + * lm->smeta_copies) + lm->emeta_sec[0], geo->clba); > + } > > if (lm->min_blk_line > lm->blk_per_line) { > pblk_err(pblk, "config. not supported. Min. LUN in line:%d\n", > diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c > index 74e5b17..038931d 100644 > --- a/drivers/lightnvm/pblk-recovery.c > +++ b/drivers/lightnvm/pblk-recovery.c > @@ -51,7 +51,8 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) > if (!lba_list) > return 1; > > - data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; > + data_start = pblk_line_smeta_start(pblk, line) > + + (lm->smeta_sec * lm->smeta_copies); > data_end = line->emeta_ssec; > nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas); > > @@ -140,7 +141,8 @@ static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) > if (lm->blk_per_line - nr_bb != valid_chunks) > pblk_err(pblk, "recovery line %d is bad\n", line->id); > > - pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec); > + pblk_update_line_wp(pblk, line, written_secs - > + (lm->smeta_sec * lm->smeta_copies)); > > return written_secs; > } > @@ -383,12 +385,14 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, > void *data; > dma_addr_t dma_ppa_list, dma_meta_list; > __le64 *lba_list; > - u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; > + u64 paddr = pblk_line_smeta_start(pblk, line) + > + (lm->smeta_sec * lm->smeta_copies); > bool padded = false; > int rq_ppas, rq_len; > int i, j; > int ret; > - u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec; > + u64 left_ppas = pblk_sec_in_open_line(pblk, line) - > + (lm->smeta_sec * 
lm->smeta_copies); > > if (pblk_line_wps_are_unbalanced(pblk, line)) > pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id); > @@ -722,7 +726,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) > > line = &pblk->lines[i]; > > - memset(smeta, 0, lm->smeta_len); > + memset(smeta, 0, lm->smeta_len * lm->smeta_copies); > line->smeta = smeta; > line->lun_bitmap = ((void *)(smeta_buf)) + > sizeof(struct line_smeta); > diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c > index b014957..944372c 100644 > --- a/drivers/lightnvm/pblk-rl.c > +++ b/drivers/lightnvm/pblk-rl.c > @@ -218,7 +218,8 @@ void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold) > unsigned int rb_windows; > > /* Consider sectors used for metadata */ > - sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; > + sec_meta = ((lm->smeta_sec * lm->smeta_copies) > + + lm->emeta_sec[0]) * l_mg->nr_free_lines; > blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); > > rl->high = pblk->op_blks - blk_meta - lm->blk_per_line; > diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h > index 3a84c8a..0999245 100644 > --- a/drivers/lightnvm/pblk.h > +++ b/drivers/lightnvm/pblk.h > @@ -547,6 +547,7 @@ struct pblk_line_mgmt { > struct pblk_line_meta { > unsigned int smeta_len; /* Total length for smeta */ > unsigned int smeta_sec; /* Sectors needed for smeta */ > + unsigned int smeta_copies; /* Number of smeta copies */ > > unsigned int emeta_len[4]; /* Lengths for emeta: > * [0]: Total > -- > 2.9.5 Looks good. Reviewed-by: Javier González