linux_dsm_epyc7002/drivers/lightnvm/sysblk.c
Matias Bjørling 8b4970c41f lightnvm: introduce factory reset
Now that a device can be managed using the system blocks, a method to
reset the device is necessary as well. This patch introduces logic to
reset the device easily to factory state and exposes it through an
ioctl.

The ioctl takes the following flags:

  NVM_FACTORY_ERASE_ONLY_USER
      By default all blocks, except host-reserved blocks are erased upon
      factory reset. Instead of this, only erase host-reserved blocks.
  NVM_FACTORY_RESET_HOST_BLKS
      Mark host-reserved blocks to be erased and set their type to free.
  NVM_FACTORY_RESET_GRWN_BBLKS
      Mark "grown bad blocks" to be erased and set their type to free.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
2016-01-12 08:21:18 -07:00

742 lines
17 KiB
C

/*
* Copyright (C) 2015 Matias Bjorling. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
* USA.
*
*/
#include <linux/lightnvm.h>
#define MAX_SYSBLKS 3 /* remember to update mapping scheme on change */
#define MAX_BLKS_PR_SYSBLK 2 /* 2 blks with 256 pages and 3000 erases
* enables ~1.5M updates per sysblk unit
*/
struct sysblk_scan {
/* A row is a collection of flash blocks for a system block. */
int nr_rows;
int row;
int act_blk[MAX_SYSBLKS];
int nr_ppas;
struct ppa_addr ppas[MAX_SYSBLKS * MAX_BLKS_PR_SYSBLK];/* all sysblks */
};
static inline int scan_ppa_idx(int row, int blkid)
{
return (row * MAX_BLKS_PR_SYSBLK) + blkid;
}
void nvm_sysblk_to_cpu(struct nvm_sb_info *info, struct nvm_system_block *sb)
{
info->seqnr = be32_to_cpu(sb->seqnr);
info->erase_cnt = be32_to_cpu(sb->erase_cnt);
info->version = be16_to_cpu(sb->version);
strncpy(info->mmtype, sb->mmtype, NVM_MMTYPE_LEN);
info->fs_ppa.ppa = be64_to_cpu(sb->fs_ppa);
}
void nvm_cpu_to_sysblk(struct nvm_system_block *sb, struct nvm_sb_info *info)
{
sb->magic = cpu_to_be32(NVM_SYSBLK_MAGIC);
sb->seqnr = cpu_to_be32(info->seqnr);
sb->erase_cnt = cpu_to_be32(info->erase_cnt);
sb->version = cpu_to_be16(info->version);
strncpy(sb->mmtype, info->mmtype, NVM_MMTYPE_LEN);
sb->fs_ppa = cpu_to_be64(info->fs_ppa.ppa);
}
static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas)
{
int nr_rows = min_t(int, MAX_SYSBLKS, dev->nr_chnls);
int i;
for (i = 0; i < nr_rows; i++)
sysblk_ppas[i].ppa = 0;
/* if possible, place sysblk at first channel, middle channel and last
* channel of the device. If not, create only one or two sys blocks
*/
switch (dev->nr_chnls) {
case 2:
sysblk_ppas[1].g.ch = 1;
/* fall-through */
case 1:
sysblk_ppas[0].g.ch = 0;
break;
default:
sysblk_ppas[0].g.ch = 0;
sysblk_ppas[1].g.ch = dev->nr_chnls / 2;
sysblk_ppas[2].g.ch = dev->nr_chnls - 1;
break;
}
return nr_rows;
}
void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s,
struct ppa_addr *sysblk_ppas)
{
memset(s, 0, sizeof(struct sysblk_scan));
s->nr_rows = nvm_setup_sysblks(dev, sysblk_ppas);
}
static int sysblk_get_host_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
void *private)
{
struct sysblk_scan *s = private;
int i, nr_sysblk = 0;
for (i = 0; i < nr_blks; i++) {
if (blks[i] != NVM_BLK_T_HOST)
continue;
if (s->nr_ppas == MAX_BLKS_PR_SYSBLK * MAX_SYSBLKS) {
pr_err("nvm: too many host blks\n");
return -EINVAL;
}
ppa.g.blk = i;
s->ppas[scan_ppa_idx(s->row, nr_sysblk)] = ppa;
s->nr_ppas++;
nr_sysblk++;
}
return 0;
}
static int nvm_get_all_sysblks(struct nvm_dev *dev, struct sysblk_scan *s,
struct ppa_addr *ppas, nvm_bb_update_fn *fn)
{
struct ppa_addr dppa;
int i, ret;
s->nr_ppas = 0;
for (i = 0; i < s->nr_rows; i++) {
dppa = generic_to_dev_addr(dev, ppas[i]);
s->row = i;
ret = dev->ops->get_bb_tbl(dev, dppa, dev->blks_per_lun, fn, s);
if (ret) {
pr_err("nvm: failed bb tbl for ppa (%u %u)\n",
ppas[i].g.ch,
ppas[i].g.blk);
return ret;
}
}
return ret;
}
/*
* scans a block for latest sysblk.
* Returns:
* 0 - newer sysblk not found. PPA is updated to latest page.
* 1 - newer sysblk found and stored in *cur. PPA is updated to
* next valid page.
* <0- error.
*/
static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa,
struct nvm_system_block *sblk)
{
struct nvm_system_block *cur;
int pg, cursz, ret, found = 0;
/* the full buffer for a flash page is allocated. Only the first of it
* contains the system block information
*/
cursz = dev->sec_size * dev->sec_per_pg * dev->nr_planes;
cur = kmalloc(cursz, GFP_KERNEL);
if (!cur)
return -ENOMEM;
/* perform linear scan through the block */
for (pg = 0; pg < dev->lps_per_blk; pg++) {
ppa->g.pg = ppa_to_slc(dev, pg);
ret = nvm_submit_ppa(dev, ppa, 1, NVM_OP_PREAD, NVM_IO_SLC_MODE,
cur, cursz);
if (ret) {
if (ret == NVM_RSP_ERR_EMPTYPAGE) {
pr_debug("nvm: sysblk scan empty ppa (%u %u %u %u)\n",
ppa->g.ch,
ppa->g.lun,
ppa->g.blk,
ppa->g.pg);
break;
}
pr_err("nvm: read failed (%x) for ppa (%u %u %u %u)",
ret,
ppa->g.ch,
ppa->g.lun,
ppa->g.blk,
ppa->g.pg);
break; /* if we can't read a page, continue to the
* next blk
*/
}
if (be32_to_cpu(cur->magic) != NVM_SYSBLK_MAGIC) {
pr_debug("nvm: scan break for ppa (%u %u %u %u)\n",
ppa->g.ch,
ppa->g.lun,
ppa->g.blk,
ppa->g.pg);
break; /* last valid page already found */
}
if (be32_to_cpu(cur->seqnr) < be32_to_cpu(sblk->seqnr))
continue;
memcpy(sblk, cur, sizeof(struct nvm_system_block));
found = 1;
}
kfree(cur);
return found;
}
static int nvm_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s, int type)
{
struct nvm_rq rqd;
int ret;
if (s->nr_ppas > dev->ops->max_phys_sect) {
pr_err("nvm: unable to update all sysblocks atomically\n");
return -EINVAL;
}
memset(&rqd, 0, sizeof(struct nvm_rq));
nvm_set_rqd_ppalist(dev, &rqd, s->ppas, s->nr_ppas);
nvm_generic_to_addr_mode(dev, &rqd);
ret = dev->ops->set_bb_tbl(dev, &rqd, type);
nvm_free_rqd_ppalist(dev, &rqd);
if (ret) {
pr_err("nvm: sysblk failed bb mark\n");
return -EINVAL;
}
return 0;
}
static int sysblk_get_free_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
void *private)
{
struct sysblk_scan *s = private;
struct ppa_addr *sppa;
int i, blkid = 0;
for (i = 0; i < nr_blks; i++) {
if (blks[i] == NVM_BLK_T_HOST)
return -EEXIST;
if (blks[i] != NVM_BLK_T_FREE)
continue;
sppa = &s->ppas[scan_ppa_idx(s->row, blkid)];
sppa->g.ch = ppa.g.ch;
sppa->g.lun = ppa.g.lun;
sppa->g.blk = i;
s->nr_ppas++;
blkid++;
pr_debug("nvm: use (%u %u %u) as sysblk\n",
sppa->g.ch, sppa->g.lun, sppa->g.blk);
if (blkid > MAX_BLKS_PR_SYSBLK - 1)
return 0;
}
pr_err("nvm: sysblk failed get sysblk\n");
return -EINVAL;
}
static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
struct sysblk_scan *s)
{
struct nvm_system_block nvmsb;
void *buf;
int i, sect, ret, bufsz;
struct ppa_addr *ppas;
nvm_cpu_to_sysblk(&nvmsb, info);
/* buffer for flash page */
bufsz = dev->sec_size * dev->sec_per_pg * dev->nr_planes;
buf = kzalloc(bufsz, GFP_KERNEL);
if (!buf)
return -ENOMEM;
memcpy(buf, &nvmsb, sizeof(struct nvm_system_block));
ppas = kcalloc(dev->sec_per_pg, sizeof(struct ppa_addr), GFP_KERNEL);
if (!ppas) {
ret = -ENOMEM;
goto err;
}
/* Write and verify */
for (i = 0; i < s->nr_rows; i++) {
ppas[0] = s->ppas[scan_ppa_idx(i, s->act_blk[i])];
pr_debug("nvm: writing sysblk to ppa (%u %u %u %u)\n",
ppas[0].g.ch,
ppas[0].g.lun,
ppas[0].g.blk,
ppas[0].g.pg);
/* Expand to all sectors within a flash page */
if (dev->sec_per_pg > 1) {
for (sect = 1; sect < dev->sec_per_pg; sect++) {
ppas[sect].ppa = ppas[0].ppa;
ppas[sect].g.sec = sect;
}
}
ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PWRITE,
NVM_IO_SLC_MODE, buf, bufsz);
if (ret) {
pr_err("nvm: sysblk failed program (%u %u %u)\n",
ppas[0].g.ch,
ppas[0].g.lun,
ppas[0].g.blk);
break;
}
ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PREAD,
NVM_IO_SLC_MODE, buf, bufsz);
if (ret) {
pr_err("nvm: sysblk failed read (%u %u %u)\n",
ppas[0].g.ch,
ppas[0].g.lun,
ppas[0].g.blk);
break;
}
if (memcmp(buf, &nvmsb, sizeof(struct nvm_system_block))) {
pr_err("nvm: sysblk failed verify (%u %u %u)\n",
ppas[0].g.ch,
ppas[0].g.lun,
ppas[0].g.blk);
ret = -EINVAL;
break;
}
}
kfree(ppas);
err:
kfree(buf);
return ret;
}
static int nvm_prepare_new_sysblks(struct nvm_dev *dev, struct sysblk_scan *s)
{
int i, ret;
unsigned long nxt_blk;
struct ppa_addr *ppa;
for (i = 0; i < s->nr_rows; i++) {
nxt_blk = (s->act_blk[i] + 1) % MAX_BLKS_PR_SYSBLK;
ppa = &s->ppas[scan_ppa_idx(i, nxt_blk)];
ppa->g.pg = ppa_to_slc(dev, 0);
ret = nvm_erase_ppa(dev, ppa, 1);
if (ret)
return ret;
s->act_blk[i] = nxt_blk;
}
return 0;
}
int nvm_get_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
{
struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
struct sysblk_scan s;
struct nvm_system_block *cur;
int i, j, found = 0;
int ret = -ENOMEM;
/*
* 1. setup sysblk locations
* 2. get bad block list
* 3. filter on host-specific (type 3)
* 4. iterate through all and find the highest seq nr.
* 5. return superblock information
*/
if (!dev->ops->get_bb_tbl)
return -EINVAL;
nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
mutex_lock(&dev->mlock);
ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_host_blks);
if (ret)
goto err_sysblk;
/* no sysblocks initialized */
if (!s.nr_ppas)
goto err_sysblk;
cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
if (!cur)
goto err_sysblk;
/* find the latest block across all sysblocks */
for (i = 0; i < s.nr_rows; i++) {
for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
struct ppa_addr ppa = s.ppas[scan_ppa_idx(i, j)];
ret = nvm_scan_block(dev, &ppa, cur);
if (ret > 0)
found = 1;
else if (ret < 0)
break;
}
}
nvm_sysblk_to_cpu(info, cur);
kfree(cur);
err_sysblk:
mutex_unlock(&dev->mlock);
if (found)
return 1;
return ret;
}
int nvm_update_sysblock(struct nvm_dev *dev, struct nvm_sb_info *new)
{
/* 1. for each latest superblock
* 2. if room
* a. write new flash page entry with the updated information
* 3. if no room
* a. find next available block on lun (linear search)
* if none, continue to next lun
* if none at all, report error. also report that it wasn't
* possible to write to all superblocks.
* c. write data to block.
*/
struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
struct sysblk_scan s;
struct nvm_system_block *cur;
int i, j, ppaidx, found = 0;
int ret = -ENOMEM;
if (!dev->ops->get_bb_tbl)
return -EINVAL;
nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
mutex_lock(&dev->mlock);
ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_host_blks);
if (ret)
goto err_sysblk;
cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
if (!cur)
goto err_sysblk;
/* Get the latest sysblk for each sysblk row */
for (i = 0; i < s.nr_rows; i++) {
found = 0;
for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
ppaidx = scan_ppa_idx(i, j);
ret = nvm_scan_block(dev, &s.ppas[ppaidx], cur);
if (ret > 0) {
s.act_blk[i] = j;
found = 1;
} else if (ret < 0)
break;
}
}
if (!found) {
pr_err("nvm: no valid sysblks found to update\n");
ret = -EINVAL;
goto err_cur;
}
/*
* All sysblocks found. Check that they have same page id in their flash
* blocks
*/
for (i = 1; i < s.nr_rows; i++) {
struct ppa_addr l = s.ppas[scan_ppa_idx(0, s.act_blk[0])];
struct ppa_addr r = s.ppas[scan_ppa_idx(i, s.act_blk[i])];
if (l.g.pg != r.g.pg) {
pr_err("nvm: sysblks not on same page. Previous update failed.\n");
ret = -EINVAL;
goto err_cur;
}
}
/*
* Check that there haven't been another update to the seqnr since we
* began
*/
if ((new->seqnr - 1) != be32_to_cpu(cur->seqnr)) {
pr_err("nvm: seq is not sequential\n");
ret = -EINVAL;
goto err_cur;
}
/*
* When all pages in a block has been written, a new block is selected
* and writing is performed on the new block.
*/
if (s.ppas[scan_ppa_idx(0, s.act_blk[0])].g.pg ==
dev->lps_per_blk - 1) {
ret = nvm_prepare_new_sysblks(dev, &s);
if (ret)
goto err_cur;
}
ret = nvm_write_and_verify(dev, new, &s);
err_cur:
kfree(cur);
err_sysblk:
mutex_unlock(&dev->mlock);
return ret;
}
int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
{
struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
struct sysblk_scan s;
int ret;
/*
* 1. select master blocks and select first available blks
* 2. get bad block list
* 3. mark MAX_SYSBLKS block as host-based device allocated.
* 4. write and verify data to block
*/
if (!dev->ops->get_bb_tbl || !dev->ops->set_bb_tbl)
return -EINVAL;
if (!(dev->mccap & NVM_ID_CAP_SLC) || !dev->lps_per_blk) {
pr_err("nvm: memory does not support SLC access\n");
return -EINVAL;
}
/* Index all sysblocks and mark them as host-driven */
nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
mutex_lock(&dev->mlock);
ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_free_blks);
if (ret)
goto err_mark;
ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_HOST);
if (ret)
goto err_mark;
/* Write to the first block of each row */
ret = nvm_write_and_verify(dev, info, &s);
err_mark:
mutex_unlock(&dev->mlock);
return ret;
}
struct factory_blks {
struct nvm_dev *dev;
int flags;
unsigned long *blks;
};
static int factory_nblks(int nblks)
{
/* Round up to nearest BITS_PER_LONG */
return (nblks + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
}
static unsigned int factory_blk_offset(struct nvm_dev *dev, int ch, int lun)
{
int nblks = factory_nblks(dev->blks_per_lun);
return ((ch * dev->luns_per_chnl * nblks) + (lun * nblks)) /
BITS_PER_LONG;
}
static int nvm_factory_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
void *private)
{
struct factory_blks *f = private;
struct nvm_dev *dev = f->dev;
int i, lunoff;
lunoff = factory_blk_offset(dev, ppa.g.ch, ppa.g.lun);
/* non-set bits correspond to the block must be erased */
for (i = 0; i < nr_blks; i++) {
switch (blks[i]) {
case NVM_BLK_T_FREE:
if (f->flags & NVM_FACTORY_ERASE_ONLY_USER)
set_bit(i, &f->blks[lunoff]);
break;
case NVM_BLK_T_HOST:
if (!(f->flags & NVM_FACTORY_RESET_HOST_BLKS))
set_bit(i, &f->blks[lunoff]);
break;
case NVM_BLK_T_GRWN_BAD:
if (!(f->flags & NVM_FACTORY_RESET_GRWN_BBLKS))
set_bit(i, &f->blks[lunoff]);
break;
default:
set_bit(i, &f->blks[lunoff]);
break;
}
}
return 0;
}
static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list,
int max_ppas, struct factory_blks *f)
{
struct ppa_addr ppa;
int ch, lun, blkid, idx, done = 0, ppa_cnt = 0;
unsigned long *offset;
while (!done) {
done = 1;
for (ch = 0; ch < dev->nr_chnls; ch++) {
for (lun = 0; lun < dev->luns_per_chnl; lun++) {
idx = factory_blk_offset(dev, ch, lun);
offset = &f->blks[idx];
blkid = find_first_zero_bit(offset,
dev->blks_per_lun);
if (blkid >= dev->blks_per_lun)
continue;
set_bit(blkid, offset);
ppa.ppa = 0;
ppa.g.ch = ch;
ppa.g.lun = lun;
ppa.g.blk = blkid;
pr_debug("nvm: erase ppa (%u %u %u)\n",
ppa.g.ch,
ppa.g.lun,
ppa.g.blk);
erase_list[ppa_cnt] = ppa;
ppa_cnt++;
done = 0;
if (ppa_cnt == max_ppas)
return ppa_cnt;
}
}
}
return ppa_cnt;
}
static int nvm_fact_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa,
nvm_bb_update_fn *fn, void *priv)
{
struct ppa_addr dev_ppa;
int ret;
dev_ppa = generic_to_dev_addr(dev, ppa);
ret = dev->ops->get_bb_tbl(dev, dev_ppa, dev->blks_per_lun, fn, priv);
if (ret)
pr_err("nvm: failed bb tbl for ch%u lun%u\n",
ppa.g.ch, ppa.g.blk);
return ret;
}
static int nvm_fact_select_blks(struct nvm_dev *dev, struct factory_blks *f)
{
int ch, lun, ret;
struct ppa_addr ppa;
ppa.ppa = 0;
for (ch = 0; ch < dev->nr_chnls; ch++) {
for (lun = 0; lun < dev->luns_per_chnl; lun++) {
ppa.g.ch = ch;
ppa.g.lun = lun;
ret = nvm_fact_get_bb_tbl(dev, ppa, nvm_factory_blks,
f);
if (ret)
return ret;
}
}
return 0;
}
int nvm_dev_factory(struct nvm_dev *dev, int flags)
{
struct factory_blks f;
struct ppa_addr *ppas;
int ppa_cnt, ret = -ENOMEM;
int max_ppas = dev->ops->max_phys_sect / dev->nr_planes;
struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
struct sysblk_scan s;
f.blks = kzalloc(factory_nblks(dev->blks_per_lun) * dev->nr_luns,
GFP_KERNEL);
if (!f.blks)
return ret;
ppas = kcalloc(max_ppas, sizeof(struct ppa_addr), GFP_KERNEL);
if (!ppas)
goto err_blks;
f.dev = dev;
f.flags = flags;
/* create list of blks to be erased */
ret = nvm_fact_select_blks(dev, &f);
if (ret)
goto err_ppas;
/* continue to erase until list of blks until empty */
while ((ppa_cnt = nvm_fact_get_blks(dev, ppas, max_ppas, &f)) > 0)
nvm_erase_ppa(dev, ppas, ppa_cnt);
/* mark host reserved blocks free */
if (flags & NVM_FACTORY_RESET_HOST_BLKS) {
nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
mutex_lock(&dev->mlock);
ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas,
sysblk_get_host_blks);
if (!ret)
ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_FREE);
mutex_unlock(&dev->mlock);
}
err_ppas:
kfree(ppas);
err_blks:
kfree(f.blks);
return ret;
}
EXPORT_SYMBOL(nvm_dev_factory);