[dm-devel] [PATCH v6 1/6] pmem: Add functions for reading/writing page to/from pmem

Pankaj Gupta pankaj.gupta.linux at gmail.com
Sat Feb 29 08:04:00 UTC 2020


On Fri, 28 Feb 2020 at 17:35, Vivek Goyal <vgoyal at redhat.com> wrote:
>
> This splits pmem_do_bvec() into pmem_do_read() and pmem_do_write().
> pmem_do_write() will be used by pmem zero_page_range() as well. Hence
> sharing the same code.
>
> Suggested-by: Christoph Hellwig <hch at infradead.org>
> Reviewed-by: Christoph Hellwig <hch at lst.de>
> Signed-off-by: Vivek Goyal <vgoyal at redhat.com>
> ---
>  drivers/nvdimm/pmem.c | 86 +++++++++++++++++++++++++------------------
>  1 file changed, 50 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index 4eae441f86c9..075b11682192 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
>         return BLK_STS_OK;
>  }
>
> -static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
> -                       unsigned int len, unsigned int off, unsigned int op,
> -                       sector_t sector)
> +static blk_status_t pmem_do_read(struct pmem_device *pmem,
> +                       struct page *page, unsigned int page_off,
> +                       sector_t sector, unsigned int len)
> +{
> +       blk_status_t rc;
> +       phys_addr_t pmem_off = sector * 512 + pmem->data_offset;

minor nit,  maybe 512 is replaced by macro? Looks like its used at multiple
places, maybe can keep at is for now.

> +       void *pmem_addr = pmem->virt_addr + pmem_off;
> +
> +       if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
> +               return BLK_STS_IOERR;
> +
> +       rc = read_pmem(page, page_off, pmem_addr, len);
> +       flush_dcache_page(page);
> +       return rc;
> +}
> +
> +static blk_status_t pmem_do_write(struct pmem_device *pmem,
> +                       struct page *page, unsigned int page_off,
> +                       sector_t sector, unsigned int len)
>  {
>         blk_status_t rc = BLK_STS_OK;
>         bool bad_pmem = false;
> @@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
>         if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
>                 bad_pmem = true;
>
> -       if (!op_is_write(op)) {
> -               if (unlikely(bad_pmem))
> -                       rc = BLK_STS_IOERR;
> -               else {
> -                       rc = read_pmem(page, off, pmem_addr, len);
> -                       flush_dcache_page(page);
> -               }
> -       } else {
> -               /*
> -                * Note that we write the data both before and after
> -                * clearing poison.  The write before clear poison
> -                * handles situations where the latest written data is
> -                * preserved and the clear poison operation simply marks
> -                * the address range as valid without changing the data.
> -                * In this case application software can assume that an
> -                * interrupted write will either return the new good
> -                * data or an error.
> -                *
> -                * However, if pmem_clear_poison() leaves the data in an
> -                * indeterminate state we need to perform the write
> -                * after clear poison.
> -                */
> -               flush_dcache_page(page);
> -               write_pmem(pmem_addr, page, off, len);
> -               if (unlikely(bad_pmem)) {
> -                       rc = pmem_clear_poison(pmem, pmem_off, len);
> -                       write_pmem(pmem_addr, page, off, len);
> -               }
> +       /*
> +        * Note that we write the data both before and after
> +        * clearing poison.  The write before clear poison
> +        * handles situations where the latest written data is
> +        * preserved and the clear poison operation simply marks
> +        * the address range as valid without changing the data.
> +        * In this case application software can assume that an
> +        * interrupted write will either return the new good
> +        * data or an error.
> +        *
> +        * However, if pmem_clear_poison() leaves the data in an
> +        * indeterminate state we need to perform the write
> +        * after clear poison.
> +        */
> +       flush_dcache_page(page);
> +       write_pmem(pmem_addr, page, page_off, len);
> +       if (unlikely(bad_pmem)) {
> +               rc = pmem_clear_poison(pmem, pmem_off, len);
> +               write_pmem(pmem_addr, page, page_off, len);
>         }
>
>         return rc;
> @@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
>
>         do_acct = nd_iostat_start(bio, &start);
>         bio_for_each_segment(bvec, bio, iter) {
> -               rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
> -                               bvec.bv_offset, bio_op(bio), iter.bi_sector);
> +               if (op_is_write(bio_op(bio)))
> +                       rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
> +                               iter.bi_sector, bvec.bv_len);
> +               else
> +                       rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
> +                               iter.bi_sector, bvec.bv_len);
>                 if (rc) {
>                         bio->bi_status = rc;
>                         break;
> @@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
>         struct pmem_device *pmem = bdev->bd_queue->queuedata;
>         blk_status_t rc;
>
> -       rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
> -                         0, op, sector);
> -
> +       if (op_is_write(op))
> +               rc = pmem_do_write(pmem, page, 0, sector,
> +                                  hpage_nr_pages(page) * PAGE_SIZE);
> +       else
> +               rc = pmem_do_read(pmem, page, 0, sector,
> +                                  hpage_nr_pages(page) * PAGE_SIZE);
>         /*
>          * The ->rw_page interface is subtle and tricky.  The core
>          * retries on any error, so we can only invoke page_endio() in
> --
> 2.20.1

Reviewed-by: Pankaj Gupta <pankaj.gupta.linux at gmail.com>
> _______________________________________________
> Linux-nvdimm mailing list -- linux-nvdimm at lists.01.org
> To unsubscribe send an email to linux-nvdimm-leave at lists.01.org





More information about the dm-devel mailing list