[dm-devel] [PATCH v6 1/6] pmem: Add functions for reading/writing page to/from pmem
Pankaj Gupta
pankaj.gupta.linux at gmail.com
Sat Feb 29 08:04:00 UTC 2020
On Fri, 28 Feb 2020 at 17:35, Vivek Goyal <vgoyal at redhat.com> wrote:
>
> This splits pmem_do_bvec() into pmem_do_read() and pmem_do_write().
> pmem_do_write() will be used by pmem zero_page_range() as well. Hence
> sharing the same code.
>
> Suggested-by: Christoph Hellwig <hch at infradead.org>
> Reviewed-by: Christoph Hellwig <hch at lst.de>
> Signed-off-by: Vivek Goyal <vgoyal at redhat.com>
> ---
> drivers/nvdimm/pmem.c | 86 +++++++++++++++++++++++++------------------
> 1 file changed, 50 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index 4eae441f86c9..075b11682192 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
> return BLK_STS_OK;
> }
>
> -static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
> - unsigned int len, unsigned int off, unsigned int op,
> - sector_t sector)
> +static blk_status_t pmem_do_read(struct pmem_device *pmem,
> + struct page *page, unsigned int page_off,
> + sector_t sector, unsigned int len)
> +{
> + blk_status_t rc;
> + phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
Minor nit: maybe 512 could be replaced by a macro? It looks like it's used in
multiple places, so maybe it can be kept as is for now.
> + void *pmem_addr = pmem->virt_addr + pmem_off;
> +
> + if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
> + return BLK_STS_IOERR;
> +
> + rc = read_pmem(page, page_off, pmem_addr, len);
> + flush_dcache_page(page);
> + return rc;
> +}
> +
> +static blk_status_t pmem_do_write(struct pmem_device *pmem,
> + struct page *page, unsigned int page_off,
> + sector_t sector, unsigned int len)
> {
> blk_status_t rc = BLK_STS_OK;
> bool bad_pmem = false;
> @@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
> if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
> bad_pmem = true;
>
> - if (!op_is_write(op)) {
> - if (unlikely(bad_pmem))
> - rc = BLK_STS_IOERR;
> - else {
> - rc = read_pmem(page, off, pmem_addr, len);
> - flush_dcache_page(page);
> - }
> - } else {
> - /*
> - * Note that we write the data both before and after
> - * clearing poison. The write before clear poison
> - * handles situations where the latest written data is
> - * preserved and the clear poison operation simply marks
> - * the address range as valid without changing the data.
> - * In this case application software can assume that an
> - * interrupted write will either return the new good
> - * data or an error.
> - *
> - * However, if pmem_clear_poison() leaves the data in an
> - * indeterminate state we need to perform the write
> - * after clear poison.
> - */
> - flush_dcache_page(page);
> - write_pmem(pmem_addr, page, off, len);
> - if (unlikely(bad_pmem)) {
> - rc = pmem_clear_poison(pmem, pmem_off, len);
> - write_pmem(pmem_addr, page, off, len);
> - }
> + /*
> + * Note that we write the data both before and after
> + * clearing poison. The write before clear poison
> + * handles situations where the latest written data is
> + * preserved and the clear poison operation simply marks
> + * the address range as valid without changing the data.
> + * In this case application software can assume that an
> + * interrupted write will either return the new good
> + * data or an error.
> + *
> + * However, if pmem_clear_poison() leaves the data in an
> + * indeterminate state we need to perform the write
> + * after clear poison.
> + */
> + flush_dcache_page(page);
> + write_pmem(pmem_addr, page, page_off, len);
> + if (unlikely(bad_pmem)) {
> + rc = pmem_clear_poison(pmem, pmem_off, len);
> + write_pmem(pmem_addr, page, page_off, len);
> }
>
> return rc;
> @@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
>
> do_acct = nd_iostat_start(bio, &start);
> bio_for_each_segment(bvec, bio, iter) {
> - rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
> - bvec.bv_offset, bio_op(bio), iter.bi_sector);
> + if (op_is_write(bio_op(bio)))
> + rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
> + iter.bi_sector, bvec.bv_len);
> + else
> + rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
> + iter.bi_sector, bvec.bv_len);
> if (rc) {
> bio->bi_status = rc;
> break;
> @@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
> struct pmem_device *pmem = bdev->bd_queue->queuedata;
> blk_status_t rc;
>
> - rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
> - 0, op, sector);
> -
> + if (op_is_write(op))
> + rc = pmem_do_write(pmem, page, 0, sector,
> + hpage_nr_pages(page) * PAGE_SIZE);
> + else
> + rc = pmem_do_read(pmem, page, 0, sector,
> + hpage_nr_pages(page) * PAGE_SIZE);
> /*
> * The ->rw_page interface is subtle and tricky. The core
> * retries on any error, so we can only invoke page_endio() in
> --
> 2.20.1
Reviewed-by: Pankaj Gupta <pankaj.gupta.linux at gmail.com>
> _______________________________________________
> Linux-nvdimm mailing list -- linux-nvdimm at lists.01.org
> To unsubscribe send an email to linux-nvdimm-leave at lists.01.org
More information about the dm-devel
mailing list