[dm-devel] [patch 4/4] dm-writecache: use new API for flushing

Mikulas Patocka mpatocka at redhat.com
Sat May 19 05:25:07 UTC 2018


Use the new API for flushing persistent memory.

The problem is this:
* on x86-64, non-temporal stores give the best performance
* ARM64 doesn't have non-temporal stores, so we must flush the cache. The
  cache should be flushed as late as possible, because that performs
  better.

We introduce the functions pmem_memcpy, pmem_flush and pmem_commit. To
commit data persistently, all three functions must be called: pmem_memcpy
to write the data, pmem_flush on the written range, and pmem_commit to
order the preceding writes.
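
As a minimal sketch (dst, buf and len here are hypothetical), a write
then looks like:

	pmem_memcpy(dst, buf, len);	/* copy data into persistent memory */
	pmem_flush(dst, len);		/* flush CPU cache; no-op on x86-64 */
	pmem_commit();			/* commit: wmb() on x86-64, empty on ARM64 */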

The macro pmem_assign may be used instead of pmem_memcpy. pmem_assign
(unlike pmem_memcpy) guarantees that 8-byte values are written atomically.
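
For example, the patch updates the superblock's 64-bit sequence counter
this way, so a torn (partially written) value can never be observed:

	pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));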

On x86-64, pmem_memcpy is memcpy_flushcache, pmem_flush is empty and
pmem_commit is wmb().

On ARM64, pmem_memcpy is plain memcpy, pmem_flush is arch_wb_cache_pmem
and pmem_commit is empty.
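
In tabular form:

			x86-64			ARM64
	pmem_memcpy	memcpy_flushcache	memcpy
	pmem_flush	(empty)			arch_wb_cache_pmem
	pmem_commit	wmb()			(empty)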

Signed-off-by: Mike Snitzer <msnitzer at redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>

---
 drivers/md/dm-writecache.c |  100 +++++++++++++++++++++++++--------------------
 1 file changed, 56 insertions(+), 44 deletions(-)

Index: linux-2.6/drivers/md/dm-writecache.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-writecache.c	2018-05-19 06:20:28.000000000 +0200
+++ linux-2.6/drivers/md/dm-writecache.c	2018-05-19 07:10:26.000000000 +0200
@@ -14,6 +14,7 @@
 #include <linux/dm-kcopyd.h>
 #include <linux/dax.h>
 #include <linux/pfn_t.h>
+#include <linux/libnvdimm.h>
 
 #define DM_MSG_PREFIX "writecache"
 
@@ -47,14 +48,48 @@
  * On ARM64, cache flushing is more efficient.
  */
 #if defined(CONFIG_X86_64)
-#define EAGER_DATA_FLUSH
-#define NT_STORE(dest, src)				\
-do {							\
-	typeof(src) val = (src);			\
-	memcpy_flushcache(&(dest), &val, sizeof(src));	\
+
+static void pmem_memcpy(void *dest, void *src, size_t len)
+{
+	memcpy_flushcache(dest, src, len);
+}
+
+#define __pmem_assign(dest, src, uniq)				\
+do {								\
+	typeof(dest) uniq = (src);				\
+	memcpy_flushcache(&(dest), &uniq, sizeof(dest));	\
 } while (0)
+
+#define pmem_assign(dest, src)					\
+	__pmem_assign(dest, src, __UNIQUE_ID(pmem_assign))
+
+static void pmem_flush(void *dest, size_t len)
+{
+}
+
+static void pmem_commit(void)
+{
+	wmb();
+}
+
 #else
-#define NT_STORE(dest, src)	WRITE_ONCE(dest, src)
+
+static void pmem_memcpy(void *dest, void *src, size_t len)
+{
+	memcpy(dest, src, len);
+}
+
+#define pmem_assign(dest, src)		WRITE_ONCE(dest, src)
+
+static void pmem_flush(void *dest, size_t len)
+{
+	arch_wb_cache_pmem(dest, len);
+}
+
+static void pmem_commit(void)
+{
+}
+
 #endif
 
 #if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && !defined(DM_WRITECACHE_ONLY_SSD)
@@ -105,7 +140,7 @@ struct wc_entry {
 };
 
 #ifndef DM_WRITECACHE_ONLY_SSD
-#define WC_MODE_PMEM(wc)			((wc)->pmem_mode)
+#define WC_MODE_PMEM(wc)			(likely((wc)->pmem_mode))
 #define WC_MODE_FUA(wc)				((wc)->writeback_fua)
 #else
 #define WC_MODE_PMEM(wc)			false
@@ -400,21 +435,6 @@ static void persistent_memory_invalidate
 		invalidate_kernel_vmap_range(ptr, size);
 }
 
-static void persistent_memory_flush(struct dm_writecache *wc, void *ptr, size_t size)
-{
-#ifndef EAGER_DATA_FLUSH
-	dax_flush(wc->ssd_dev->dax_dev, ptr, size);
-#endif
-}
-
-static void persistent_memory_commit_flushed(void)
-{
-#ifdef EAGER_DATA_FLUSH
-	/* needed since memcpy_flushcache is used instead of dax_flush */
-	wmb();
-#endif
-}
-
 static struct wc_memory_superblock *sb(struct dm_writecache *wc)
 {
 	return wc->memory_map;
@@ -462,21 +482,20 @@ static void clear_seq_count(struct dm_wr
 #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
 	e->seq_count = -1;
 #endif
-	NT_STORE(memory_entry(wc, e)->seq_count, cpu_to_le64(-1));
+	pmem_assign(memory_entry(wc, e)->seq_count, cpu_to_le64(-1));
 }
 
 static void write_original_sector_seq_count(struct dm_writecache *wc, struct wc_entry *e,
 					    uint64_t original_sector, uint64_t seq_count)
 {
-	struct wc_memory_entry *me_p, me;
+	struct wc_memory_entry me;
 #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
 	e->original_sector = original_sector;
 	e->seq_count = seq_count;
 #endif
-	me_p = memory_entry(wc, e);
 	me.original_sector = cpu_to_le64(original_sector);
 	me.seq_count = cpu_to_le64(seq_count);
-	NT_STORE(*me_p, me);
+	pmem_assign(*memory_entry(wc, e), me);
 }
 
 #define writecache_error(wc, err, msg, arg...)				\
@@ -491,8 +510,7 @@ do {									\
 static void writecache_flush_all_metadata(struct dm_writecache *wc)
 {
 	if (WC_MODE_PMEM(wc)) {
-		persistent_memory_flush(wc,
-			sb(wc), offsetof(struct wc_memory_superblock, entries[wc->n_blocks]));
+		pmem_flush(sb(wc), offsetof(struct wc_memory_superblock, entries[wc->n_blocks]));
 	} else {
 		memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size);
 	}
@@ -501,7 +519,7 @@ static void writecache_flush_all_metadat
 static void writecache_flush_region(struct dm_writecache *wc, void *ptr, size_t size)
 {
 	if (WC_MODE_PMEM(wc))
-		persistent_memory_flush(wc, ptr, size);
+		pmem_flush(ptr, size);
 	else
 		__set_bit(((char *)ptr - (char *)wc->memory_map) / BITMAP_GRANULARITY,
 			  wc->dirty_bitmap);
@@ -579,7 +597,7 @@ static void ssd_commit_flushed(struct dm
 static void writecache_commit_flushed(struct dm_writecache *wc)
 {
 	if (WC_MODE_PMEM(wc))
-		persistent_memory_commit_flushed();
+		pmem_commit();
 	else
 		ssd_commit_flushed(wc);
 }
@@ -788,10 +806,8 @@ static void writecache_poison_lists(stru
 static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry *e)
 {
 	writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry));
-#ifndef EAGER_DATA_FLUSH
 	if (WC_MODE_PMEM(wc))
 		writecache_flush_region(wc, memory_data(wc, e), wc->block_size);
-#endif
 }
 
 static bool writecache_entry_is_committed(struct dm_writecache *wc, struct wc_entry *e)
@@ -834,7 +850,7 @@ static void writecache_flush(struct dm_w
 	writecache_wait_for_ios(wc, WRITE);
 
 	wc->seq_count++;
-	NT_STORE(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
+	pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
 	writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count);
 	writecache_commit_flushed(wc);
 
@@ -1152,11 +1168,7 @@ static void bio_copy_block(struct dm_wri
 			}
 		} else {
 			flush_dcache_page(bio_page(bio));
-#ifdef EAGER_DATA_FLUSH
-			memcpy_flushcache(data, buf, size);
-#else
-			memcpy(data, buf, size);
-#endif
+			pmem_memcpy(data, buf, size);
 		}
 
 		bvec_kunmap_irq(buf, &flags);
@@ -1850,18 +1862,18 @@ static int init_memory(struct dm_writeca
 		return r;
 
 	for (b = 0; b < ARRAY_SIZE(sb(wc)->padding); b++)
-		NT_STORE(sb(wc)->padding[b], cpu_to_le64(0));
-	NT_STORE(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION));
-	NT_STORE(sb(wc)->block_size, cpu_to_le32(wc->block_size));
-	NT_STORE(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks));
-	NT_STORE(sb(wc)->seq_count, cpu_to_le64(0));
+		pmem_assign(sb(wc)->padding[b], cpu_to_le64(0));
+	pmem_assign(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION));
+	pmem_assign(sb(wc)->block_size, cpu_to_le32(wc->block_size));
+	pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks));
+	pmem_assign(sb(wc)->seq_count, cpu_to_le64(0));
 
 	for (b = 0; b < wc->n_blocks; b++)
 		write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
 
 	writecache_flush_all_metadata(wc);
 	writecache_commit_flushed(wc);
-	NT_STORE(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
+	pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
 	writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic);
 	writecache_commit_flushed(wc);
 



