[dm-devel] x86: optimize memcpy_flushcache

Mike Snitzer snitzer at redhat.com
Mon Jun 18 13:17:12 UTC 2018


On Mon, Jun 18 2018 at  8:50am -0400,
Mikulas Patocka <mpatocka at redhat.com> wrote:

> Hi Mike
> 
> Could you please push this patch to the kernel 4.18-rc? Dan Williams said 
> that he would submit it, but he forgot about it.
> 
> Without this patch, dm-writecache suffers a 2% penalty because of 
> memcpy_flushcache overhead.

I cannot send this to Linus directly, it needs to go through the x86
tree.

I already tried to get a slightly revised version of this upstream, see:
https://www.redhat.com/archives/dm-devel/2018-May/msg00080.html

I'll try a resend, but the 4.18 merge window is now closed.

Mike


> From: Mikulas Patocka <mpatocka at redhat.com>
> 
> I use memcpy_flushcache in my persistent memory driver for metadata
> updates and it turns out that the overhead of memcpy_flushcache causes a 2%
> performance degradation compared to the "movnti" instruction explicitly coded
> using inline assembler.
> 
> This patch recognizes memcpy_flushcache calls with constant short length
> and turns them into inline assembler - so that I don't have to use inline
> assembler in the driver.
> 
> Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
> 
> ---
>  arch/x86/include/asm/string_64.h |   20 +++++++++++++++++++-
>  arch/x86/lib/usercopy_64.c       |    4 ++--
>  2 files changed, 21 insertions(+), 3 deletions(-)
> 
> Index: linux-2.6/arch/x86/include/asm/string_64.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/string_64.h
> +++ linux-2.6/arch/x86/include/asm/string_64.h
> @@ -149,7 +149,25 @@ memcpy_mcsafe(void *dst, const void *src
>  
>  #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
>  #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
> -void memcpy_flushcache(void *dst, const void *src, size_t cnt);
> +void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
> +static __always_inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
> +{
> +	if (__builtin_constant_p(cnt)) {
> +		switch (cnt) {
> +			case 4:
> +				asm ("movntil %1, %0" : "=m"(*(u32 *)dst) : "r"(*(u32 *)src));
> +				return;
> +			case 8:
> +				asm ("movntiq %1, %0" : "=m"(*(u64 *)dst) : "r"(*(u64 *)src));
> +				return;
> +			case 16:
> +				asm ("movntiq %1, %0" : "=m"(*(u64 *)dst) : "r"(*(u64 *)src));
> +				asm ("movntiq %1, %0" : "=m"(*(u64 *)(dst + 8)) : "r"(*(u64 *)(src + 8)));
> +				return;
> +		}
> +	}
> +	__memcpy_flushcache(dst, src, cnt);
> +}
>  #endif
>  
>  #endif /* __KERNEL__ */
> Index: linux-2.6/arch/x86/lib/usercopy_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/lib/usercopy_64.c
> +++ linux-2.6/arch/x86/lib/usercopy_64.c
> @@ -153,7 +153,7 @@ long __copy_user_flushcache(void *dst, c
>  	return rc;
>  }
>  
> -void memcpy_flushcache(void *_dst, const void *_src, size_t size)
> +void __memcpy_flushcache(void *_dst, const void *_src, size_t size)
>  {
>  	unsigned long dest = (unsigned long) _dst;
>  	unsigned long source = (unsigned long) _src;
> @@ -216,7 +216,7 @@ void memcpy_flushcache(void *_dst, const
>  		clean_cache_range((void *) dest, size);
>  	}
>  }
> -EXPORT_SYMBOL_GPL(memcpy_flushcache);
> +EXPORT_SYMBOL_GPL(__memcpy_flushcache);
>  
>  void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
>  		size_t len)




More information about the dm-devel mailing list