[Virtio-fs] [PATCH][RFC] Support multiqueue mode by setting cpu affinity
piaojun
piaojun at huawei.com
Thu Aug 22 05:18:00 UTC 2019
On 2019/8/21 23:38, Stefan Hajnoczi wrote:
> On Fri, Aug 09, 2019 at 02:04:54PM +0800, piaojun wrote:
>> Set cpu affinity for each queue in multiqueue mode to improve the iops
>> performance.
>>
>> From my test, the iops is increased by adding multiqueues as below,
>> but it has not achieved my expectations yet for some reason. So I'm
>> considering whether we could drop some locks when operating on a vq, as it
>> is bound to one vCPU. I'm very glad to have a discussion with other
>> developers.
>>
>> Furthermore, I modified virtiofsd to support multiqueue, just for
>> testing.
>>
>> Test Environment:
>> Guest configuration:
>> 8 vCPU
>> 8GB RAM
>> Linux 5.1 (vivek-aug-06-2019)
>>
>> Host configuration:
>> Intel(R) Xeon(R) CPU E5-2670 0 @ 2.60GHz (8 cores x 4 threads)
>> 32GB RAM
>> Linux 3.10.0
>> EXT4 + 4G Ramdisk
>>
>> ---
>> Single-queue:
>> # fio -direct=1 -time_based -iodepth=128 -rw=randwrite -ioengine=libaio -bs=4k -size=1G -numjob=8 -runtime=30 -group_reporting -name=file -filename=/mnt/virtiofs/file
>> file: (g=0): rw=randwrite, bs=4K-4K/4K-4K/4K-4K, ioengine=libaio, iodepth=128
>> ...
>> fio-2.13
>> Starting 8 processes
>> Jobs: 8 (f=8): [w(8)] [100.0% done] [0KB/316.5MB/0KB /s] [0/81.2K/0 iops] [eta 00m:00s]
>> file: (groupid=0, jobs=8): err= 0: pid=5808: Fri Aug 9 20:35:22 2019
>> write: io=9499.9MB, bw=324251KB/s, iops=81062, runt= 30001msec
>>
>> Multi-queues:
>> # fio -direct=1 -time_based -iodepth=128 -rw=randwrite -ioengine=libaio -bs=4k -size=1G -numjob=8 -runtime=30 -group_reporting -name=file -filename=/mnt/virtiofs/file
>> file: (g=0): rw=randwrite, bs=4K-4K/4K-4K/4K-4K, ioengine=libaio, iodepth=128
>> ...
>> fio-2.13
>> Starting 8 processes
>> Jobs: 8 (f=8): [w(8)] [100.0% done] [0KB/444.6MB/0KB /s] [0/114K/0 iops] [eta 00m:00s]
>> file: (groupid=0, jobs=8): err= 0: pid=5704: Fri Aug 9 20:38:47 2019
>> write: io=12967MB, bw=442582KB/s, iops=110645, runt= 30001msec
>> ---
>
> How does the same fio command-line perform on the host when bound to 8
> CPUs?
Not tested yet.
>
> What about the virtiofsd changes? Did you implement host CPU affinity
> for the virtqueue processing threads and their workqueues?
I just deleted the single-queue limit check:
fv_queue_set_started()
if (qidx > 1) // delete this check
And I have not implemented host CPU affinity yet, but I'm interested in it.
>
> I wonder if numbers are better if you use 8 files instead of 1 file.
I will test as you suggest, and share the result again.
Jun
>
>> Signed-off-by: Jun Piao <piaojun at huawei.com>
>> ---
>> fs/fuse/virtio_fs.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++--
>> 1 file changed, 66 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
>> index a04c320..7ba36fc 100644
>> --- a/fs/fuse/virtio_fs.c
>> +++ b/fs/fuse/virtio_fs.c
>> @@ -12,6 +12,7 @@
>> #include <linux/virtio.h>
>> #include <linux/virtio_fs.h>
>> #include <linux/delay.h>
>> +#include <linux/cpu.h>
>> #include "fuse_i.h"
>>
>> /* List of virtio-fs device instances and a lock for the list */
>> @@ -61,6 +62,9 @@ struct virtio_fs {
>> void *window_kaddr;
>> phys_addr_t window_phys_addr;
>> size_t window_len;
>> +
>> + /* Is the affinity hint set for the virtqueues? */
>> + bool affinity_hint_set;
>> };
>>
>> struct virtio_fs_forget {
>> @@ -378,6 +382,44 @@ static void virtio_fs_vq_done(struct virtqueue *vq)
>> schedule_work(&fsvq->done_work);
>> }
>>
>> +static void virtio_fs_clean_affinity(struct virtio_fs *fs)
>> +{
>> + int i;
>> +
>> + if (fs->affinity_hint_set) {
>> + for (i = 0; i < fs->num_queues; i++)
>> + virtqueue_set_affinity(fs->vqs[i].vq, NULL);
>> +
>> + fs->affinity_hint_set = false;
>> + }
>> +}
>> +
>> +static void virtio_fs_set_affinity(struct virtio_fs *fs)
>> +{
>> + int i = 0, cpu;
>> +
>> + /*
>> + * In single queue mode, we don't set the cpu affinity.
>> + */
>> + if (fs->num_queues == 1) {
>> + virtio_fs_clean_affinity(fs);
>> + fs->affinity_hint_set = false;
>> + return;
>> + }
>> +
>> + /*
>> + * In multiqueue mode, we let the queue to be private to one cpu
>> + * by setting the affinity hint to eliminate the contention.
>> + */
>> + for_each_online_cpu(cpu) {
>> + virtqueue_set_affinity(fs->vqs[i].vq, cpumask_of(cpu));
>> + if (++i >= fs->num_queues)
>> + break;
>> + }
>> +
>> + fs->affinity_hint_set = true;
>> +}
>> +
>> /* Initialize virtqueues */
>> static int virtio_fs_setup_vqs(struct virtio_device *vdev,
>> struct virtio_fs *fs)
>> @@ -440,6 +482,11 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
>> fs->vqs[i].vq = vqs[i];
>> fs->vqs[i].connected = true;
>> }
>> +
>> + /* set affinity for vqs */
>> + get_online_cpus();
>> + virtio_fs_set_affinity(fs);
>> + put_online_cpus();
>> out:
>> kfree(names);
>> kfree(callbacks);
>> @@ -451,6 +498,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
>> static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
>> struct virtio_fs *fs)
>> {
>> + virtio_fs_clean_affinity(fs);
>> vdev->config->del_vqs(vdev);
>> }
>>
>> @@ -954,10 +1002,22 @@ static int virtio_fs_enqueue_req(struct virtqueue *vq, struct fuse_req *req)
>> return ret;
>> }
>>
>> +static unsigned virtio_fs_pick_vq_mq(struct virtio_fs *fs)
>> +{
>> + unsigned queue_id;
>> + unsigned long flags;
>> +
>> + local_irq_save(flags);
>> + queue_id = (smp_processor_id() % fs->num_queues) + VQ_REQUEST;
>> + local_irq_restore(flags);
>> +
>> + return queue_id;
>> +}
>> +
>> static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
>> __releases(fiq->waitq.lock)
>> {
>> - unsigned queue_id = VQ_REQUEST; /* TODO multiqueue */
>> + unsigned queue_id = VQ_REQUEST;
>> struct virtio_fs *fs;
>> struct fuse_conn *fc;
>> struct fuse_req *req;
>> @@ -972,6 +1032,8 @@ static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
>> spin_unlock(&fiq->waitq.lock);
>>
>> fs = fiq->priv;
>> + if (fs->num_queues > 1)
>> + queue_id = virtio_fs_pick_vq_mq(fs);
>> fc = fs->vqs[queue_id].fud->fc;
>>
>> dev_dbg(&fs->vqs[queue_id].vq->vdev->dev,
>> @@ -1066,9 +1128,11 @@ static int virtio_fs_fill_super(struct super_block *sb, char *opts,
>>
>> err = -ENOMEM;
>> /* Allocate fuse_dev for hiprio and notification queues */
>> - for (i = 0; i < VQ_REQUEST; i++) {
>> + for (i = 0; i < VQ_REQUEST + fs->num_queues; i++) {
>> struct virtio_fs_vq *fsvq = &fs->vqs[i];
>>
>> + if (i == VQ_REQUEST)
>> + continue; /* will be allocated in fuse_fill_super_common */
>> fsvq->fud = fuse_dev_alloc();
>> if (!fsvq->fud)
>> goto err_free_fuse_devs;
>> --
>>
>> _______________________________________________
>> Virtio-fs mailing list
>> Virtio-fs at redhat.com
>> https://www.redhat.com/mailman/listinfo/virtio-fs
More information about the Virtio-fs
mailing list