[dm-devel] [PATCH 10/12] Limit bio_endio recursion
Ming Lei
tom.leiming at gmail.com
Thu Apr 7 05:44:43 UTC 2016
On Thu, 7 Apr 2016 11:54:49 +0800
Ming Lei <tom.leiming at gmail.com> wrote:
> On Mon, Apr 4, 2016 at 1:06 PM, Shaun Tancheff <shaun at tancheff.com> wrote:
> > Recursive endio calls can exceed 16k stack. Tested with
> > 32k stack and observed:
> >
> > Depth Size Location (293 entries)
> > ----- ---- --------
> > 0) 21520 16 __module_text_address+0x12/0x60
> > 1) 21504 8 __module_address+0x5/0x140
> > 2) 21496 24 __module_text_address+0x12/0x60
> > 3) 21472 16 is_module_text_address+0xe/0x20
> > 4) 21456 8 __kernel_text_address+0x50/0x80
> > 5) 21448 136 print_context_stack+0x5a/0xf0
> > 6) 21312 144 dump_trace+0x14c/0x300
> > 7) 21168 8 save_stack_trace+0x2f/0x50
> > 8) 21160 88 set_track+0x64/0x130
> > 9) 21072 96 free_debug_processing+0x200/0x290
> > 10) 20976 176 __slab_free+0x164/0x290
> > 11) 20800 48 kmem_cache_free+0x1b0/0x1e0
> > 12) 20752 16 mempool_free_slab+0x17/0x20
> > 13) 20736 48 mempool_free+0x2f/0x90
> > 14) 20688 16 bvec_free+0x36/0x40
> > 15) 20672 32 bio_free+0x3b/0x60
> > 16) 20640 16 bio_put+0x23/0x30
> > 17) 20624 64 end_bio_extent_writepage+0xcf/0xe0
> > 18) 20560 48 bio_endio+0x57/0x90
> > 19) 20512 48 btrfs_end_bio+0xa8/0x160
> > 20) 20464 48 bio_endio+0x57/0x90
> > 21) 20416 112 dec_pending+0x121/0x270
> > 22) 20304 64 clone_endio+0x7a/0x100
> > 23) 20240 48 bio_endio+0x57/0x90
> > ...
> > 277) 1264 64 clone_endio+0x7a/0x100
> > 278) 1200 48 bio_endio+0x57/0x90
> > 279) 1152 112 dec_pending+0x121/0x270
> > 280) 1040 64 clone_endio+0x7a/0x100
> > 281) 976 48 bio_endio+0x57/0x90
> > 282) 928 80 blk_update_request+0x8f/0x340
> > 283) 848 80 scsi_end_request+0x33/0x1c0
> > 284) 768 112 scsi_io_completion+0xb5/0x620
> > 285) 656 48 scsi_finish_command+0xcf/0x120
> > 286) 608 48 scsi_softirq_done+0x126/0x150
> > 287) 560 24 blk_done_softirq+0x78/0x90
> > 288) 536 136 __do_softirq+0xfd/0x280
> > 289) 400 16 run_ksoftirqd+0x28/0x50
> > 290) 384 64 smpboot_thread_fn+0x105/0x160
> > 291) 320 144 kthread+0xc9/0xe0
> > 292) 176 176 ret_from_fork+0x3f/0x70
> >
> > Based on earlier patch by Mikulas Patocka <mpatocka at redhat.com>.
> > https://lkml.org/lkml/2008/6/24/18
>
> Looks like an empty link, and the following had the discussion too:
>
> http://linux-kernel.2935.n7.nabble.com/PATCH-1-2-Avoid-bio-endio-recursion-td306043.html
>
> >
> > Signed-off-by: Shaun Tancheff <shaun.tancheff at seagate.com>
> > ---
> > block/bio.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> > 1 file changed, 54 insertions(+), 3 deletions(-)
> >
> > diff --git a/block/bio.c b/block/bio.c
> > index d42e69c..4ac19f6 100644
> > --- a/block/bio.c
> > +++ b/block/bio.c
> > @@ -1733,6 +1733,59 @@ static inline bool bio_remaining_done(struct bio *bio)
> > return false;
> > }
> >
> > +static DEFINE_PER_CPU(struct bio **, bio_end_queue) = { NULL };
>
> The idea looks very nice, but this patch can't be applied to the current
> block-next branch.
>
> Looks it might be simpler to implement the approach by using percpu bio_list.
Cc Mikulas & Christoph
How about the following implementation with bio_list? Looks more readable and
simpler.
Just run a recent testcase of bcache over raid1(virtio-blk, virtio-blk), looks
it does work, :-)
---
diff --git a/block/bio.c b/block/bio.c
index f124a0a..b97dfe8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -68,6 +68,8 @@ static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
static unsigned int bio_slab_nr, bio_slab_max;
+static DEFINE_PER_CPU(struct bio_list *, bio_end_list) = { NULL };
+
static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{
unsigned int sz = sizeof(struct bio) + extra_size;
@@ -1737,6 +1739,46 @@ static inline bool bio_remaining_done(struct bio *bio)
return false;
}
+/* disable local irq when manipulating the percpu bio_list */
+static void unwind_bio_endio(struct bio *bio)
+{
+ struct bio_list bl_in_stack;
+ struct bio_list *bl;
+ unsigned long flags;
+ bool clear_list = false;
+
+ local_irq_save(flags);
+
+ bl = this_cpu_read(bio_end_list);
+ if (!bl) {
+ bl = &bl_in_stack;
+ bio_list_init(bl);
+ clear_list = true;
+ }
+
+ if (!bio_list_empty(bl)) {
+ bio_list_add(bl, bio);
+ goto out;
+ }
+
+ while (1) {
+ local_irq_restore(flags);
+
+ if (!bio)
+ break;
+
+ if (bio->bi_end_io)
+ bio->bi_end_io(bio);
+
+ local_irq_save(flags);
+ bio = bio_list_pop(bl);
+ }
+ if (clear_list)
+ this_cpu_write(bio_end_list, NULL);
+ out:
+ local_irq_restore(flags);
+}
+
/**
* bio_endio - end I/O on a bio
* @bio: bio
@@ -1765,8 +1807,7 @@ again:
goto again;
}
- if (bio->bi_end_io)
- bio->bi_end_io(bio);
+ unwind_bio_endio(bio);
}
EXPORT_SYMBOL(bio_endio);
>
> > +
> > +static struct bio *unwind_bio_endio(struct bio *bio)
> > +{
> > + struct bio ***bio_end_queue_ptr;
> > + struct bio *bio_queue;
> > + struct bio *chain_bio = NULL;
> > + int error = bio->bi_error;
> > + unsigned long flags;
> > +
> > + local_irq_save(flags);
>
> If we could reuse current->bio_list for this purpose, disabling irq could have
> been avoided, but it looks difficult to do that way, maybe impossible.
> Also not realistic to introduce a new per-task variable.
>
> > + bio_end_queue_ptr = this_cpu_ptr(&bio_end_queue);
> > +
> > + if (*bio_end_queue_ptr) {
> > + **bio_end_queue_ptr = bio;
> > + *bio_end_queue_ptr = &bio->bi_next;
> > + bio->bi_next = NULL;
> > + } else {
> > + bio_queue = NULL;
> > + *bio_end_queue_ptr = &bio_queue;
>
> Suggest to comment that bio_queue is the bottom-most stack variable,
> otherwise looks a bit tricky to understand.
>
> > +
> > +next_bio:
> > + if (bio->bi_end_io == bio_chain_endio) {
> > + struct bio *parent = bio->bi_private;
> > +
> > + bio_put(bio);
> > + chain_bio = parent;
> > + goto out;
> > + }
> > +
> > + if (bio->bi_end_io) {
> > + if (!bio->bi_error)
> > + bio->bi_error = error;
> > + bio->bi_end_io(bio);
> > + }
> > +
> > + if (bio_queue) {
> > + bio = bio_queue;
> > + bio_queue = bio->bi_next;
> > + if (!bio_queue)
> > + *bio_end_queue_ptr = &bio_queue;
> > + goto next_bio;
> > + }
> > + *bio_end_queue_ptr = NULL;
> > + }
> > +
> > +out:
> > +
> > + local_irq_restore(flags);
> > +
> > + return chain_bio;
> > +}
> > +
> > /**
> > * bio_endio - end I/O on a bio
> > * @bio: bio
> > @@ -1762,9 +1815,7 @@ void bio_endio(struct bio *bio)
> > bio_put(bio);
> > bio = parent;
> > } else {
> > - if (bio->bi_end_io)
> > - bio->bi_end_io(bio);
> > - bio = NULL;
> > + bio = unwind_bio_endio(bio);
> > }
> > }
> > }
> > --
> > 1.9.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-block" in
> > the body of a message to majordomo at vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
>
More information about the dm-devel
mailing list