From 2ff396be602f10b5eab8e73b24f20348fa2de159 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Tue, 2 Sep 2014 13:17:00 -0400 Subject: [PATCH 1/2] aio: add missing smp_rmb() in read_events_ring We ran into a case on ppc64 running mariadb where io_getevents would return zeroed out I/O events. After adding instrumentation, it became clear that there was some missing synchronization between reading the tail pointer and the events themselves. This small patch fixes the problem in testing. Thanks to Zach for helping to look into this, and suggesting the fix. Signed-off-by: Jeff Moyer Signed-off-by: Benjamin LaHaise Cc: stable@vger.kernel.org --- fs/aio.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 97bc62cbe2da..5f2e9c6c328e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1111,6 +1111,12 @@ static long aio_read_events_ring(struct kioctx *ctx, tail = ring->tail; kunmap_atomic(ring); + /* + * Ensure that once we've read the current tail pointer, that + * we also see the events that were stored up to the tail. + */ + smp_rmb(); + pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events); if (head == tail) From 6098b45b32e6baeacc04790773ced9340601d511 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Wed, 3 Sep 2014 17:45:44 +0800 Subject: [PATCH 2/2] aio: block exit_aio() until all context requests are completed It seems that exit_aio() also needs to wait for all iocbs to complete (like io_destroy), but we missed the wait step in current implemention, so fix it in the same way as we did in io_destroy. Signed-off-by: Gu Zheng Signed-off-by: Benjamin LaHaise Cc: stable@vger.kernel.org --- fs/aio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index 5f2e9c6c328e..733750096b71 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -793,6 +793,8 @@ void exit_aio(struct mm_struct *mm) for (i = 0; i < table->nr; ++i) { struct kioctx *ctx = table->table[i]; + struct completion requests_done = + COMPLETION_INITIALIZER_ONSTACK(requests_done); if (!ctx) continue; @@ -804,7 +806,10 @@ void exit_aio(struct mm_struct *mm) * that it needs to unmap the area, just set it to 0. */ ctx->mmap_size = 0; - kill_ioctx(mm, ctx, NULL); + kill_ioctx(mm, ctx, &requests_done); + + /* Wait until all IO for the context are done. */ + wait_for_completion(&requests_done); } RCU_INIT_POINTER(mm->ioctx_table, NULL);