From 545b2342fda6b787c7894941715a719180f11c0f Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Nov 19 2015 15:59:31 +0000 Subject: import ceph-common-0.80.7-3.el7 --- diff --git a/SOURCES/0010-CephContext-Add-AssociatedSingletonObject-to-allow-C.patch b/SOURCES/0010-CephContext-Add-AssociatedSingletonObject-to-allow-C.patch new file mode 100644 index 0000000..ab2e24a --- /dev/null +++ b/SOURCES/0010-CephContext-Add-AssociatedSingletonObject-to-allow-C.patch @@ -0,0 +1,103 @@ +From d4b0d7fbed1f1f321e05b8c6fe2d392294f49875 Mon Sep 17 00:00:00 2001 +From: Haomai Wang +Date: Mon, 1 Dec 2014 23:54:16 +0800 +Subject: [PATCH 10/22] CephContext: Add AssociatedSingletonObject to allow + CephContext's singleton + +If some objects associated to CephContext want to create a singleton object, +it can inherit AssociatedSingletonObject and implement destruction to get notified. + +Signed-off-by: Haomai Wang +(cherry picked from commit 7fed5dee4f96a83d1d6914f6fc0895bba2d15b99) +(cherry picked from commit 3fea27c7f6b1b1403bce4d7736367975798a8634) +--- + src/common/ceph_context.cc | 6 ++++++ + src/common/ceph_context.h | 20 ++++++++++++++++++++ + 2 files changed, 26 insertions(+) + +diff --git a/src/common/ceph_context.cc b/src/common/ceph_context.cc +index 4ebf79e..77488b6 100644 +--- a/src/common/ceph_context.cc ++++ b/src/common/ceph_context.cc +@@ -265,6 +265,7 @@ CephContext::CephContext(uint32_t module_type_) + _crypto_aes(NULL) + { + ceph_spin_init(&_service_thread_lock); ++ ceph_spin_init(&_associated_objs_lock); + + _log = new ceph::log::Log(&_conf->subsys); + _log->start(); +@@ -298,6 +299,10 @@ CephContext::~CephContext() + { + join_service_thread(); + ++ for (map::iterator it = _associated_objs.begin(); ++ it != _associated_objs.end(); it++) ++ delete it->second; ++ + if (_conf->lockdep) { + lockdep_unregister_ceph_context(this); + } +@@ -335,6 +340,7 @@ CephContext::~CephContext() + + delete _conf; + ceph_spin_destroy(&_service_thread_lock); ++ ceph_spin_destroy(&_associated_objs_lock); + + delete _crypto_none; + delete _crypto_aes; +diff --git a/src/common/ceph_context.h b/src/common/ceph_context.h +index ba60620..e7b8b6c 100644 +--- a/src/common/ceph_context.h ++++ b/src/common/ceph_context.h +@@ -17,6 +17,8 @@ + + #include + #include ++#include ++using namespace std; + + #include "include/buffer.h" + #include "include/atomic.h" +@@ -58,6 +60,10 @@ private: + ~CephContext(); + atomic_t nref; + public: ++ class AssociatedSingletonObject { ++ public: ++ virtual ~AssociatedSingletonObject() {} ++ }; + CephContext *get() { + nref.inc(); + return this; +@@ -102,6 +108,17 @@ public: + void do_command(std::string command, cmdmap_t& cmdmap, std::string format, + bufferlist *out); + ++ template ++ void lookup_or_create_singleton_object(T*& p, const string &name) { ++ ceph_spin_lock(&_associated_objs_lock); ++ if (!_associated_objs.count(name)) { ++ p = new T(this); ++ _associated_objs[name] = reinterpret_cast(p); ++ } else { ++ p = reinterpret_cast(_associated_objs[name]); ++ } ++ ceph_spin_unlock(&_associated_objs_lock); ++ } + /** + * get a crypto handler + */ +@@ -138,6 +155,9 @@ private: + + ceph::HeartbeatMap *_heartbeat_map; + ++ ceph_spinlock_t _associated_objs_lock; ++ map _associated_objs; ++ + // crypto + CryptoNone *_crypto_none; + CryptoAES *_crypto_aes; +-- +2.1.0 + diff --git a/SOURCES/0011-common-ceph_context-don-t-import-std-namespace.patch b/SOURCES/0011-common-ceph_context-don-t-import-std-namespace.patch new file mode 100644 index 0000000..b380e3c --- /dev/null +++ b/SOURCES/0011-common-ceph_context-don-t-import-std-namespace.patch @@ -0,0 +1,47 @@ +From 6484aebec707514360422273b688e443c6f0c3b2 Mon Sep 17 00:00:00 2001 +From: Sage Weil +Date: Fri, 5 Dec 2014 14:21:08 -0800 +Subject: [PATCH 11/22] common/ceph_context: don't import std namespace + +This was broken by 7fed5dee4f96a83d1d6914f6fc0895bba2d15b99 + +Signed-off-by: Sage Weil +(cherry picked from commit 9029813b2784fe71879b1b7684855415b4c7566b) +(cherry picked from commit e391ebf3f471825c7efd84e61824b9e39d0e30c2) +--- + src/common/ceph_context.h | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/src/common/ceph_context.h b/src/common/ceph_context.h +index e7b8b6c..7241c85 100644 +--- a/src/common/ceph_context.h ++++ b/src/common/ceph_context.h +@@ -18,7 +18,6 @@ + #include + #include + #include +-using namespace std; + + #include "include/buffer.h" + #include "include/atomic.h" +@@ -109,7 +108,7 @@ public: + bufferlist *out); + + template +- void lookup_or_create_singleton_object(T*& p, const string &name) { ++ void lookup_or_create_singleton_object(T*& p, const std::string &name) { + ceph_spin_lock(&_associated_objs_lock); + if (!_associated_objs.count(name)) { + p = new T(this); +@@ -156,7 +155,7 @@ private: + ceph::HeartbeatMap *_heartbeat_map; + + ceph_spinlock_t _associated_objs_lock; +- map _associated_objs; ++ std::map _associated_objs; + + // crypto + CryptoNone *_crypto_none; +-- +2.1.0 + diff --git a/SOURCES/0012-WorkQueue-add-new-ContextWQ-work-queue.patch b/SOURCES/0012-WorkQueue-add-new-ContextWQ-work-queue.patch new file mode 100644 index 0000000..6e9405b --- /dev/null +++ b/SOURCES/0012-WorkQueue-add-new-ContextWQ-work-queue.patch @@ -0,0 +1,58 @@ +From 25cc3d73d9a69edd8ae1a67920b48de8a8a6b91f Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Wed, 8 Apr 2015 16:46:34 -0400 +Subject: [PATCH 12/22] WorkQueue: add new ContextWQ work queue + +The queue holds a collection of Context pointers that will +be completed by the thread pool. + +Signed-off-by: Jason Dillaman +(cherry picked from commit 24a33e977f7b71962adeeb48f75d488a76e70fa9) +(cherry picked from commit f28f18f6aa1930ad2cdbedb5ac7a94aafb49ed2f) +--- + src/common/WorkQueue.h | 31 +++++++++++++++++++++++++++++++ + 1 file changed, 31 insertions(+) + +diff --git a/src/common/WorkQueue.h b/src/common/WorkQueue.h +index cbf49a8..07aea2d 100644 +--- a/src/common/WorkQueue.h ++++ b/src/common/WorkQueue.h +@@ -433,4 +433,35 @@ public: + } + }; + ++class ContextWQ : public ThreadPool::WorkQueueVal { ++public: ++ ContextWQ(const string &name, time_t ti, ThreadPool *tp) ++ : ThreadPool::WorkQueueVal(name, ti, 0, tp) {} ++ ++ void queue(Context *ctx) { ++ ThreadPool::WorkQueueVal::queue(ctx); ++ } ++ ++protected: ++ virtual void _enqueue(Context *item) { ++ _queue.push_back(item); ++ } ++ virtual void _enqueue_front(Context *item) { ++ _queue.push_front(item); ++ } ++ virtual bool _empty() { ++ return _queue.empty(); ++ } ++ virtual Context *_dequeue() { ++ Context *item = _queue.front(); ++ _queue.pop_front(); ++ return item; ++ } ++ virtual void _process(Context *item) { ++ item->complete(0); ++ } ++private: ++ list _queue; ++}; ++ + #endif +-- +2.1.0 + diff --git a/SOURCES/0013-WorkQueue-added-virtual-destructor.patch b/SOURCES/0013-WorkQueue-added-virtual-destructor.patch new file mode 100644 index 0000000..57fdbaa --- /dev/null +++ b/SOURCES/0013-WorkQueue-added-virtual-destructor.patch @@ -0,0 +1,28 @@ +From d1510d9d33fe4091fc8cdf364683fc832e698b30 Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Mon, 11 May 2015 17:05:49 -0400 +Subject: [PATCH 13/22] WorkQueue: added virtual destructor + +Signed-off-by: Jason Dillaman +(cherry picked from commit b3f5a75332c058816dc39b71e9d2b36e752159f4) +(cherry picked from commit 78afe97c87f3b4edda762a7a3c93ad79c437fc5d) +--- + src/common/WorkQueue.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/common/WorkQueue.h b/src/common/WorkQueue.h +index 07aea2d..53bed77 100644 +--- a/src/common/WorkQueue.h ++++ b/src/common/WorkQueue.h +@@ -330,7 +330,7 @@ private: + + public: + ThreadPool(CephContext *cct_, string nm, int n, const char *option = NULL); +- ~ThreadPool(); ++ virtual ~ThreadPool(); + + /// return number of threads currently running + int get_num_threads() { +-- +2.1.0 + diff --git a/SOURCES/0014-librbd-add-task-pool-work-queue-for-AIO-requests.patch b/SOURCES/0014-librbd-add-task-pool-work-queue-for-AIO-requests.patch new file mode 100644 index 0000000..91609c5 --- /dev/null +++ b/SOURCES/0014-librbd-add-task-pool-work-queue-for-AIO-requests.patch @@ -0,0 +1,148 @@ +From 5f7f6c3c6aa3640162e5dc91d4e648b2c17e74d2 Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Wed, 8 Apr 2015 17:24:08 -0400 +Subject: [PATCH 14/22] librbd: add task pool / work queue for AIO requests + +Signed-off-by: Jason Dillaman +(cherry picked from commit afb896d91f886b647baf38f7ec94cc3739f6d2a9) +(cherry picked from commit 9faaeae4c181944636b4119c0a797181ded0651f) + +Conflicts: + src/librbd/ImageCtx.cc: removed refs to pending AIO + src/librbd/ImageCtx.h: removed refs to pending AIO + src/librbd/internal.cc: removed refs to pending AIO +--- + src/common/config_opts.h | 2 ++ + src/librbd/ImageCtx.cc | 30 +++++++++++++++++++++++++++++- + src/librbd/ImageCtx.h | 3 +++ + src/librbd/internal.cc | 4 ++++ + 4 files changed, 38 insertions(+), 1 deletion(-) + +diff --git a/src/common/config_opts.h b/src/common/config_opts.h +index da9bcdf..f024011 100644 +--- a/src/common/config_opts.h ++++ b/src/common/config_opts.h +@@ -719,6 +719,8 @@ OPTION(journal_ignore_corruption, OPT_BOOL, false) // assume journal is not corr + OPTION(rados_mon_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from the monitor before returning an error from a rados operation. 0 means on limit. + OPTION(rados_osd_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a response from osds before returning an error from a rados operation. 0 means no limit. + ++OPTION(rbd_op_threads, OPT_INT, 1) ++OPTION(rbd_op_thread_timeout, OPT_INT, 60) + OPTION(rbd_cache, OPT_BOOL, false) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0) + OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, false) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushs so that writeback is safe + OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes +diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc +index b5c2db6..4850b9a 100644 +--- a/src/librbd/ImageCtx.cc ++++ b/src/librbd/ImageCtx.cc +@@ -6,6 +6,7 @@ + #include "common/dout.h" + #include "common/errno.h" + #include "common/perf_counters.h" ++#include "common/WorkQueue.h" + + #include "librbd/internal.h" + #include "librbd/WatchCtx.h" +@@ -27,6 +28,23 @@ using librados::snap_t; + using librados::IoCtx; + + namespace librbd { ++ ++namespace { ++ ++class ThreadPoolSingleton : public ThreadPool { ++public: ++ ThreadPoolSingleton(CephContext *cct) ++ : ThreadPool(cct, "librbd::thread_pool", cct->_conf->rbd_op_threads, ++ "rbd_op_threads") { ++ start(); ++ } ++ virtual ~ThreadPoolSingleton() { ++ stop(); ++ } ++}; ++ ++} // anonymous namespace ++ + ImageCtx::ImageCtx(const string &image_name, const string &image_id, + const char *snap, IoCtx& p, bool ro) + : cct((CephContext*)p.cct()), +@@ -51,7 +69,8 @@ namespace librbd { + format_string(NULL), + id(image_id), parent(NULL), + stripe_unit(0), stripe_count(0), +- object_cacher(NULL), writeback_handler(NULL), object_set(NULL) ++ object_cacher(NULL), writeback_handler(NULL), object_set(NULL), ++ aio_work_queue(NULL) + { + md_ctx.dup(p); + data_ctx.dup(p); +@@ -96,6 +115,13 @@ namespace librbd { + object_set->return_enoent = true; + object_cacher->start(); + } ++ ++ ThreadPoolSingleton *thread_pool_singleton; ++ cct->lookup_or_create_singleton_object( ++ thread_pool_singleton, "librbd::thread_pool"); ++ aio_work_queue = new ContextWQ("librbd::aio_work_queue", ++ cct->_conf->rbd_op_thread_timeout, ++ thread_pool_singleton); + } + + ImageCtx::~ImageCtx() { +@@ -113,6 +139,8 @@ namespace librbd { + object_set = NULL; + } + delete[] format_string; ++ ++ delete aio_work_queue; + } + + int ImageCtx::init() { +diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h +index 83ed044..218527a 100644 +--- a/src/librbd/ImageCtx.h ++++ b/src/librbd/ImageCtx.h +@@ -25,6 +25,7 @@ + #include "librbd/parent_types.h" + + class CephContext; ++class ContextWQ; + class PerfCounters; + + namespace librbd { +@@ -89,6 +90,8 @@ namespace librbd { + LibrbdWriteback *writeback_handler; + ObjectCacher::ObjectSet *object_set; + ++ ContextWQ *aio_work_queue; ++ + /** + * Either image_name or image_id must be set. + * If id is not known, pass the empty std::string, +diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc +index afa4660..b0c8ebc 100644 +--- a/src/librbd/internal.cc ++++ b/src/librbd/internal.cc +@@ -9,6 +9,7 @@ + #include "common/dout.h" + #include "common/errno.h" + #include "common/Throttle.h" ++#include "common/WorkQueue.h" + #include "cls/lock/cls_lock_client.h" + #include "include/stringify.h" + +@@ -2118,6 +2119,9 @@ reprotect_and_return_err: + void close_image(ImageCtx *ictx) + { + ldout(ictx->cct, 20) << "close_image " << ictx << dendl; ++ ++ ictx->aio_work_queue->drain(); ++ + if (ictx->object_cacher) + ictx->shutdown_cache(); // implicitly flushes + else +-- +2.1.0 + diff --git a/SOURCES/0015-librbd-avoid-blocking-AIO-API-methods.patch b/SOURCES/0015-librbd-avoid-blocking-AIO-API-methods.patch new file mode 100644 index 0000000..87eb9f8 --- /dev/null +++ b/SOURCES/0015-librbd-avoid-blocking-AIO-API-methods.patch @@ -0,0 +1,204 @@ +From 6122f6c80df874f68ec6d8e2e0098cf703cc531e Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Wed, 8 Apr 2015 19:06:52 -0400 +Subject: [PATCH 15/22] librbd: avoid blocking AIO API methods + +Enqueue all AIO API methods within the new librbd thread pool to +reduce the possibility of any blocking operations. To maintain +backwards compatibility with the legacy return codes of the API's +AIO methods, it's still possible to block attempting to acquire +the snap_lock. + +Fixes: #11056 +Signed-off-by: Jason Dillaman +(cherry picked from commit 3a7b5e30efdb21aa1a0aeb68a5d02a1ac2a5faf3) +(cherry picked from commit aa45ee0d7d6f9d8e2cc43ef43f0a9762977ca53f) +--- + src/librbd/librbd.cc | 110 +++++++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 99 insertions(+), 11 deletions(-) + +diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc +index 658f24b..7f966be 100644 +--- a/src/librbd/librbd.cc ++++ b/src/librbd/librbd.cc +@@ -20,6 +20,7 @@ + #include "common/errno.h" + #include "common/snap_types.h" + #include "common/perf_counters.h" ++#include "common/WorkQueue.h" + #include "include/Context.h" + #include "include/rbd/librbd.hpp" + #include "osdc/ObjectCacher.h" +@@ -45,6 +46,82 @@ using ceph::bufferlist; + using librados::snap_t; + using librados::IoCtx; + ++namespace { ++ ++class C_AioReadWQ : public Context { ++public: ++ C_AioReadWQ(librbd::ImageCtx *ictx, uint64_t off, size_t len, ++ char *buf, bufferlist *pbl, librbd::AioCompletion *c) ++ : m_ictx(ictx), m_off(off), m_len(len), m_buf(buf), m_pbl(pbl), m_comp(c) { ++ } ++protected: ++ virtual void finish(int r) { ++ librbd::aio_read(m_ictx, m_off, m_len, m_buf, m_pbl, m_comp); ++ } ++private: ++ librbd::ImageCtx *m_ictx; ++ uint64_t m_off; ++ uint64_t m_len; ++ char *m_buf; ++ bufferlist *m_pbl; ++ librbd::AioCompletion *m_comp; ++}; ++ ++class C_AioWriteWQ : public Context { ++public: ++ C_AioWriteWQ(librbd::ImageCtx *ictx, uint64_t off, size_t len, ++ const char *buf, librbd::AioCompletion *c) ++ : m_ictx(ictx), m_off(off), m_len(len), m_buf(buf), m_comp(c) { ++ } ++protected: ++ virtual void finish(int r) { ++ librbd::aio_write(m_ictx, m_off, m_len, m_buf, m_comp); ++ } ++private: ++ librbd::ImageCtx *m_ictx; ++ uint64_t m_off; ++ uint64_t m_len; ++ const char *m_buf; ++ librbd::AioCompletion *m_comp; ++}; ++ ++class C_AioDiscardWQ : public Context { ++public: ++ C_AioDiscardWQ(librbd::ImageCtx *ictx, uint64_t off, uint64_t len, ++ librbd::AioCompletion *c) ++ : m_ictx(ictx), m_off(off), m_len(len), m_comp(c) { ++ } ++protected: ++ virtual void finish(int r) { ++ librbd::aio_discard(m_ictx, m_off, m_len, m_comp); ++ } ++private: ++ librbd::ImageCtx *m_ictx; ++ uint64_t m_off; ++ uint64_t m_len; ++ librbd::AioCompletion *m_comp; ++}; ++ ++class C_AioFlushWQ : public Context { ++public: ++ C_AioFlushWQ(librbd::ImageCtx *ictx, librbd::AioCompletion *c) ++ : m_ictx(ictx), m_comp(c) { ++ } ++protected: ++ virtual void finish(int r) { ++ librbd::aio_flush(m_ictx, m_comp); ++ } ++private: ++ librbd::ImageCtx *m_ictx; ++ librbd::AioCompletion *m_comp; ++}; ++ ++librbd::AioCompletion* get_aio_completion(librbd::RBD::AioCompletion *comp) { ++ return reinterpret_cast(comp->pc); ++} ++ ++} // anonymous namespace ++ + namespace librbd { + ProgressContext::~ProgressContext() + { +@@ -483,14 +560,17 @@ namespace librbd { + ImageCtx *ictx = (ImageCtx *)ctx; + if (bl.length() < len) + return -EINVAL; +- return librbd::aio_write(ictx, off, len, bl.c_str(), +- (librbd::AioCompletion *)c->pc); ++ ictx->aio_work_queue->queue(new C_AioWriteWQ(ictx, off, len, bl.c_str(), ++ get_aio_completion(c))); ++ return 0; + } + + int Image::aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c) + { + ImageCtx *ictx = (ImageCtx *)ctx; +- return librbd::aio_discard(ictx, off, len, (librbd::AioCompletion *)c->pc); ++ ictx->aio_work_queue->queue(new C_AioDiscardWQ(ictx, off, len, ++ get_aio_completion(c))); ++ return 0; + } + + int Image::aio_read(uint64_t off, size_t len, bufferlist& bl, +@@ -499,7 +579,9 @@ namespace librbd { + ImageCtx *ictx = (ImageCtx *)ctx; + ldout(ictx->cct, 10) << "Image::aio_read() buf=" << (void *)bl.c_str() << "~" + << (void *)(bl.c_str() + len - 1) << dendl; +- return librbd::aio_read(ictx, off, len, NULL, &bl, (librbd::AioCompletion *)c->pc); ++ ictx->aio_work_queue->queue(new C_AioReadWQ(ictx, off, len, NULL, &bl, ++ get_aio_completion(c))); ++ return 0; + } + + int Image::flush() +@@ -511,7 +593,8 @@ namespace librbd { + int Image::aio_flush(RBD::AioCompletion *c) + { + ImageCtx *ictx = (ImageCtx *)ctx; +- return librbd::aio_flush(ictx, (librbd::AioCompletion *)c->pc); ++ ictx->aio_work_queue->queue(new C_AioFlushWQ(ictx, get_aio_completion(c))); ++ return 0; + } + + int Image::invalidate_cache() +@@ -1102,8 +1185,9 @@ extern "C" int rbd_aio_write(rbd_image_t image, uint64_t off, size_t len, + { + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; +- return librbd::aio_write(ictx, off, len, buf, +- (librbd::AioCompletion *)comp->pc); ++ ictx->aio_work_queue->queue(new C_AioWriteWQ(ictx, off, len, buf, ++ get_aio_completion(comp))); ++ return 0; + } + + extern "C" int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, +@@ -1111,7 +1195,9 @@ extern "C" int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, + { + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; +- return librbd::aio_discard(ictx, off, len, (librbd::AioCompletion *)comp->pc); ++ ictx->aio_work_queue->queue(new C_AioDiscardWQ(ictx, off, len, ++ get_aio_completion(comp))); ++ return 0; + } + + extern "C" int rbd_aio_read(rbd_image_t image, uint64_t off, size_t len, +@@ -1119,8 +1205,9 @@ extern "C" int rbd_aio_read(rbd_image_t image, uint64_t off, size_t len, + { + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; +- return librbd::aio_read(ictx, off, len, buf, NULL, +- (librbd::AioCompletion *)comp->pc); ++ ictx->aio_work_queue->queue(new C_AioReadWQ(ictx, off, len, buf, NULL, ++ get_aio_completion(comp))); ++ return 0; + } + + extern "C" int rbd_flush(rbd_image_t image) +@@ -1133,7 +1220,8 @@ extern "C" int rbd_aio_flush(rbd_image_t image, rbd_completion_t c) + { + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; +- return librbd::aio_flush(ictx, (librbd::AioCompletion *)comp->pc); ++ ictx->aio_work_queue->queue(new C_AioFlushWQ(ictx, get_aio_completion(comp))); ++ return 0; + } + + extern "C" int rbd_invalidate_cache(rbd_image_t image) +-- +2.1.0 + diff --git a/SOURCES/0016-librbd-add-new-fail-method-to-AioCompletion.patch b/SOURCES/0016-librbd-add-new-fail-method-to-AioCompletion.patch new file mode 100644 index 0000000..63a2ea9 --- /dev/null +++ b/SOURCES/0016-librbd-add-new-fail-method-to-AioCompletion.patch @@ -0,0 +1,132 @@ +From 1560268428ba1e0137b65012c9e740dbc6e7bc8f Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Wed, 8 Apr 2015 20:18:50 -0400 +Subject: [PATCH 16/22] librbd: add new fail method to AioCompletion + +Helper method to handle passing fatal errors generated within +librbd (not from the OSDs) back to the client. + +Signed-off-by: Jason Dillaman +(cherry picked from commit 6d1d0c867855a96bee4c13a0c0a39a0e002ccd12) +(cherry picked from commit a8a1d2c947bf7b345c49864e8bd569bdcf39da72) + +Conflicts: + src/librbd/AioCompletion.cc: removed refs to pending AIO + src/librbd/AioCompletion.h: removed refs to pending AIO +--- + src/librbd/AioCompletion.cc | 41 +++++++++++++++++++++++++++++++++++++++-- + src/librbd/AioCompletion.h | 25 ++----------------------- + 2 files changed, 41 insertions(+), 25 deletions(-) + +diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc +index 86b5b50..531f151 100644 +--- a/src/librbd/AioCompletion.cc ++++ b/src/librbd/AioCompletion.cc +@@ -5,6 +5,7 @@ + + #include "common/ceph_context.h" + #include "common/dout.h" ++#include "common/errno.h" + + #include "librbd/AioRequest.h" + #include "librbd/internal.h" +@@ -25,7 +26,7 @@ namespace librbd { + building = false; + if (!pending_count) { + finalize(cct, rval); +- complete(); ++ complete(cct); + } + lock.Unlock(); + } +@@ -54,6 +55,42 @@ namespace librbd { + } + } + ++ void AioCompletion::complete(CephContext *cct) { ++ utime_t elapsed; ++ assert(lock.is_locked()); ++ elapsed = ceph_clock_now(cct) - start_time; ++ switch (aio_type) { ++ case AIO_TYPE_READ: ++ ictx->perfcounter->tinc(l_librbd_aio_rd_latency, elapsed); break; ++ case AIO_TYPE_WRITE: ++ ictx->perfcounter->tinc(l_librbd_aio_wr_latency, elapsed); break; ++ case AIO_TYPE_DISCARD: ++ ictx->perfcounter->tinc(l_librbd_aio_discard_latency, elapsed); break; ++ case AIO_TYPE_FLUSH: ++ ictx->perfcounter->tinc(l_librbd_aio_flush_latency, elapsed); break; ++ default: ++ lderr(cct) << "completed invalid aio_type: " << aio_type << dendl; ++ break; ++ } ++ ++ if (complete_cb) { ++ complete_cb(rbd_comp, complete_arg); ++ } ++ done = true; ++ cond.Signal(); ++ } ++ ++ void AioCompletion::fail(CephContext *cct, int r) ++ { ++ lderr(cct) << "AioCompletion::fail() " << this << ": " << cpp_strerror(r) ++ << dendl; ++ lock.Lock(); ++ assert(pending_count == 0); ++ rval = r; ++ complete(cct); ++ put_unlock(); ++ } ++ + void AioCompletion::complete_request(CephContext *cct, ssize_t r) + { + ldout(cct, 20) << "AioCompletion::complete_request() " +@@ -70,7 +107,7 @@ namespace librbd { + int count = --pending_count; + if (!count && !building) { + finalize(cct, rval); +- complete(); ++ complete(cct); + } + put_unlock(); + } +diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h +index aaccefe..82d3442 100644 +--- a/src/librbd/AioCompletion.h ++++ b/src/librbd/AioCompletion.h +@@ -97,29 +97,8 @@ namespace librbd { + start_time = ceph_clock_now(ictx->cct); + } + +- void complete() { +- utime_t elapsed; +- assert(lock.is_locked()); +- elapsed = ceph_clock_now(ictx->cct) - start_time; +- switch (aio_type) { +- case AIO_TYPE_READ: +- ictx->perfcounter->tinc(l_librbd_aio_rd_latency, elapsed); break; +- case AIO_TYPE_WRITE: +- ictx->perfcounter->tinc(l_librbd_aio_wr_latency, elapsed); break; +- case AIO_TYPE_DISCARD: +- ictx->perfcounter->tinc(l_librbd_aio_discard_latency, elapsed); break; +- case AIO_TYPE_FLUSH: +- ictx->perfcounter->tinc(l_librbd_aio_flush_latency, elapsed); break; +- default: +- lderr(ictx->cct) << "completed invalid aio_type: " << aio_type << dendl; +- break; +- } +- if (complete_cb) { +- complete_cb(rbd_comp, complete_arg); +- } +- done = true; +- cond.Signal(); +- } ++ void complete(CephContext *cct); ++ void fail(CephContext *cct, int r); + + void set_complete_cb(void *cb_arg, callback_t cb) { + complete_cb = cb; +-- +2.1.0 + diff --git a/SOURCES/0017-Throttle-added-pending_error-method-to-SimpleThrottl.patch b/SOURCES/0017-Throttle-added-pending_error-method-to-SimpleThrottl.patch new file mode 100644 index 0000000..4b235ce --- /dev/null +++ b/SOURCES/0017-Throttle-added-pending_error-method-to-SimpleThrottl.patch @@ -0,0 +1,52 @@ +From 91faeff9492b3b4d014863b424329f138efdb185 Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Wed, 8 Apr 2015 21:48:21 -0400 +Subject: [PATCH 17/22] Throttle: added pending_error method to SimpleThrottle + +Allow the client of SimpleThrottle to detect an async error +so that it can exit early. + +Signed-off-by: Jason Dillaman +(cherry picked from commit b88b88c5df91325fb713c2031a56bffe421268e0) +(cherry picked from commit c2ea7e85dcbca60e6ae09d4ef99d02834d02b2ed) +--- + src/common/Throttle.cc | 6 ++++++ + src/common/Throttle.h | 3 ++- + 2 files changed, 8 insertions(+), 1 deletion(-) + +diff --git a/src/common/Throttle.cc b/src/common/Throttle.cc +index 026d731..5c68a1f 100644 +--- a/src/common/Throttle.cc ++++ b/src/common/Throttle.cc +@@ -267,6 +267,12 @@ void SimpleThrottle::end_op(int r) + m_cond.Signal(); + } + ++bool SimpleThrottle::pending_error() const ++{ ++ Mutex::Locker l(m_lock); ++ return (m_ret < 0); ++} ++ + int SimpleThrottle::wait_for_ret() + { + Mutex::Locker l(m_lock); +diff --git a/src/common/Throttle.h b/src/common/Throttle.h +index 6d03988..b171e27 100644 +--- a/src/common/Throttle.h ++++ b/src/common/Throttle.h +@@ -76,9 +76,10 @@ public: + ~SimpleThrottle(); + void start_op(); + void end_op(int r); ++ bool pending_error() const; + int wait_for_ret(); + private: +- Mutex m_lock; ++ mutable Mutex m_lock; + Cond m_cond; + uint64_t m_max; + uint64_t m_current; +-- +2.1.0 + diff --git a/SOURCES/0018-librbd-internal-AIO-methods-no-longer-return-result.patch b/SOURCES/0018-librbd-internal-AIO-methods-no-longer-return-result.patch new file mode 100644 index 0000000..d5dad23 --- /dev/null +++ b/SOURCES/0018-librbd-internal-AIO-methods-no-longer-return-result.patch @@ -0,0 +1,417 @@ +From df42b802cfb5df6cea9015abcc3ea91a5f903009 Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Wed, 8 Apr 2015 21:37:50 -0400 +Subject: [PATCH 18/22] librbd: internal AIO methods no longer return result + +All failures should be returned via the AioCompletion. + +Signed-off-by: Jason Dillaman +(cherry picked from commit 9ab42d613128ab08c688ddbea93df4c95068b9cd) +(cherry picked from commit 04eea0add8bc5501b125bb8d5e716d70abcf9dcc) +--- + src/librbd/internal.cc | 167 ++++++++++++++++++++----------------------------- + src/librbd/internal.h | 17 ++--- + 2 files changed, 76 insertions(+), 108 deletions(-) + +diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc +index b0c8ebc..477d936 100644 +--- a/src/librbd/internal.cc ++++ b/src/librbd/internal.cc +@@ -1982,13 +1982,7 @@ reprotect_and_return_err: + + Context *ctx = new C_CopyWrite(m_throttle, m_bl); + AioCompletion *comp = aio_create_completion_internal(ctx, rbd_ctx_cb); +- r = aio_write(m_dest, m_offset, m_bl->length(), m_bl->c_str(), comp); +- if (r < 0) { +- ctx->complete(r); +- comp->release(); +- lderr(m_dest->cct) << "error writing to destination image at offset " +- << m_offset << ": " << cpp_strerror(r) << dendl; +- } ++ aio_write(m_dest, m_offset, m_bl->length(), m_bl->c_str(), comp); + } + private: + SimpleThrottle *m_throttle; +@@ -2021,20 +2015,15 @@ reprotect_and_return_err: + SimpleThrottle throttle(cct->_conf->rbd_concurrent_management_ops, false); + uint64_t period = src->get_stripe_period(); + for (uint64_t offset = 0; offset < src_size; offset += period) { ++ if (throttle.pending_error()) { ++ return throttle.wait_for_ret(); ++ } ++ + uint64_t len = min(period, src_size - offset); + bufferlist *bl = new bufferlist(); + Context *ctx = new C_CopyRead(&throttle, dest, offset, bl); + AioCompletion *comp = aio_create_completion_internal(ctx, rbd_ctx_cb); +- r = aio_read(src, offset, len, NULL, bl, comp); +- if (r < 0) { +- ctx->complete(r); +- comp->release(); +- throttle.wait_for_ret(); +- lderr(cct) << "could not read from source image from " +- << offset << " to " << offset + len << ": " +- << cpp_strerror(r) << dendl; +- return r; +- } ++ aio_read(src, offset, len, NULL, bl, comp); + prog_ctx.update_progress(offset, src_size); + } + +@@ -2408,12 +2397,7 @@ reprotect_and_return_err: + + Context *ctx = new C_SafeCond(&mylock, &cond, &done, &ret); + AioCompletion *c = aio_create_completion_internal(ctx, rbd_ctx_cb); +- r = aio_read(ictx, off, read_len, NULL, &bl, c); +- if (r < 0) { +- c->release(); +- delete ctx; +- return r; +- } ++ aio_read(ictx, off, read_len, NULL, &bl, c); + + mylock.Lock(); + while (!done) +@@ -2643,12 +2627,7 @@ reprotect_and_return_err: + + Context *ctx = new C_SafeCond(&mylock, &cond, &done, &ret); + AioCompletion *c = aio_create_completion_internal(ctx, rbd_ctx_cb); +- int r = aio_read(ictx, image_extents, buf, pbl, c); +- if (r < 0) { +- c->release(); +- delete ctx; +- return r; +- } ++ aio_read(ictx, image_extents, buf, pbl, c); + + mylock.Lock(); + while (!done) +@@ -2677,12 +2656,7 @@ reprotect_and_return_err: + + Context *ctx = new C_SafeCond(&mylock, &cond, &done, &ret); + AioCompletion *c = aio_create_completion_internal(ctx, rbd_ctx_cb); +- r = aio_write(ictx, off, mylen, buf, c); +- if (r < 0) { +- c->release(); +- delete ctx; +- return r; +- } ++ aio_write(ictx, off, mylen, buf, c); + + mylock.Lock(); + while (!done) +@@ -2713,12 +2687,7 @@ reprotect_and_return_err: + + Context *ctx = new C_SafeCond(&mylock, &cond, &done, &ret); + AioCompletion *c = aio_create_completion_internal(ctx, rbd_ctx_cb); +- int r = aio_discard(ictx, off, len, c); +- if (r < 0) { +- c->release(); +- delete ctx; +- return r; +- } ++ aio_discard(ictx, off, len, c); + + mylock.Lock(); + while (!done) +@@ -2836,18 +2805,20 @@ reprotect_and_return_err: + return 0; + } + +- int aio_flush(ImageCtx *ictx, AioCompletion *c) ++ void aio_flush(ImageCtx *ictx, AioCompletion *c) + { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "aio_flush " << ictx << " completion " << c << dendl; + ++ c->get(); + int r = ictx_check(ictx); +- if (r < 0) +- return r; ++ if (r < 0) { ++ c->fail(cct, r); ++ return; ++ } + + ictx->user_flushed(); + +- c->get(); + c->add_request(); + c->init_time(ictx, AIO_TYPE_FLUSH); + C_AioWrite *req_comp = new C_AioWrite(cct, c); +@@ -2862,8 +2833,6 @@ reprotect_and_return_err: + c->finish_adding_requests(cct); + c->put(); + ictx->perfcounter->inc(l_librbd_aio_flush); +- +- return 0; + } + + int flush(ImageCtx *ictx) +@@ -2911,21 +2880,26 @@ reprotect_and_return_err: + return ictx->invalidate_cache(); + } + +- int aio_write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf, +- AioCompletion *c) ++ void aio_write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf, ++ AioCompletion *c) + { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "aio_write " << ictx << " off = " << off << " len = " + << len << " buf = " << (void*)buf << dendl; + ++ c->get(); + int r = ictx_check(ictx); +- if (r < 0) +- return r; ++ if (r < 0) { ++ c->fail(cct, r); ++ return; ++ } + + uint64_t mylen = len; + r = clip_io(ictx, off, &mylen); +- if (r < 0) +- return r; ++ if (r < 0) { ++ c->fail(cct, r); ++ return; ++ } + + ictx->snap_lock.get_read(); + snapid_t snap_id = ictx->snap_id; +@@ -2936,8 +2910,10 @@ reprotect_and_return_err: + ictx->parent_lock.put_read(); + ictx->snap_lock.put_read(); + +- if (snap_id != CEPH_NOSNAP || ictx->read_only) +- return -EROFS; ++ if (snap_id != CEPH_NOSNAP || ictx->read_only) { ++ c->fail(cct, -EROFS); ++ return; ++ } + + ldout(cct, 20) << " parent overlap " << overlap << dendl; + +@@ -2948,7 +2924,6 @@ reprotect_and_return_err: + &ictx->layout, off, mylen, 0, extents); + } + +- c->get(); + c->init_time(ictx, AIO_TYPE_WRITE); + for (vector::iterator p = extents.begin(); p != extents.end(); ++p) { + ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length +@@ -2978,34 +2953,35 @@ reprotect_and_return_err: + bl, snapc, snap_id, req_comp); + c->add_request(); + r = req->send(); +- if (r < 0) +- goto done; ++ assert(r == 0); + } + } +- done: ++ + c->finish_adding_requests(ictx->cct); + c->put(); + + ictx->perfcounter->inc(l_librbd_aio_wr); + ictx->perfcounter->inc(l_librbd_aio_wr_bytes, mylen); +- +- /* FIXME: cleanup all the allocated stuff */ +- return r; + } + +- int aio_discard(ImageCtx *ictx, uint64_t off, uint64_t len, AioCompletion *c) ++ void aio_discard(ImageCtx *ictx, uint64_t off, uint64_t len, AioCompletion *c) + { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "aio_discard " << ictx << " off = " << off << " len = " + << len << dendl; + ++ c->get(); + int r = ictx_check(ictx); +- if (r < 0) +- return r; ++ if (r < 0) { ++ c->fail(cct, r); ++ return; ++ } + + r = clip_io(ictx, off, &len); +- if (r < 0) +- return r; ++ if (r < 0) { ++ c->fail(cct, r); ++ return; ++ } + + // TODO: check for snap + ictx->snap_lock.get_read(); +@@ -3017,8 +2993,10 @@ reprotect_and_return_err: + ictx->parent_lock.put_read(); + ictx->snap_lock.put_read(); + +- if (snap_id != CEPH_NOSNAP || ictx->read_only) +- return -EROFS; ++ if (snap_id != CEPH_NOSNAP || ictx->read_only) { ++ c->fail(cct, -EROFS); ++ return; ++ } + + // map + vector extents; +@@ -3027,7 +3005,6 @@ reprotect_and_return_err: + &ictx->layout, off, len, 0, extents); + } + +- c->get(); + c->init_time(ictx, AIO_TYPE_DISCARD); + for (vector::iterator p = extents.begin(); p != extents.end(); ++p) { + ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length +@@ -3059,11 +3036,9 @@ reprotect_and_return_err: + } + + r = req->send(); +- if (r < 0) +- goto done; ++ assert(r == 0); + } +- r = 0; +- done: ++ + if (ictx->object_cacher) { + Mutex::Locker l(ictx->cache_lock); + ictx->object_cacher->discard_set(ictx->object_set, extents); +@@ -3074,9 +3049,6 @@ reprotect_and_return_err: + + ictx->perfcounter->inc(l_librbd_aio_discard); + ictx->perfcounter->inc(l_librbd_aio_discard_bytes, len); +- +- /* FIXME: cleanup all the allocated stuff */ +- return r; + } + + void rbd_req_cb(completion_t cb, void *arg) +@@ -3086,23 +3058,27 @@ reprotect_and_return_err: + req->complete(comp->get_return_value()); + } + +- int aio_read(ImageCtx *ictx, uint64_t off, size_t len, ++ void aio_read(ImageCtx *ictx, uint64_t off, size_t len, + char *buf, bufferlist *bl, + AioCompletion *c) + { + vector > image_extents(1); + image_extents[0] = make_pair(off, len); +- return aio_read(ictx, image_extents, buf, bl, c); ++ aio_read(ictx, image_extents, buf, bl, c); + } + +- int aio_read(ImageCtx *ictx, const vector >& image_extents, +- char *buf, bufferlist *pbl, AioCompletion *c) ++ void aio_read(ImageCtx *ictx, const vector >& image_extents, ++ char *buf, bufferlist *pbl, AioCompletion *c) + { +- ldout(ictx->cct, 20) << "aio_read " << ictx << " completion " << c << " " << image_extents << dendl; ++ CephContext *cct = ictx->cct; ++ ldout(cct, 20) << "aio_read " << ictx << " completion " << c << " " << image_extents << dendl; + ++ c->get(); + int r = ictx_check(ictx); +- if (r < 0) +- return r; ++ if (r < 0) { ++ c->fail(cct, r); ++ return; ++ } + + ictx->snap_lock.get_read(); + snap_t snap_id = ictx->snap_id; +@@ -3117,8 +3093,10 @@ reprotect_and_return_err: + ++p) { + uint64_t len = p->second; + r = clip_io(ictx, p->first, &len); +- if (r < 0) +- return r; ++ if (r < 0) { ++ c->fail(cct, r); ++ return; ++ } + if (len == 0) + continue; + +@@ -3127,13 +3105,10 @@ reprotect_and_return_err: + buffer_ofs += len; + } + +- int64_t ret; +- + c->read_buf = buf; + c->read_buf_len = buffer_ofs; + c->read_bl = pbl; + +- c->get(); + c->init_time(ictx, AIO_TYPE_READ); + for (map >::iterator p = object_extents.begin(); p != object_extents.end(); ++p) { + for (vector::iterator q = p->second.begin(); q != p->second.end(); ++q) { +@@ -3155,24 +3130,16 @@ reprotect_and_return_err: + cache_comp); + } else { + r = req->send(); +- if (r < 0 && r == -ENOENT) +- r = 0; +- if (r < 0) { +- ret = r; +- goto done; +- } ++ assert(r == 0); + } + } + } +- ret = buffer_ofs; +- done: +- c->finish_adding_requests(ictx->cct); ++ ++ c->finish_adding_requests(cct); + c->put(); + + ictx->perfcounter->inc(l_librbd_aio_rd); + ictx->perfcounter->inc(l_librbd_aio_rd_bytes, buffer_ofs); +- +- return ret; + } + + AioCompletion *aio_create_completion() { +diff --git a/src/librbd/internal.h b/src/librbd/internal.h +index 1e9fd9a..7712a39 100644 +--- a/src/librbd/internal.h ++++ b/src/librbd/internal.h +@@ -179,14 +179,15 @@ namespace librbd { + char *buf, bufferlist *pbl); + ssize_t write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf); + int discard(ImageCtx *ictx, uint64_t off, uint64_t len); +- int aio_write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf, +- AioCompletion *c); +- int aio_discard(ImageCtx *ictx, uint64_t off, uint64_t len, AioCompletion *c); +- int aio_read(ImageCtx *ictx, uint64_t off, size_t len, +- char *buf, bufferlist *pbl, AioCompletion *c); +- int aio_read(ImageCtx *ictx, const vector >& image_extents, +- char *buf, bufferlist *pbl, AioCompletion *c); +- int aio_flush(ImageCtx *ictx, AioCompletion *c); ++ ++ void aio_write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf, ++ AioCompletion *c); ++ void aio_discard(ImageCtx *ictx, uint64_t off, uint64_t len, AioCompletion *c); ++ void aio_read(ImageCtx *ictx, uint64_t off, size_t len, ++ char *buf, bufferlist *pbl, AioCompletion *c); ++ void aio_read(ImageCtx *ictx, const vector >& image_extents, ++ char *buf, bufferlist *pbl, AioCompletion *c); ++ void aio_flush(ImageCtx *ictx, AioCompletion *c); + int flush(ImageCtx *ictx); + int _flush(ImageCtx *ictx); + int invalidate_cache(ImageCtx *ictx); +-- +2.1.0 + diff --git a/SOURCES/0019-tests-update-librbd-AIO-tests-to-remove-result-code.patch b/SOURCES/0019-tests-update-librbd-AIO-tests-to-remove-result-code.patch new file mode 100644 index 0000000..c4ca294 --- /dev/null +++ b/SOURCES/0019-tests-update-librbd-AIO-tests-to-remove-result-code.patch @@ -0,0 +1,38 @@ +From af41826151c652b60ad3258a4bdcf18f38ffcbca Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Wed, 8 Apr 2015 21:55:36 -0400 +Subject: [PATCH 19/22] tests: update librbd AIO tests to remove result code + +Signed-off-by: Jason Dillaman +(cherry picked from commit 948b15eb52fd5d9ce842fa12ee0cecda17353b01) +(cherry picked from commit 37bb3000d3eff640a34a3ac48be470ce58627f87) +--- + src/test/librbd/test_librbd.cc | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc +index 7f35418..a903dd2 100644 +--- a/src/test/librbd/test_librbd.cc ++++ b/src/test/librbd/test_librbd.cc +@@ -745,8 +745,16 @@ TEST(LibRBD, TestIO) + ASSERT_EQ(10, rbd_write(image, info.size - 10, 100, test_data)); + + rbd_aio_create_completion(NULL, (rbd_callback_t) simple_read_cb, &comp); +- ASSERT_EQ(-EINVAL, rbd_aio_write(image, info.size, 1, test_data, comp)); +- ASSERT_EQ(-EINVAL, rbd_aio_read(image, info.size, 1, test_data, comp)); ++ ASSERT_EQ(0, rbd_aio_write(image, info.size, 1, test_data, comp)); ++ ASSERT_EQ(0, rbd_aio_wait_for_complete(comp)); ++ ASSERT_EQ(-EINVAL, rbd_aio_get_return_value(comp)); ++ rbd_aio_release(comp); ++ ++ rbd_aio_create_completion(NULL, (rbd_callback_t) simple_read_cb, &comp); ++ ASSERT_EQ(0, rbd_aio_read(image, info.size, 1, test_data, comp)); ++ ASSERT_EQ(0, rbd_aio_wait_for_complete(comp)); ++ ASSERT_EQ(-EINVAL, rbd_aio_get_return_value(comp)); ++ rbd_aio_release(comp); + + ASSERT_EQ(0, rbd_close(image)); + +-- +2.1.0 + diff --git a/SOURCES/0020-librbd-AioRequest-send-no-longer-returns-a-result.patch b/SOURCES/0020-librbd-AioRequest-send-no-longer-returns-a-result.patch new file mode 100644 index 0000000..5c83b41 --- /dev/null +++ b/SOURCES/0020-librbd-AioRequest-send-no-longer-returns-a-result.patch @@ -0,0 +1,175 @@ +From 7ccb4ae63d3ae885e1c7e6652f4d935f7ac02844 Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Thu, 9 Apr 2015 13:33:09 -0400 +Subject: [PATCH 20/22] librbd: AioRequest::send no longer returns a result + +The librados calls used by AioRequest::send should always return +zero unless there is a bug. + +Signed-off-by: Jason Dillaman +(cherry picked from commit c77bce3311ab62892eb8c1d883263ba7ed663b20) + +Conflicts: + src/librbd/AioRequest.cc + src/librbd/AioRequest.h + src/librbd/internal.cc + +(cherry picked from commit 7fea9b6954c7e8b913c7ab561f8b28432ecb19fa) +--- + src/librbd/AioRequest.cc | 15 +++++++++------ + src/librbd/AioRequest.h | 6 +++--- + src/librbd/internal.cc | 23 ++++++++++------------- + 3 files changed, 22 insertions(+), 22 deletions(-) + +diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc +index 5cf9a11..dee6eba 100644 +--- a/src/librbd/AioRequest.cc ++++ b/src/librbd/AioRequest.cc +@@ -85,8 +85,9 @@ namespace librbd { + return true; + } + +- int AioRead::send() { +- ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len << dendl; ++ void AioRead::send() { ++ ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " ++ << m_object_off << "~" << m_object_len << dendl; + + librados::AioCompletion *rados_completion = + librados::Rados::aio_create_completion(this, rados_req_cb, NULL); +@@ -99,10 +100,11 @@ namespace librbd { + } else { + op.read(m_object_off, m_object_len, &m_read_data, NULL); + } ++ + r = m_ioctx->aio_operate(m_oid, rados_completion, &op, flags, NULL); ++ assert(r == 0); + + rados_completion->release(); +- return r; + } + + /** write **/ +@@ -224,16 +226,17 @@ namespace librbd { + return finished; + } + +- int AbstractWrite::send() { +- ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len << dendl; ++ void AbstractWrite::send() { ++ ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " ++ << m_object_off << "~" << m_object_len << dendl; + librados::AioCompletion *rados_completion = + librados::Rados::aio_create_completion(this, NULL, rados_req_cb); + int r; + assert(m_write.size()); + r = m_ioctx->aio_operate(m_oid, rados_completion, &m_write, + m_snap_seq, m_snaps); ++ assert(r == 0); + rados_completion->release(); +- return r; + } + + void AbstractWrite::send_copyup() { +diff --git a/src/librbd/AioRequest.h b/src/librbd/AioRequest.h +index d6103f9..882b535 100644 +--- a/src/librbd/AioRequest.h ++++ b/src/librbd/AioRequest.h +@@ -43,7 +43,7 @@ namespace librbd { + } + + virtual bool should_complete(int r) = 0; +- virtual int send() = 0; ++ virtual void send() = 0; + + protected: + void read_from_parent(vector >& image_extents); +@@ -73,7 +73,7 @@ namespace librbd { + } + virtual ~AioRead() {} + virtual bool should_complete(int r); +- virtual int send(); ++ virtual void send(); + + ceph::bufferlist &data() { + return m_read_data; +@@ -100,7 +100,7 @@ namespace librbd { + bool hide_enoent); + virtual ~AbstractWrite() {} + virtual bool should_complete(int r); +- virtual int send(); ++ virtual void send(); + void guard_write(); + + bool has_parent() const { +diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc +index 477d936..0a51da2 100644 +--- a/src/librbd/internal.cc ++++ b/src/librbd/internal.cc +@@ -2179,6 +2179,10 @@ reprotect_and_return_err: + SimpleThrottle throttle(cct->_conf->rbd_concurrent_management_ops, false); + + for (uint64_t ono = 0; ono < overlap_objects; ono++) { ++ if (throttle.pending_error()) { ++ return throttle.wait_for_ret(); ++ } ++ + { + RWLock::RLocker l(ictx->parent_lock); + // stop early if the parent went away - it just means +@@ -2202,12 +2206,7 @@ reprotect_and_return_err: + Context *comp = new C_SimpleThrottle(&throttle); + AioWrite *req = new AioWrite(ictx, oid, ono, 0, objectx, object_overlap, + bl, snapc, CEPH_NOSNAP, comp); +- r = req->send(); +- if (r < 0) { +- lderr(cct) << "failed to flatten object " << oid << dendl; +- goto err; +- } +- ++ req->send(); + prog_ctx.update_progress(ono, overlap_objects); + } + +@@ -2952,8 +2951,7 @@ reprotect_and_return_err: + objectx, object_overlap, + bl, snapc, snap_id, req_comp); + c->add_request(); +- r = req->send(); +- assert(r == 0); ++ req->send(); + } + } + +@@ -3035,10 +3033,10 @@ reprotect_and_return_err: + snapc, snap_id, req_comp); + } + +- r = req->send(); +- assert(r == 0); ++ req->send(); + } + ++ r = 0; + if (ictx->object_cacher) { + Mutex::Locker l(ictx->cache_lock); + ictx->object_cacher->discard_set(ictx->object_set, extents); +@@ -3129,13 +3127,12 @@ reprotect_and_return_err: + q->length, q->offset, + cache_comp); + } else { +- r = req->send(); +- assert(r == 0); ++ req->send(); + } + } + } + +- c->finish_adding_requests(cct); ++ c->finish_adding_requests(ictx->cct); + c->put(); + + ictx->perfcounter->inc(l_librbd_aio_rd); +-- +2.1.0 + diff --git a/SOURCES/0021-librbd-new-rbd_non_blocking_aio-config-option.patch b/SOURCES/0021-librbd-new-rbd_non_blocking_aio-config-option.patch new file mode 100644 index 0000000..e735b12 --- /dev/null +++ b/SOURCES/0021-librbd-new-rbd_non_blocking_aio-config-option.patch @@ -0,0 +1,154 @@ +From 19fca914a6f8ae08d5d65679a686d442556ead38 Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Mon, 1 Jun 2015 22:56:11 -0400 +Subject: [PATCH 21/22] librbd: new rbd_non_blocking_aio config option + +Setting this option to false reverts librbd to legacy behavior +where AIO operations could potentially block. + +Signed-off-by: Jason Dillaman +(cherry picked from commit 769cad12716b85d87eacc1069dd9f5c21cad3915) +(cherry picked from commit bdd544d60b5d3390df9d36079d3d76e6bfae1593) +--- + src/common/config_opts.h | 1 + + src/librbd/librbd.cc | 57 ++++++++++++++++++++++++++++++++++++------------ + 2 files changed, 44 insertions(+), 14 deletions(-) + +diff --git a/src/common/config_opts.h b/src/common/config_opts.h +index f024011..4d64341 100644 +--- a/src/common/config_opts.h ++++ b/src/common/config_opts.h +@@ -721,6 +721,7 @@ OPTION(rados_osd_op_timeout, OPT_DOUBLE, 0) // how many seconds to wait for a re + + OPTION(rbd_op_threads, OPT_INT, 1) + OPTION(rbd_op_thread_timeout, OPT_INT, 60) ++OPTION(rbd_non_blocking_aio, OPT_BOOL, true) // process AIO ops from a worker thread to prevent blocking + OPTION(rbd_cache, OPT_BOOL, false) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0) + OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, false) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushs so that writeback is safe + OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes +diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc +index 7f966be..244a5a0 100644 +--- a/src/librbd/librbd.cc ++++ b/src/librbd/librbd.cc +@@ -116,6 +116,41 @@ private: + librbd::AioCompletion *m_comp; + }; + ++void submit_aio_read(librbd::ImageCtx *ictx, uint64_t off, size_t len, ++ char *buf, bufferlist *pbl, librbd::AioCompletion *c) { ++ if (ictx->cct->_conf->rbd_non_blocking_aio) { ++ ictx->aio_work_queue->queue(new C_AioReadWQ(ictx, off, len, buf, pbl, c)); ++ } else { ++ librbd::aio_read(ictx, off, len, buf, pbl, c); ++ } ++} ++ ++void submit_aio_write(librbd::ImageCtx *ictx, uint64_t off, size_t len, ++ const char *buf, librbd::AioCompletion *c) { ++ if (ictx->cct->_conf->rbd_non_blocking_aio) { ++ ictx->aio_work_queue->queue(new C_AioWriteWQ(ictx, off, len, buf, c)); ++ } else { ++ librbd::aio_write(ictx, off, len, buf, c); ++ } ++} ++ ++void submit_aio_discard(librbd::ImageCtx *ictx, uint64_t off, uint64_t len, ++ librbd::AioCompletion *c) { ++ if (ictx->cct->_conf->rbd_non_blocking_aio) { ++ ictx->aio_work_queue->queue(new C_AioDiscardWQ(ictx, off, len, c)); ++ } else { ++ librbd::aio_discard(ictx, off, len, c); ++ } ++} ++ ++void submit_aio_flush(librbd::ImageCtx *ictx, librbd::AioCompletion *c) { ++ if (ictx->cct->_conf->rbd_non_blocking_aio) { ++ ictx->aio_work_queue->queue(new C_AioFlushWQ(ictx, c)); ++ } else { ++ librbd::aio_flush(ictx, c); ++ } ++} ++ + librbd::AioCompletion* get_aio_completion(librbd::RBD::AioCompletion *comp) { + return reinterpret_cast(comp->pc); + } +@@ -560,16 +595,14 @@ namespace librbd { + ImageCtx *ictx = (ImageCtx *)ctx; + if (bl.length() < len) + return -EINVAL; +- ictx->aio_work_queue->queue(new C_AioWriteWQ(ictx, off, len, bl.c_str(), +- get_aio_completion(c))); ++ submit_aio_write(ictx, off, len, bl.c_str(), get_aio_completion(c)); + return 0; + } + + int Image::aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c) + { + ImageCtx *ictx = (ImageCtx *)ctx; +- ictx->aio_work_queue->queue(new C_AioDiscardWQ(ictx, off, len, +- get_aio_completion(c))); ++ submit_aio_discard(ictx, off, len, get_aio_completion(c)); + return 0; + } + +@@ -579,8 +612,7 @@ namespace librbd { + ImageCtx *ictx = (ImageCtx *)ctx; + ldout(ictx->cct, 10) << "Image::aio_read() buf=" << (void *)bl.c_str() << "~" + << (void *)(bl.c_str() + len - 1) << dendl; +- ictx->aio_work_queue->queue(new C_AioReadWQ(ictx, off, len, NULL, &bl, +- get_aio_completion(c))); ++ submit_aio_read(ictx, off, len, NULL, &bl, get_aio_completion(c)); + return 0; + } + +@@ -593,7 +625,7 @@ namespace librbd { + int Image::aio_flush(RBD::AioCompletion *c) + { + ImageCtx *ictx = (ImageCtx *)ctx; +- ictx->aio_work_queue->queue(new C_AioFlushWQ(ictx, get_aio_completion(c))); ++ submit_aio_flush(ictx, get_aio_completion(c)); + return 0; + } + +@@ -1185,8 +1217,7 @@ extern "C" int rbd_aio_write(rbd_image_t image, uint64_t off, size_t len, + { + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; +- ictx->aio_work_queue->queue(new C_AioWriteWQ(ictx, off, len, buf, +- get_aio_completion(comp))); ++ submit_aio_write(ictx, off, len, buf, get_aio_completion(comp)); + return 0; + } + +@@ -1195,8 +1226,7 @@ extern "C" int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, + { + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; +- ictx->aio_work_queue->queue(new C_AioDiscardWQ(ictx, off, len, +- get_aio_completion(comp))); ++ submit_aio_discard(ictx, off, len, get_aio_completion(comp)); + return 0; + } + +@@ -1205,8 +1235,7 @@ extern "C" int rbd_aio_read(rbd_image_t image, uint64_t off, size_t len, + { + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; +- ictx->aio_work_queue->queue(new C_AioReadWQ(ictx, off, len, buf, NULL, +- get_aio_completion(comp))); ++ submit_aio_read(ictx, off, len, buf, NULL, get_aio_completion(comp)); + return 0; + } + +@@ -1220,7 +1249,7 @@ extern "C" int rbd_aio_flush(rbd_image_t image, rbd_completion_t c) + { + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; +- ictx->aio_work_queue->queue(new C_AioFlushWQ(ictx, get_aio_completion(comp))); ++ submit_aio_flush(ictx, get_aio_completion(comp)); + return 0; + } + +-- +2.1.0 + diff --git a/SOURCES/0022-tests-verify-librbd-blocking-aio-code-path.patch b/SOURCES/0022-tests-verify-librbd-blocking-aio-code-path.patch new file mode 100644 index 0000000..035a4b0 --- /dev/null +++ b/SOURCES/0022-tests-verify-librbd-blocking-aio-code-path.patch @@ -0,0 +1,88 @@ +From c5fd5640879ab0fed0e3ac0009bb37dec43f7ad1 Mon Sep 17 00:00:00 2001 +From: Jason Dillaman +Date: Tue, 2 Jun 2015 10:33:35 -0400 +Subject: [PATCH 22/22] tests: verify librbd blocking aio code path + +Signed-off-by: Jason Dillaman +(cherry picked from commit 4cf41486e9c9e1efcb863960a8f3e0326ffca7e5) +(cherry picked from commit 0e668f6a278bf4d2c7a80dc7a30325e1c1645ce0) + +Conflicts: + src/test/librbd/test_librbd.cc: removed refs to pending AIO +--- + src/test/librbd/test_librbd.cc | 59 ++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 59 insertions(+) + +diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc +index a903dd2..0f3e857 100644 +--- a/src/test/librbd/test_librbd.cc ++++ b/src/test/librbd/test_librbd.cc +@@ -1867,6 +1867,65 @@ TEST(LibRBD, ZeroLengthRead) + ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster)); + } + ++TEST(LibRBD, BlockingAIO) ++{ ++ librados::Rados rados; ++ librados::IoCtx ioctx; ++ string pool_name = get_temp_pool_name(); ++ ++ ASSERT_EQ("", create_one_pool_pp(pool_name, rados)); ++ ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx)); ++ ++ librbd::RBD rbd; ++ std::string name = "testimg"; ++ uint64_t size = 1 << 20; ++ int order = 18; ++ ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order)); ++ ++ CephContext *cct = reinterpret_cast(ioctx.cct()); ++ cct->_conf->set_val_or_die("rbd_non_blocking_aio", "0"); ++ ++ librbd::Image image; ++ ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL)); ++ ++ bufferlist bl; ++ bl.append(std::string(256, '1')); ++ ++ librbd::RBD::AioCompletion *write_comp = ++ new librbd::RBD::AioCompletion(NULL, NULL); ++ ASSERT_EQ(0, image.aio_write(0, bl.length(), bl, write_comp)); ++ ++ librbd::RBD::AioCompletion *flush_comp = ++ new librbd::RBD::AioCompletion(NULL, NULL); ++ ASSERT_EQ(0, image.aio_flush(flush_comp)); ++ ASSERT_EQ(0, flush_comp->wait_for_complete()); ++ ASSERT_EQ(0, flush_comp->get_return_value()); ++ flush_comp->release(); ++ ++ ASSERT_EQ(1, write_comp->is_complete()); ++ ASSERT_EQ(0, write_comp->get_return_value()); ++ write_comp->release(); ++ ++ librbd::RBD::AioCompletion *discard_comp = ++ new librbd::RBD::AioCompletion(NULL, NULL); ++ ASSERT_EQ(0, image.aio_discard(128, 128, discard_comp)); ++ ASSERT_EQ(0, discard_comp->wait_for_complete()); ++ discard_comp->release(); ++ ++ librbd::RBD::AioCompletion *read_comp = ++ new librbd::RBD::AioCompletion(NULL, NULL); ++ bufferlist read_bl; ++ image.aio_read(0, bl.length(), read_bl, read_comp); ++ ASSERT_EQ(0, read_comp->wait_for_complete()); ++ ASSERT_EQ(bl.length(), read_comp->get_return_value()); ++ read_comp->release(); ++ ++ bufferlist expected_bl; ++ expected_bl.append(std::string(128, '1')); ++ expected_bl.append(std::string(128, '\0')); ++ ASSERT_TRUE(expected_bl.contents_equal(read_bl)); ++} ++ + int main(int argc, char **argv) + { + ::testing::InitGoogleTest(&argc, argv); +-- +2.1.0 + diff --git a/SPECS/ceph-common.spec b/SPECS/ceph-common.spec index 92fa026..27126ca 100644 --- a/SPECS/ceph-common.spec +++ b/SPECS/ceph-common.spec @@ -8,7 +8,7 @@ ################################################################################# Name: ceph-common Version: 0.80.7 -Release: 2%{?dist} +Release: 3%{?dist} Epoch: 1 Summary: Ceph Common License: GPLv2 @@ -26,6 +26,19 @@ Patch6: 0006-os-KeyValueDB-make-compaction-interface-generic.patch Patch7: 0007-mon-MonitorDBStore-uninline-init_options.patch Patch8: 0008-mon-MonitorDBStore-use-generic-KeyValueDB-create.patch Patch9: 0009-config-allow-unsafe-setting-of-config-values.patch +Patch10: 0010-CephContext-Add-AssociatedSingletonObject-to-allow-C.patch +Patch11: 0011-common-ceph_context-don-t-import-std-namespace.patch +Patch12: 0012-WorkQueue-add-new-ContextWQ-work-queue.patch +Patch13: 0013-WorkQueue-added-virtual-destructor.patch +Patch14: 0014-librbd-add-task-pool-work-queue-for-AIO-requests.patch +Patch15: 0015-librbd-avoid-blocking-AIO-API-methods.patch +Patch16: 0016-librbd-add-new-fail-method-to-AioCompletion.patch +Patch17: 0017-Throttle-added-pending_error-method-to-SimpleThrottl.patch +Patch18: 0018-librbd-internal-AIO-methods-no-longer-return-result.patch +Patch19: 0019-tests-update-librbd-AIO-tests-to-remove-result-code.patch +Patch20: 0020-librbd-AioRequest-send-no-longer-returns-a-result.patch +Patch21: 0021-librbd-new-rbd_non_blocking_aio-config-option.patch +Patch22: 0022-tests-verify-librbd-blocking-aio-code-path.patch Requires: librbd1 = %{epoch}:%{version}-%{release} Requires: librados2 = %{epoch}:%{version}-%{release} Requires: python-rbd = %{epoch}:%{version}-%{release} @@ -180,6 +193,19 @@ block device. %patch7 -p1 %patch8 -p1 %patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 +%patch21 -p1 +%patch22 -p1 %build ./autogen.sh @@ -336,6 +362,9 @@ fi %{python_sitelib}/rbd.py* %changelog +* Wed Jun 17 2015 Boris Ranto - 1:0.80.7-3 +- Fix librbd: aio calls may block (1225188) + * Fri Nov 21 2014 Boris Ranto - 1:0.80.7-2 - We need to obsolete and provide python-ceph by ceph-common