// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file defines an Arena allocator for better allocation performance.
#ifndef GOOGLE_PROTOBUF_ARENA_IMPL_H__
#define GOOGLE_PROTOBUF_ARENA_IMPL_H__

#include <atomic>
#include <limits>
#include <typeinfo>

#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/logging.h>

#ifdef ADDRESS_SANITIZER
#include <sanitizer/asan_interface.h>
#endif  // ADDRESS_SANITIZER

#include <google/protobuf/port_def.inc>

namespace google {
namespace protobuf {
namespace internal {

inline constexpr size_t AlignUpTo8(size_t n) {
  // Align n to next multiple of 8 (from Hacker's Delight, Chapter 3.)
  return (n + 7) & static_cast<size_t>(-8);
}
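
// Illustrative checks, not in the original header: AlignUpTo8 rounds up to
// the next multiple of 8 and leaves multiples of 8 unchanged.
static_assert(AlignUpTo8(1) == 8, "1 rounds up to 8");
static_assert(AlignUpTo8(8) == 8, "multiples of 8 are unchanged");
static_assert(AlignUpTo8(9) == 16, "9 rounds up to 16");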
using LifecycleIdAtomic = uint64_t;

// MetricsCollector collects stats for a particular arena.
class PROTOBUF_EXPORT ArenaMetricsCollector {
 public:
  ArenaMetricsCollector(bool record_allocs) : record_allocs_(record_allocs) {}

  // Invoked when the arena is about to be destroyed. This method will
  // typically finalize any metric collection and delete the collector.
  // space_allocated is the space used by the arena.
  virtual void OnDestroy(uint64_t space_allocated) = 0;

  // OnReset() is called when the associated arena is reset.
  // space_allocated is the space used by the arena just before the reset.
  virtual void OnReset(uint64_t space_allocated) = 0;

  // OnAlloc is called when an allocation happens.
  // type_info is promised to be static - its lifetime extends to match the
  // program's lifetime (it is produced by the typeid operator).
  // Note: typeid(void) will be passed as allocated_type every time we
  // intentionally want to avoid monitoring an allocation (i.e. internal
  // allocations for managing the arena).
  virtual void OnAlloc(const std::type_info* allocated_type,
                       uint64_t alloc_size) = 0;

  // Does OnAlloc() need to be called? If false, metric collection overhead
  // will be reduced since we will not do extra work per allocation.
  bool RecordAllocs() { return record_allocs_; }

 protected:
  // This class is destructed by the call to OnDestroy().
  ~ArenaMetricsCollector() = default;
  const bool record_allocs_;
};
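
// Usage sketch (illustrative, not part of this header): a minimal collector.
// OnDestroy() must free the collector itself, since the destructor is
// protected. The name CountingCollector is hypothetical.
//
//   class CountingCollector : public ArenaMetricsCollector {
//    public:
//     CountingCollector() : ArenaMetricsCollector(/*record_allocs=*/true) {}
//     void OnDestroy(uint64_t space_allocated) override { delete this; }
//     void OnReset(uint64_t space_allocated) override { total_ = 0; }
//     void OnAlloc(const std::type_info* type, uint64_t n) override {
//       total_ += n;
//     }
//    private:
//     uint64_t total_ = 0;
//   };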
struct AllocationPolicy {
  static constexpr size_t kDefaultStartBlockSize = 256;
  static constexpr size_t kDefaultMaxBlockSize = 8192;

  size_t start_block_size = kDefaultStartBlockSize;
  size_t max_block_size = kDefaultMaxBlockSize;
  void* (*block_alloc)(size_t) = nullptr;
  void (*block_dealloc)(void*, size_t) = nullptr;
  ArenaMetricsCollector* metrics_collector = nullptr;

  bool IsDefault() const {
    return start_block_size == kDefaultStartBlockSize &&
           max_block_size == kDefaultMaxBlockSize && block_alloc == nullptr &&
           block_dealloc == nullptr && metrics_collector == nullptr;
  }
};
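
// Usage sketch (illustrative): a non-default policy routing block allocation
// through operator new. Captureless lambdas convert to the plain function
// pointers the struct expects.
//
//   AllocationPolicy policy;
//   policy.start_block_size = 1024;
//   policy.max_block_size = 64 << 10;
//   policy.block_alloc = [](size_t n) { return ::operator new(n); };
//   policy.block_dealloc = [](void* p, size_t) { ::operator delete(p); };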
// A simple arena allocator. Calls to allocate functions must be properly
// serialized by the caller, hence this class cannot be used as a general
// purpose allocator in a multi-threaded program. It serves as a building block
// for ThreadSafeArena, which provides a thread-safe arena allocator.
//
// This class manages
// 1) Arena bump allocation + owning memory blocks.
// 2) Maintaining a cleanup list.
// It delegates the actual memory allocation back to ThreadSafeArena, which
// holds the block growth policy and the backing memory allocator.
class PROTOBUF_EXPORT SerialArena {
 public:
  struct Memory {
    void* ptr;
    size_t size;
  };

  // Node contains the ptr of the object to be cleaned up and the associated
  // cleanup function ptr.
  struct CleanupNode {
    void* elem;              // Pointer to the object to be cleaned up.
    void (*cleanup)(void*);  // Function pointer to the destructor or deleter.
  };
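
  // Illustrative sketch (not provided by this header): a cleanup function is
  // typically a destructor thunk registered alongside the object pointer,
  // e.g.
  //
  //   template <typename T>
  //   void arena_destruct_object(void* object) {
  //     static_cast<T*>(object)->~T();
  //   }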
  // Creates a new SerialArena inside mem, using the remaining memory for
  // future allocations.
  static SerialArena* New(SerialArena::Memory mem, void* owner);

  // Frees the SerialArena, returning the memory that was passed in to New().
  template <typename Deallocator>
  Memory Free(Deallocator deallocator);

  void CleanupList();
  uint64_t SpaceAllocated() const {
    return space_allocated_.load(std::memory_order_relaxed);
  }
  uint64_t SpaceUsed() const;

  bool HasSpace(size_t n) { return n <= static_cast<size_t>(limit_ - ptr_); }

  void* AllocateAligned(size_t n, const AllocationPolicy* policy) {
    GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
    GOOGLE_DCHECK_GE(limit_, ptr_);
    if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) {
      return AllocateAlignedFallback(n, policy);
    }
    return AllocateFromExisting(n);
  }
 private:
  void* AllocateFromExisting(size_t n) {
    void* ret = ptr_;
    ptr_ += n;
#ifdef ADDRESS_SANITIZER
    ASAN_UNPOISON_MEMORY_REGION(ret, n);
#endif  // ADDRESS_SANITIZER
    return ret;
  }

 public:
  // Allocate space if the current region provides enough space.
  bool MaybeAllocateAligned(size_t n, void** out) {
    GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
    GOOGLE_DCHECK_GE(limit_, ptr_);
    if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) return false;
    *out = AllocateFromExisting(n);
    return true;
  }
  std::pair<void*, CleanupNode*> AllocateAlignedWithCleanup(
      size_t n, const AllocationPolicy* policy) {
    if (PROTOBUF_PREDICT_FALSE(!HasSpace(n + kCleanupSize))) {
      return AllocateAlignedWithCleanupFallback(n, policy);
    }
    return AllocateFromExistingWithCleanupFallback(n);
  }

 private:
  std::pair<void*, CleanupNode*> AllocateFromExistingWithCleanupFallback(
      size_t n) {
    void* ret = ptr_;
    ptr_ += n;
    limit_ -= kCleanupSize;
#ifdef ADDRESS_SANITIZER
    ASAN_UNPOISON_MEMORY_REGION(ret, n);
    ASAN_UNPOISON_MEMORY_REGION(limit_, kCleanupSize);
#endif  // ADDRESS_SANITIZER
    return CreatePair(ret, reinterpret_cast<CleanupNode*>(limit_));
  }
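
  // Layout sketch (an inference from the code above, not normative): objects
  // grow upward from ptr_ while cleanup nodes are carved downward from
  // limit_, so both share the free space of the current block:
  //
  //   [ Block header | obj | obj | ...free... | CleanupNode | CleanupNode ]
  //                        ptr_ ->            <- limit_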
 public:
  void AddCleanup(void* elem, void (*cleanup)(void*),
                  const AllocationPolicy* policy) {
    auto res = AllocateAlignedWithCleanup(0, policy);
    res.second->elem = elem;
    res.second->cleanup = cleanup;
  }
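
  // Usage sketch (illustrative): registering a destructor for an object that
  // was placement-new'ed into the arena, using a thunk like the hypothetical
  // arena_destruct_object sketched above.
  //
  //   arena->AddCleanup(obj, &arena_destruct_object<MyType>, policy);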
  void* owner() const { return owner_; }
  SerialArena* next() const { return next_; }
  void set_next(SerialArena* next) { next_ = next; }

 private:
  // Blocks are variable length malloc-ed objects. The following structure
  // describes the common header for all blocks.
  struct Block {
    Block(Block* next, size_t size) : next(next), size(size), start(nullptr) {}

    char* Pointer(size_t n) {
      GOOGLE_DCHECK(n <= size);
      return reinterpret_cast<char*>(this) + n;
    }

    Block* const next;
    const size_t size;
    CleanupNode* start;
    // data follows
  };

  void* owner_;            // &ThreadCache of this thread;
  Block* head_;            // Head of linked list of blocks.
  SerialArena* next_;      // Next SerialArena in this linked list.
  size_t space_used_ = 0;  // Necessary for metrics.
  std::atomic<size_t> space_allocated_;

  // Next pointer to allocate from. Always 8-byte aligned. Points inside
  // head_. We keep these here to reduce indirection.
  char* ptr_;
  char* limit_;

  // Constructor is private as only New() should be used.
  inline SerialArena(Block* b, void* owner);
  void* AllocateAlignedFallback(size_t n, const AllocationPolicy* policy);
  std::pair<void*, CleanupNode*> AllocateAlignedWithCleanupFallback(
      size_t n, const AllocationPolicy* policy);
  void AllocateNewBlock(size_t n, const AllocationPolicy* policy);

  std::pair<void*, CleanupNode*> CreatePair(void* ptr, CleanupNode* node) {
    return {ptr, node};
  }

 public:
  static constexpr size_t kBlockHeaderSize = AlignUpTo8(sizeof(Block));
  static constexpr size_t kCleanupSize = AlignUpTo8(sizeof(CleanupNode));
};
// This class provides the core Arena memory allocation library. Different
// implementations only need to implement the public interface below.
// Arena is not a template type, as that would only be useful if all protos
// were in turn templates, which will not (and cannot) happen. However, by
// separating the memory allocation part from the cruft of the API users
// expect, we can use #ifdefs to select the best implementation based on
// hardware / OS.
class PROTOBUF_EXPORT ThreadSafeArena {
 public:
  ThreadSafeArena() { Init(false); }

  ThreadSafeArena(char* mem, size_t size) { InitializeFrom(mem, size); }

  explicit ThreadSafeArena(void* mem, size_t size,
                           const AllocationPolicy& policy) {
    if (policy.IsDefault()) {
      // Legacy code doesn't use the API above, but provides the initial block
      // through ArenaOptions. I suspect most do not touch the allocation
      // policy parameters.
      InitializeFrom(mem, size);
    } else {
      auto collector = policy.metrics_collector;
      bool record_allocs = collector && collector->RecordAllocs();
      InitializeWithPolicy(mem, size, record_allocs, policy);
    }
  }
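
  // Usage sketch (illustrative): seeding an arena with a user-provided
  // initial block and a non-default policy.
  //
  //   alignas(8) char buffer[4096];
  //   AllocationPolicy policy;
  //   policy.start_block_size = 512;
  //   ThreadSafeArena arena(buffer, sizeof(buffer), policy);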
  // Destructor deletes all owned heap allocated objects, and destructs objects
  // that have non-trivial destructors, except for proto2 message objects whose
  // destructors can be skipped. Also, frees all blocks except the initial block
  // if it was passed in.
  ~ThreadSafeArena();

  uint64_t Reset();

  uint64_t SpaceAllocated() const;
  uint64_t SpaceUsed() const;

  void* AllocateAligned(size_t n, const std::type_info* type) {
    SerialArena* arena;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(tag_and_id_, &arena))) {
      return arena->AllocateAligned(n, AllocPolicy());
    } else {
      return AllocateAlignedFallback(n, type);
    }
  }

  // This function allocates n bytes if the common happy case is true and
  // returns true. Otherwise it does nothing and returns false. These strange
  // semantics are necessary to allow callers to program functions that only
  // have fallback function calls in tail position. This substantially improves
  // code for the happy path.
  PROTOBUF_NDEBUG_INLINE bool MaybeAllocateAligned(size_t n, void** out) {
    SerialArena* a;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFromThreadCache(tag_and_id_, &a))) {
      return a->MaybeAllocateAligned(n, out);
    }
    return false;
  }
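
  // Caller sketch (illustrative): keeping the fallback in tail position so
  // the happy path compiles to a short, branch-predictable sequence.
  // SlowAllocate is hypothetical.
  //
  //   void* p;
  //   if (arena->MaybeAllocateAligned(n, &p)) return p;
  //   return SlowAllocate(arena, n);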
  std::pair<void*, SerialArena::CleanupNode*> AllocateAlignedWithCleanup(
      size_t n, const std::type_info* type);

  // Add object pointer and cleanup function pointer to the list.
  void AddCleanup(void* elem, void (*cleanup)(void*));

 private:
  // Unique for each arena. Changes on Reset().
  uint64_t tag_and_id_;
  // The LSB of tag_and_id_ indicates if allocs in this arena are recorded.
  enum { kRecordAllocs = 1 };

  intptr_t alloc_policy_ = 0;  // Tagged pointer to AllocPolicy.
  // The LSB of alloc_policy_ indicates if the user owns the initial block.
  enum { kUserOwnedInitialBlock = 1 };

  // Pointer to a linked list of SerialArena.
  std::atomic<SerialArena*> threads_;
  std::atomic<SerialArena*> hint_;  // Fast thread-local block access

  const AllocationPolicy* AllocPolicy() const {
    return reinterpret_cast<const AllocationPolicy*>(alloc_policy_ & -8);
  }
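
  // Worked example (an inference from the mask above): AllocationPolicy
  // objects are at least 8-byte aligned, so the low three bits of the
  // pointer are free for tags. Storing a policy with the user-owned bit set
  // might look like
  //
  //   alloc_policy_ =
  //       reinterpret_cast<intptr_t>(policy) | kUserOwnedInitialBlock;
  //
  // and `alloc_policy_ & -8` clears the tag bits to recover the pointer.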
  void InitializeFrom(void* mem, size_t size);
  void InitializeWithPolicy(void* mem, size_t size, bool record_allocs,
                            AllocationPolicy policy);
  void* AllocateAlignedFallback(size_t n, const std::type_info* type);
  std::pair<void*, SerialArena::CleanupNode*>
  AllocateAlignedWithCleanupFallback(size_t n, const std::type_info* type);
  void AddCleanupFallback(void* elem, void (*cleanup)(void*));

  void Init(bool record_allocs);
  void SetInitialBlock(void* mem, size_t size);

  // Delete or Destruct all objects owned by the arena.
  void CleanupList();

  inline bool ShouldRecordAlloc() const { return tag_and_id_ & kRecordAllocs; }

  inline uint64_t LifeCycleId() const {
    return tag_and_id_ & (-kRecordAllocs - 1);
  }
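
  // Worked example: with kRecordAllocs == 1, (-kRecordAllocs - 1) == -2,
  // which converts to a uint64_t mask of all ones except the low bit, so
  // LifeCycleId() is simply tag_and_id_ with the record-allocs tag cleared.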
  inline void RecordAlloc(const std::type_info* allocated_type,
                          size_t n) const {
    AllocPolicy()->metrics_collector->OnAlloc(allocated_type, n);
  }

  inline void CacheSerialArena(SerialArena* serial) {
    thread_cache().last_serial_arena = serial;
    thread_cache().last_lifecycle_id_seen = LifeCycleId();
    // TODO(haberman): evaluate whether we would gain efficiency by getting rid
    // of hint_. It's the only write we do to ThreadSafeArena in the allocation
    // path, which will dirty the cache line.
    hint_.store(serial, std::memory_order_release);
  }

  PROTOBUF_NDEBUG_INLINE bool GetSerialArenaFast(uint64_t lifecycle_id,
                                                 SerialArena** arena) {
    if (GetSerialArenaFromThreadCache(lifecycle_id, arena)) return true;
    if (lifecycle_id & kRecordAllocs) return false;

    // Check whether we own the last accessed SerialArena on this arena. This
    // fast path optimizes the case where a single thread uses multiple arenas.
    ThreadCache* tc = &thread_cache();
    SerialArena* serial = hint_.load(std::memory_order_acquire);
    if (PROTOBUF_PREDICT_TRUE(serial != NULL && serial->owner() == tc)) {
      *arena = serial;
      return true;
    }
    return false;
  }
  PROTOBUF_NDEBUG_INLINE bool GetSerialArenaFromThreadCache(
      uint64_t lifecycle_id, SerialArena** arena) {
    // If this thread already owns a block in this arena then try to use that.
    // This fast path optimizes the case where multiple threads allocate from
    // the same arena.
    ThreadCache* tc = &thread_cache();
    if (PROTOBUF_PREDICT_TRUE(tc->last_lifecycle_id_seen == lifecycle_id)) {
      *arena = tc->last_serial_arena;
      return true;
    }
    return false;
  }
  SerialArena* GetSerialArenaFallback(void* me);

  template <typename Functor>
  void PerSerialArena(Functor fn) {
    // By omitting an Acquire barrier we ensure that any user code that doesn't
    // properly synchronize Reset() or the destructor will throw a TSAN warning.
    SerialArena* serial = threads_.load(std::memory_order_relaxed);

    for (; serial; serial = serial->next()) fn(serial);
  }
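
  // Usage sketch (illustrative): aggregating a statistic across every
  // SerialArena, roughly how SpaceAllocated() could be implemented.
  //
  //   uint64_t total = 0;
  //   PerSerialArena(
  //       [&total](SerialArena* a) { total += a->SpaceAllocated(); });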
  // Releases all memory except the first block which it returns. The first
  // block might be owned by the user and thus need some extra checks before
  // deleting.
  SerialArena::Memory Free(size_t* space_allocated);

#ifdef _MSC_VER
#pragma warning(disable : 4324)
#endif
  struct alignas(64) ThreadCache {
#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
    // If we are using the ThreadLocalStorage class to store the ThreadCache,
    // then the ThreadCache's default constructor has to be responsible for
    // initializing it.
    ThreadCache()
        : next_lifecycle_id(0),
          last_lifecycle_id_seen(-1),
          last_serial_arena(NULL) {}
#endif

    // Number of per-thread lifecycle IDs to reserve. Must be power of two.
    // To reduce contention on a global atomic, each thread reserves a batch of
    // IDs. The following number is calculated based on a stress test with
    // ~6500 threads all frequently allocating a new arena.
    static constexpr size_t kPerThreadIds = 256;
    // Next lifecycle ID available to this thread. We need to reserve a new
    // batch, if `next_lifecycle_id & (kPerThreadIds - 1) == 0`.
    uint64_t next_lifecycle_id;
    // The ThreadCache is considered valid as long as this matches the
    // lifecycle_id of the arena being used.
    uint64_t last_lifecycle_id_seen;
    SerialArena* last_serial_arena;
  };
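
  // Batch-reservation sketch (an assumption modeled on the comments above,
  // not the library's exact code): when a thread's batch is exhausted, it
  // reserves kPerThreadIds more from the global generator.
  //
  //   ThreadCache& tc = thread_cache();
  //   if ((tc.next_lifecycle_id & (ThreadCache::kPerThreadIds - 1)) == 0) {
  //     tc.next_lifecycle_id = lifecycle_id_generator_.id.fetch_add(
  //         ThreadCache::kPerThreadIds, std::memory_order_relaxed);
  //   }
  //   tag_and_id_ = tc.next_lifecycle_id++;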
  // The lifecycle id can be a highly contended variable when many arenas are
  // created. Make sure that other global variables do not share its cache
  // line.
#ifdef _MSC_VER
#pragma warning(disable : 4324)
#endif
  struct alignas(64) CacheAlignedLifecycleIdGenerator {
    std::atomic<LifecycleIdAtomic> id;
  };
  static CacheAlignedLifecycleIdGenerator lifecycle_id_generator_;

#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
  // Android ndk does not support the __thread keyword so we use a custom
  // thread local storage class we implemented.
  // iOS also does not support the __thread keyword.
  static ThreadCache& thread_cache();
#elif defined(PROTOBUF_USE_DLLS)
  // Thread local variables cannot be exposed through DLL interface but we can
  // wrap them in static functions.
  static ThreadCache& thread_cache();
#else
  static PROTOBUF_THREAD_LOCAL ThreadCache thread_cache_;
  static ThreadCache& thread_cache() { return thread_cache_; }
#endif

  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ThreadSafeArena);
  // All protos have pointers back to the arena hence Arena must have
  // pointer stability.
  ThreadSafeArena(ThreadSafeArena&&) = delete;
  ThreadSafeArena& operator=(ThreadSafeArena&&) = delete;

 public:
  // kBlockHeaderSize is sizeof(Block), aligned up to the nearest multiple of 8
  // to protect the invariant that allocation positions are always at a
  // multiple of 8.
  static constexpr size_t kBlockHeaderSize = SerialArena::kBlockHeaderSize;
  static constexpr size_t kSerialArenaSize =
      (sizeof(SerialArena) + 7) & static_cast<size_t>(-8);
  static_assert(kBlockHeaderSize % 8 == 0,
                "kBlockHeaderSize must be a multiple of 8.");
  static_assert(kSerialArenaSize % 8 == 0,
                "kSerialArenaSize must be a multiple of 8.");
};

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include <google/protobuf/port_undef.inc>

#endif  // GOOGLE_PROTOBUF_ARENA_IMPL_H__