i#794: replace malloc, parts 6 and 7: the actual allocator
and an interface to plug it in
- add option -replace_malloc
- relies on drwrap_replace_native() (DRi#760) to invoke the replacement routines o uses a static OP_ret instr to handle differing xl8 from replace_native transformation
- allocator: o allocator is targeted to shadow mode but the design allows for adding a headerless implementation for pattern mode (i#879) with only additions and not major changes by using the same header and free list data structures whether co-located or separate o allocator is a relatively simple multi-free-list-per-size implementation which can be augmented in the future o on Linux uses brk until exhausted and then switches to mmap arenas which are each extended in-place via mremap until failure o large allocs are mmapped and not delay-freed o pre-us mallocs are stored in a table o existing heap regions are used to iterate over multiple arenas o large malloc table is used to iterate over mmap chunks o using libc mmap for large mallocs for now (xref i#199) o using TRY only on drmem queries: so allocator is not as robust as I'd like wrt app args, but that costs performance
- interaction with rest of code: o wrap vs replace are multiplexed using a function pointer interface o existing client_* interface is used to take shadow or profiling or pattern actions on allocs and free o existing wrapping code is kept in place as currently operators are still wrapped (xref i#882)
- FIXME before ready for primetime: o Windows support: allocator itself, and also turning all aux heap routines into nops o callstacks: currently empty o maintain delay free rbtree o obtain alloc type from TLS for mismatch detection
- TODO nice-to-have: o notify DR of brk/mmap o replace operators: i#882 o make alloc routines robust wrt app args (simple approach has perf hit)
- added redzone8 and redzone1024 tests
git-svn-id: http://drmemory.googlecode.com/svn/trunk@846 5b54908e-ead4-fc18-b48a-0285e6dbc711
Changes
--- CMakeLists.txt 9f1df662fe27fd4fb700f723f512855920ed6969
+++ CMakeLists.txt f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -625,6 +625,7 @@
+ common/alloc_replace.c
@@ -654,6 +655,7 @@
+ common/alloc_replace.c
--- common/alloc.c 9f1df662fe27fd4fb700f723f512855920ed6969
+++ common/alloc.c f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -135,6 +135,7 @@
+#include "alloc_replace.h"
@@ -190,11 +191,22 @@
+/* points at the per-malloc API to use */
+malloc_interface_t malloc_interface;
+
-app_pc
-get_brk(void)
-{- return (app_pc) raw_syscall_1arg(SYS_brk, 0);
+byte *
+get_brk(bool pre_us)
+{+ if (pre_us && alloc_ops.replace_malloc)
+ return alloc_replace_orig_brk();
+ return (byte *) raw_syscall_1arg(SYS_brk, 0);
+}
+
+byte *
+set_brk(byte *new_val)
+{+ return (byte *) raw_syscall_1arg(SYS_brk, (ptr_int_t) new_val);
@@ -222,6 +234,9 @@
+
+static void
+malloc_wrap_init(void);
@@ -325,140 +340,6 @@
-
-typedef enum {- /* For Linux and for Cygwin, and for any other allocator connected via
- * a to-be-implemented API (PR 406756)
- */
- /* Typically only one of these size routines is provided */
- HEAP_ROUTINE_SIZE_USABLE,
- HEAP_ROUTINE_SIZE_REQUESTED,
- HEAP_ROUTINE_MALLOC,
- HEAP_ROUTINE_REALLOC,
- HEAP_ROUTINE_FREE,
- /* BSD libc calloc simply calls malloc and then zeroes out
- * the resulting memory: thus, nothing special for us to watch.
- * But glibc calloc does its own allocating.
- */
- HEAP_ROUTINE_CALLOC,
- HEAP_ROUTINE_POSIX_MEMALIGN,
- HEAP_ROUTINE_MEMALIGN,
- HEAP_ROUTINE_VALLOC,
- HEAP_ROUTINE_PVALLOC,
- /* On Windows, we must watch debug operator delete b/c it reads
- * malloc's headers (i#26). On both platforms we want to watch
- * the operators to find mismatches (i#123).
- */
- HEAP_ROUTINE_NEW,
- HEAP_ROUTINE_NEW_ARRAY,
- HEAP_ROUTINE_DELETE,
- HEAP_ROUTINE_DELETE_ARRAY,
- /* Group label for routines that might read heap headers but
- * need no explicit argument modification
- */
- HEAP_ROUTINE_STATS,
- /* Group label for un-handled routine */
- HEAP_ROUTINE_NOT_HANDLED,
- /* Should collapse these two once have aligned-malloc routine support */
- HEAP_ROUTINE_NOT_HANDLED_NOTIFY,
-#ifdef LINUX
- HEAP_ROUTINE_LAST = HEAP_ROUTINE_NOT_HANDLED_NOTIFY,
-#else
- /* Debug CRT routines, which take in extra params */
- HEAP_ROUTINE_SIZE_REQUESTED_DBG,
- HEAP_ROUTINE_MALLOC_DBG,
- HEAP_ROUTINE_REALLOC_DBG,
- HEAP_ROUTINE_FREE_DBG,
- HEAP_ROUTINE_CALLOC_DBG,
- /* Free wrapper used in place of real delete or delete[] operators (i#722,i#655) */
- HEAP_ROUTINE_DebugHeapDelete,
- /* To avoid debug CRT checks (i#51) */
- HEAP_ROUTINE_SET_DBG,
- HEAP_ROUTINE_DBG_NOP,
- /* FIXME PR 595798: for cygwin allocator we have to track library call */
- HEAP_ROUTINE_SBRK,
- HEAP_ROUTINE_LAST = HEAP_ROUTINE_SBRK,
- /* The primary routines we hook are the Rtl*Heap routines, in addition
- * to malloc routines in each library since some either do their own
- * internal parceling (PR 476805) or add padding for debug purposes
- * which we want to treat as unaddressable (DRi#284)
- */
- RTL_ROUTINE_MALLOC,
- RTL_ROUTINE_REALLOC,
- RTL_ROUTINE_FREE,
- RTL_ROUTINE_VALIDATE,
- RTL_ROUTINE_SIZE,
- RTL_ROUTINE_CREATE,
- RTL_ROUTINE_DESTROY,
- RTL_ROUTINE_GETINFO,
- RTL_ROUTINE_SETINFO,
- RTL_ROUTINE_SETFLAGS,
- RTL_ROUTINE_HEAPINFO,
- RTL_ROUTINE_CREATE_ACTCXT, /* for csrss-allocated memory: i#352 */
- RTL_ROUTINE_LOCK,
- RTL_ROUTINE_UNLOCK,
- RTL_ROUTINE_QUERY,
- RTL_ROUTINE_NYI,
- RTL_ROUTINE_SHUTDOWN,
- RTL_ROUTINE_LAST = RTL_ROUTINE_SHUTDOWN,
-#endif
- HEAP_ROUTINE_COUNT,
-} routine_type_t;
-
-static inline bool
-is_size_routine(routine_type_t type)
-{- return (type == HEAP_ROUTINE_SIZE_USABLE || type == HEAP_ROUTINE_SIZE_REQUESTED
- IF_WINDOWS(|| type == RTL_ROUTINE_SIZE
- || type == HEAP_ROUTINE_SIZE_REQUESTED_DBG));
-}
-
-static inline bool
-is_size_requested_routine(routine_type_t type)
-{- return (type == HEAP_ROUTINE_SIZE_REQUESTED
- IF_WINDOWS(|| type == RTL_ROUTINE_SIZE
- || type == HEAP_ROUTINE_SIZE_REQUESTED_DBG));
-}
-
-static inline bool
-is_free_routine(routine_type_t type)
-{- return (type == HEAP_ROUTINE_FREE
- IF_WINDOWS(|| type == RTL_ROUTINE_FREE || type == HEAP_ROUTINE_FREE_DBG));
-}
-
-static inline bool
-is_malloc_routine(routine_type_t type)
-{- return (type == HEAP_ROUTINE_MALLOC
- IF_WINDOWS(|| type == RTL_ROUTINE_MALLOC|| type == HEAP_ROUTINE_MALLOC_DBG));
-}
-
-static inline bool
-is_realloc_routine(routine_type_t type)
-{- return (type == HEAP_ROUTINE_REALLOC
- IF_WINDOWS(|| type == RTL_ROUTINE_REALLOC|| type == HEAP_ROUTINE_REALLOC_DBG));
-}
-
-static inline bool
-is_calloc_routine(routine_type_t type)
-{- return (type == HEAP_ROUTINE_CALLOC IF_WINDOWS(|| type == HEAP_ROUTINE_CALLOC_DBG));
-}
-
-static inline bool
-is_new_routine(routine_type_t type)
-{- return (type == HEAP_ROUTINE_NEW || type == HEAP_ROUTINE_NEW_ARRAY);
-}
-
-static inline bool
-is_delete_routine(routine_type_t type)
-{- return (type == HEAP_ROUTINE_DELETE || type == HEAP_ROUTINE_DELETE_ARRAY);
-}
@@ -723,7 +604,7 @@
-typedef struct _alloc_routine_entry_t {+struct _alloc_routine_entry_t {@@ -735,7 +616,7 @@
-} alloc_routine_entry_t;
+};
@@ -1202,27 +1083,11 @@
-#ifdef WINDOWS
- if (e->type == HEAP_ROUTINE_DBG_NOP) {- /* cdecl so no args to clean up.
- * we can't just insert a generated ret b/c our slowpath
- * assumes the raw bits are persistent.
- */
- if (!drwrap_replace(pc, (app_pc)replaced_nop_routine, false))
- ASSERT(false, "failed to replace dbg-nop");
- } else {-#endif
- /* there could be a race on unload where passing e is unsafe but
- * we live w/ it
- * XXX: for -conservative we should do a lookup
- */
- if (!drwrap_wrap_ex(pc, alloc_hook,
- e->intercept_post ? handle_alloc_post : NULL, (void *)e,
- DRWRAP_UNWIND_ON_EXCEPTION))
- ASSERT(false, "failed to wrap alloc routine");
-#ifdef WINDOWS
- }
-#endif
+ /* there could be a race on unload where passing e is unsafe but
+ * we live w/ it
+ * XXX: for -conservative we should do a lookup
+ */
+ malloc_interface.malloc_intercept(pc, type, e);
@@ -1642,6 +1507,51 @@
+/* XXX i#882: make this static once malloc replacement replaces operators */
+void
+/* XXX: if we split the wrapping from the routine identification we'll
+ * have to figure out how to separate alloc_routine_entry_t: currently an
+ * opaque type in alloc_private.h
+ */
+malloc_wrap__intercept(app_pc pc, routine_type_t type, alloc_routine_entry_t *e)
+{+#ifdef WINDOWS
+ if (e->type == HEAP_ROUTINE_DBG_NOP) {+ /* cdecl so no args to clean up.
+ * we can't just insert a generated ret b/c our slowpath
+ * assumes the raw bits are persistent.
+ */
+ if (!drwrap_replace(pc, (app_pc)replaced_nop_routine, false))
+ ASSERT(false, "failed to replace dbg-nop");
+ } else {+#endif
+ if (!drwrap_wrap_ex(pc, alloc_hook,
+ e->intercept_post ? handle_alloc_post : NULL,
+ (void *)e, DRWRAP_UNWIND_ON_EXCEPTION))
+ ASSERT(false, "failed to wrap alloc routine");
+#ifdef WINDOWS
+ }
+#endif
+}
+
+/* XXX i#882: make this static once malloc replacement replaces operators */
+void
+malloc_wrap__unintercept(app_pc pc, routine_type_t type, alloc_routine_entry_t *e)
+{+#ifdef WINDOWS
+ if (e->type == HEAP_ROUTINE_DBG_NOP) {+ if (!drwrap_replace(pc, NULL/*remove*/, true))
+ ASSERT(false, "failed to unreplace dbg-nop");
+ } else {+#endif
+ if (!drwrap_unwrap(pc, alloc_hook,
+ e->intercept_post ? handle_alloc_post : NULL))
+ ASSERT(false, "failed to unwrap alloc routine");
+#ifdef WINDOWS
+ }
+#endif
+}
+
@@ -1935,8 +1845,6 @@
- MALLOC_POSSIBLE_CLIENT_FLAGS = (MALLOC_CLIENT_1 | MALLOC_CLIENT_2 |
- MALLOC_CLIENT_3 | MALLOC_CLIENT_4),
@@ -2072,6 +1980,12 @@
+
+ /* set up the per-malloc API */
+ if (alloc_ops.replace_malloc)
+ alloc_replace_init();
+ else
+ malloc_wrap_init();
@@ -2084,6 +1998,9 @@
+
+ if (alloc_ops.replace_malloc)
+ alloc_replace_exit();
@@ -2466,18 +2383,9 @@
-#ifdef WINDOWS
- if (e->type == HEAP_ROUTINE_DBG_NOP) {- if (!drwrap_replace(e->pc, NULL/*remove*/, true))
- ASSERT(false, "failed to unreplace dbg-nop");
- } else {-#endif
- if (!drwrap_unwrap(e->pc, alloc_hook,
- e->intercept_post ? handle_alloc_post : NULL))
- ASSERT(false, "failed to unwrap alloc routine");
-#ifdef WINDOWS
- }
-#endif
+
+ malloc_interface.malloc_unintercept(e->pc, e->type, e);
+
@@ -2524,6 +2432,94 @@
+/***************************************************************************
+ * Per-malloc API routing
+ */
+
+void
+malloc_lock(void)
+{+ malloc_interface.malloc_lock();
+}
+
+void
+malloc_unlock(void)
+{+ malloc_interface.malloc_unlock();
+}
+
+app_pc
+malloc_end(app_pc start)
+{+ return malloc_interface.malloc_end(start);
+}
+
+void
+malloc_add(app_pc start, app_pc end, app_pc real_end, bool pre_us,
+ uint client_flags, dr_mcontext_t *mc, app_pc post_call)
+{+ malloc_interface.malloc_add(start, end, real_end, pre_us,
+ client_flags, mc, post_call);
+}
+
+bool
+malloc_is_pre_us(app_pc start)
+{+ return malloc_interface.malloc_is_pre_us(start);
+}
+
+bool
+malloc_is_pre_us_ex(app_pc start, bool ok_if_invalid)
+{+ return malloc_interface.malloc_is_pre_us_ex(start, ok_if_invalid);
+}
+
+ssize_t
+malloc_size(app_pc start)
+{+ return malloc_interface.malloc_size(start);
+}
+
+ssize_t
+malloc_size_invalid_only(app_pc start)
+{+ return malloc_interface.malloc_size_invalid_only(start);
+}
+
+void *
+malloc_get_client_data(app_pc start)
+{+ return malloc_interface.malloc_get_client_data(start);
+}
+
+uint
+malloc_get_client_flags(app_pc start)
+{+ return malloc_interface.malloc_get_client_flags(start);
+}
+
+bool
+malloc_set_client_flag(app_pc start, uint client_flag)
+{+ return malloc_interface.malloc_set_client_flag(start, client_flag);
+}
+
+bool
+malloc_clear_client_flag(app_pc start, uint client_flag)
+{+ return malloc_interface.malloc_clear_client_flag(start, client_flag);
+}
+
+void
+malloc_iterate(malloc_iter_cb_t cb, void *iter_data)
+{+ malloc_interface.malloc_iterate(cb, iter_data);
+}
+
+/***************************************************************************
+ * Per-malloc API for wrapping
+ */
+
@@ -2540,8 +2536,8 @@
-void
-malloc_lock(void)
+static void
+malloc_wrap__lock(void)
@@ -2549,8 +2545,8 @@
-void
-malloc_unlock(void)
+static void
+malloc_wrap__unlock(void)
@@ -2647,9 +2643,9 @@
-void
-malloc_add(app_pc start, app_pc end, app_pc real_end,
- bool pre_us, uint client_flags, dr_mcontext_t *mc, app_pc post_call)
+static void
+malloc_wrap__add(app_pc start, app_pc end, app_pc real_end,
+ bool pre_us, uint client_flags, dr_mcontext_t *mc, app_pc post_call)
@@ -2691,7 +2687,8 @@
-void
+#ifdef WINDOWS
+static void
@@ -2701,6 +2698,7 @@
+#endif
@@ -2757,7 +2755,7 @@
-void
+static void
@@ -2824,8 +2822,8 @@
-bool
-malloc_is_pre_us_ex(app_pc start, bool ok_if_invalid)
+static bool
+malloc_wrap__is_pre_us_ex(app_pc start, bool ok_if_invalid)
@@ -2837,8 +2835,8 @@
-bool
-malloc_is_pre_us(app_pc start)
+static bool
+malloc_wrap__is_pre_us(app_pc start)
@@ -2902,8 +2900,8 @@
-app_pc
-malloc_end(app_pc start)
+static app_pc
+malloc_wrap__end(app_pc start)
@@ -2916,8 +2914,8 @@
-ssize_t
-malloc_size(app_pc start)
+static ssize_t
+malloc_wrap__size(app_pc start)
@@ -2930,8 +2928,8 @@
-ssize_t
-malloc_size_invalid_only(app_pc start)
+static ssize_t
+malloc_wrap__size_invalid_only(app_pc start)
@@ -2943,8 +2941,8 @@
-void *
-malloc_get_client_data(app_pc start)
+static void *
+malloc_wrap__get_client_data(app_pc start)
@@ -2956,8 +2954,8 @@
-uint
-malloc_get_client_flags(app_pc start)
+static uint
+malloc_wrap__get_client_flags(app_pc start)
@@ -2969,8 +2967,8 @@
-bool
-malloc_set_client_flag(app_pc start, uint client_flag)
+static bool
+malloc_wrap__set_client_flag(app_pc start, uint client_flag)
@@ -2984,8 +2982,8 @@
-bool
-malloc_clear_client_flag(app_pc start, uint client_flag)
+static bool
+malloc_wrap__clear_client_flag(app_pc start, uint client_flag)
@@ -3000,10 +2998,7 @@
-malloc_iterate_internal(bool include_native,
- bool (*cb)(app_pc start, app_pc end, app_pc real_end,
- bool pre_us, uint client_flags,
- void *client_data, void *iter_data), void *iter_data)
+malloc_iterate_internal(bool include_native, malloc_iter_cb_t cb, void *iter_data)
@@ -3030,10 +3025,8 @@
-void
-malloc_iterate(bool (*cb)(app_pc start, app_pc end, app_pc real_end,
- bool pre_us, uint client_flags,
- void *client_data, void *iter_data), void *iter_data)
+static void
+malloc_wrap__iterate(malloc_iter_cb_t cb, void *iter_data)
@@ -3058,6 +3051,26 @@
+}
+
+static void
+malloc_wrap_init(void)
+{+ malloc_interface.malloc_lock = malloc_wrap__lock;
+ malloc_interface.malloc_unlock = malloc_wrap__unlock;
+ malloc_interface.malloc_end = malloc_wrap__end;
+ malloc_interface.malloc_add = malloc_wrap__add;
+ malloc_interface.malloc_is_pre_us = malloc_wrap__is_pre_us;
+ malloc_interface.malloc_is_pre_us_ex = malloc_wrap__is_pre_us_ex;
+ malloc_interface.malloc_size = malloc_wrap__size;
+ malloc_interface.malloc_size_invalid_only = malloc_wrap__size_invalid_only;
+ malloc_interface.malloc_get_client_data = malloc_wrap__get_client_data;
+ malloc_interface.malloc_get_client_flags = malloc_wrap__get_client_flags;
+ malloc_interface.malloc_set_client_flag = malloc_wrap__set_client_flag;
+ malloc_interface.malloc_clear_client_flag = malloc_wrap__clear_client_flag;
+ malloc_interface.malloc_iterate = malloc_wrap__iterate;
+ malloc_interface.malloc_intercept = malloc_wrap__intercept;
+ malloc_interface.malloc_unintercept = malloc_wrap__unintercept;
@@ -5506,6 +5519,7 @@
+ LOG(2, "large malloc add "PFX"-"PFX"\n", start, start+ size);
@@ -5518,6 +5532,7 @@
+ LOG(2, "large malloc remove "PFX"\n", start);
--- common/alloc.h 9f1df662fe27fd4fb700f723f512855920ed6969
+++ common/alloc.h f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -65,6 +65,14 @@
+
+ /* replace instead of wrap existing? */
+ bool replace_malloc;
+ /* only used with -replace_malloc: */
+ bool external_headers; /* headers in hashtable instead of inside redzone */
+ uint delay_frees;
+ uint delay_frees_maxsz;
+
@@ -82,7 +90,13 @@
+ MALLOC_POSSIBLE_CLIENT_FLAGS = (MALLOC_CLIENT_1 | MALLOC_CLIENT_2 |
+ MALLOC_CLIENT_3 | MALLOC_CLIENT_4),
+
+typedef bool (*malloc_iter_cb_t)(app_pc start, app_pc end, app_pc real_end,
+ bool pre_us, uint client_flags,
+ void *client_data, void *iter_data);
@@ -141,12 +155,6 @@
-void
-malloc_remove(app_pc start);
-
-void
-malloc_set_valid(app_pc start, bool valid);
-
@@ -184,15 +192,16 @@
-malloc_iterate(bool (*cb)(app_pc start, app_pc end, app_pc real_end,
- bool pre_us, uint client_flags,
- void *client_data, void *iter_data), void *iter_data);
+malloc_iterate(malloc_iter_cb_t cb, void *iter_data);
-app_pc
-get_brk(void);
+byte *
+get_brk(bool pre_us);
+
+byte *
+set_brk(byte *new_val);
@@ -213,15 +222,34 @@
+ * ALLOC REPLACEMENT
+ */
+
+bool
+alloc_entering_replace_routine(app_pc pc);
+
+bool
+alloc_replace_in_cur_arena(byte *addr);
+
+/***************************************************************************
+/* called for each live malloc chunk at process exit */
+/* called when malloc chunk data is being freed so user data can also be freed */
+
+/* called when a malloc is being moved to a free list. the stored user
+ * data is replaced with the return value.
+ * only called when replacing rather than wrapping malloc.
+ */
+void *
+client_malloc_data_to_free_list(void *cur_data, dr_mcontext_t *mc, app_pc post_call);
--- common/alloc_private.h 9f1df662fe27fd4fb700f723f512855920ed6969
+++ common/alloc_private.h f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -29,6 +29,179 @@
+ * MALLOC ROUTINE TYPES
+ */
+
+typedef enum {+ /* For Linux and for Cygwin, and for any other allocator connected via
+ * a to-be-implemented API (PR 406756)
+ */
+ /* Typically only one of these size routines is provided */
+ HEAP_ROUTINE_SIZE_USABLE,
+ HEAP_ROUTINE_SIZE_REQUESTED,
+ HEAP_ROUTINE_MALLOC,
+ HEAP_ROUTINE_REALLOC,
+ HEAP_ROUTINE_FREE,
+ /* BSD libc calloc simply calls malloc and then zeroes out
+ * the resulting memory: thus, nothing special for us to watch.
+ * But glibc calloc does its own allocating.
+ */
+ HEAP_ROUTINE_CALLOC,
+ HEAP_ROUTINE_POSIX_MEMALIGN,
+ HEAP_ROUTINE_MEMALIGN,
+ HEAP_ROUTINE_VALLOC,
+ HEAP_ROUTINE_PVALLOC,
+ /* On Windows, we must watch debug operator delete b/c it reads
+ * malloc's headers (i#26). On both platforms we want to watch
+ * the operators to find mismatches (i#123).
+ */
+ HEAP_ROUTINE_NEW,
+ HEAP_ROUTINE_NEW_ARRAY,
+ HEAP_ROUTINE_DELETE,
+ HEAP_ROUTINE_DELETE_ARRAY,
+ /* Group label for routines that might read heap headers but
+ * need no explicit argument modification
+ */
+ HEAP_ROUTINE_STATS,
+ /* Group label for un-handled routine */
+ HEAP_ROUTINE_NOT_HANDLED,
+ /* Should collapse these two once have aligned-malloc routine support */
+ HEAP_ROUTINE_NOT_HANDLED_NOTIFY,
+#ifdef LINUX
+ HEAP_ROUTINE_LAST = HEAP_ROUTINE_NOT_HANDLED_NOTIFY,
+#else
+ /* Debug CRT routines, which take in extra params */
+ HEAP_ROUTINE_SIZE_REQUESTED_DBG,
+ HEAP_ROUTINE_MALLOC_DBG,
+ HEAP_ROUTINE_REALLOC_DBG,
+ HEAP_ROUTINE_FREE_DBG,
+ HEAP_ROUTINE_CALLOC_DBG,
+ /* Free wrapper used in place of real delete or delete[] operators (i#722,i#655) */
+ HEAP_ROUTINE_DebugHeapDelete,
+ /* To avoid debug CRT checks (i#51) */
+ HEAP_ROUTINE_SET_DBG,
+ HEAP_ROUTINE_DBG_NOP,
+ /* FIXME PR 595798: for cygwin allocator we have to track library call */
+ HEAP_ROUTINE_SBRK,
+ HEAP_ROUTINE_LAST = HEAP_ROUTINE_SBRK,
+ /* The primary routines we hook are the Rtl*Heap routines, in addition
+ * to malloc routines in each library since some either do their own
+ * internal parceling (PR 476805) or add padding for debug purposes
+ * which we want to treat as unaddressable (DRi#284)
+ */
+ RTL_ROUTINE_MALLOC,
+ RTL_ROUTINE_REALLOC,
+ RTL_ROUTINE_FREE,
+ RTL_ROUTINE_VALIDATE,
+ RTL_ROUTINE_SIZE,
+ RTL_ROUTINE_CREATE,
+ RTL_ROUTINE_DESTROY,
+ RTL_ROUTINE_GETINFO,
+ RTL_ROUTINE_SETINFO,
+ RTL_ROUTINE_SETFLAGS,
+ RTL_ROUTINE_HEAPINFO,
+ RTL_ROUTINE_CREATE_ACTCXT, /* for csrss-allocated memory: i#352 */
+ RTL_ROUTINE_LOCK,
+ RTL_ROUTINE_UNLOCK,
+ RTL_ROUTINE_QUERY,
+ RTL_ROUTINE_NYI,
+ RTL_ROUTINE_SHUTDOWN,
+ RTL_ROUTINE_LAST = RTL_ROUTINE_SHUTDOWN,
+#endif
+ HEAP_ROUTINE_COUNT,
+} routine_type_t;
+
+static inline bool
+is_size_routine(routine_type_t type)
+{+ return (type == HEAP_ROUTINE_SIZE_USABLE || type == HEAP_ROUTINE_SIZE_REQUESTED
+ IF_WINDOWS(|| type == RTL_ROUTINE_SIZE
+ || type == HEAP_ROUTINE_SIZE_REQUESTED_DBG));
+}
+
+static inline bool
+is_size_requested_routine(routine_type_t type)
+{+ return (type == HEAP_ROUTINE_SIZE_REQUESTED
+ IF_WINDOWS(|| type == RTL_ROUTINE_SIZE
+ || type == HEAP_ROUTINE_SIZE_REQUESTED_DBG));
+}
+
+static inline bool
+is_free_routine(routine_type_t type)
+{+ return (type == HEAP_ROUTINE_FREE
+ IF_WINDOWS(|| type == RTL_ROUTINE_FREE || type == HEAP_ROUTINE_FREE_DBG));
+}
+
+static inline bool
+is_malloc_routine(routine_type_t type)
+{+ return (type == HEAP_ROUTINE_MALLOC
+ IF_WINDOWS(|| type == RTL_ROUTINE_MALLOC|| type == HEAP_ROUTINE_MALLOC_DBG));
+}
+
+static inline bool
+is_realloc_routine(routine_type_t type)
+{+ return (type == HEAP_ROUTINE_REALLOC
+ IF_WINDOWS(|| type == RTL_ROUTINE_REALLOC|| type == HEAP_ROUTINE_REALLOC_DBG));
+}
+
+static inline bool
+is_calloc_routine(routine_type_t type)
+{+ return (type == HEAP_ROUTINE_CALLOC IF_WINDOWS(|| type == HEAP_ROUTINE_CALLOC_DBG));
+}
+
+static inline bool
+is_new_routine(routine_type_t type)
+{+ return (type == HEAP_ROUTINE_NEW || type == HEAP_ROUTINE_NEW_ARRAY);
+}
+
+static inline bool
+is_delete_routine(routine_type_t type)
+{+ return (type == HEAP_ROUTINE_DELETE || type == HEAP_ROUTINE_DELETE_ARRAY);
+}
+
+/***************************************************************************
+ * Malloc tracking API
+ */
+
+struct _alloc_routine_entry_t;
+typedef struct _alloc_routine_entry_t alloc_routine_entry_t;
+
+typedef struct _malloc_interface_t {+ void (*malloc_lock)(void);
+ void (*malloc_unlock)(void);
+ app_pc (*malloc_end)(app_pc start);
+ void (*malloc_add)(app_pc start, app_pc end, app_pc real_end, bool pre_us,
+ uint client_flags, dr_mcontext_t *mc, app_pc post_call);
+ bool (*malloc_is_pre_us)(app_pc start);
+ bool (*malloc_is_pre_us_ex)(app_pc start, bool ok_if_invalid);
+ ssize_t (*malloc_size)(app_pc start);
+ ssize_t (*malloc_size_invalid_only)(app_pc start);
+ void * (*malloc_get_client_data)(app_pc start);
+ uint (*malloc_get_client_flags)(app_pc start);
+ bool (*malloc_set_client_flag)(app_pc start, uint client_flag);
+ bool (*malloc_clear_client_flag)(app_pc start, uint client_flag);
+ void (*malloc_iterate)(malloc_iter_cb_t cb, void *iter_data);
+ void (*malloc_intercept)(app_pc pc, routine_type_t type, alloc_routine_entry_t *e);
+ void (*malloc_unintercept)(app_pc pc, routine_type_t type, alloc_routine_entry_t *e);
+} malloc_interface_t;
+
+extern malloc_interface_t malloc_interface;
+
+/* XXX i#882: remove from header once malloc replacement replaces operators */
+void
+malloc_wrap__intercept(app_pc pc, routine_type_t type, alloc_routine_entry_t *e);
+
+void
+malloc_wrap__unintercept(app_pc pc, routine_type_t type, alloc_routine_entry_t *e);
+
+/***************************************************************************
--- common/alloc_replace.c 9f1df662fe27fd4fb700f723f512855920ed6969
+++ common/alloc_replace.c f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -1,0 +1,1279 @@
+/* **********************************************************
+ * Copyright (c) 2012 Google, Inc. All rights reserved.
+ * **********************************************************/
+
+/* Dr. Memory: the memory debugger
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License, and no later version.
+
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/***************************************************************************
+ * alloc_replace.c: application allocator replacement routines for both
+ * Dr. Memory and Dr. Heapstat
+ */
+
+/* Requirements beyond regular allocator:
+ * + add redzones (configurable)
+ * + delay frees (configurable): thus unlike most allocators we do
+ * not want to re-use a block immediately even with same-size
+ * repeated alloc-free in order to detect use-after-free
+ * + callbacks for custom actions like updating shadow memory
+ * or heap profiling
+ * + provide iterator over all chunks
+ * + given pointer, know whether the start of a live chunk,
+ * the start of a freed chunk, or neither
+ * + store both requested size and allocated size
+ * + store type: malloc, new or new[]
+ * + store custom flags per chunk (for use during leak scan)
+ * + store callstack
+ * + optional: given pointer, know whether *inside* a live chunk,
+ * a freed chunk, or neither. required during leak scan, but can
+ * build new data structure at that point.
+ * nice-to-have when reporting neighbors of unaddr, and can
+ * use shadow mem heuristics instead.
+ *
+ * Differences vs wrap-based implementation wrt client_ callouts:
+ * + redzones are built-in rather than added by the client, to
+ * facilitate both storing headers in them and sharing them between adjacent chunks
+ * + delay free lists are built-in rather than maintained by client
+ *
+ * Design:
+ * + for !alloc_ops.external_headers, header sits inside redzone;
+ * for alloc_ops.external_headers, header is in a hashtable
+ * + redzones are shared among adjacent allocs:
+ *
+ * | request sz| | redzone size | request size | | redzone size |
+ * | app chunk | pad | redzone | header | app chunk |pad| redzone | header |
+ * ^
+ * arena_next _|
+ *
+ * + arena_next always has a redzone + header space (if co-located, i.e.,
+ * !alloc_ops.external_headers) to its left
+ * + free lists are kept in buckets by size. larger is preferred over
+ * searching. final bucket is var-sized and is always searched.
+ * frees are appended to make the lists FIFO for better delaying
+ * (though worse alloc re-use), and searches start at the front and
+ * take the first fit.
+ * we can add fancier algorithms in the future.
+ * + for alloc_ops.external_headers, free list entries are allocated
+ * externally and point at their heap chunks
+ */
+
+#include "dr_api.h"
+#include "drwrap.h"
+#include "utils.h"
+#include "alloc.h"
+#include "alloc_private.h"
+#include "heap.h"
+#include <string.h> /* memcpy */
+
+#ifdef LINUX
+/* FIXME DRi#199: use syscall and notify DR instead of using libc */
+# define __USE_GNU /* for mremap */
+# include <sys/mman.h>
+#endif
+
+/***************************************************************************
+ * header and free list data structures
+ */
+
+/* Size and alignment constants for the replacement allocator.
+ * Every macro whose replacement list is an expression is parenthesized so
+ * that it expands correctly next to higher-precedence operators, e.g.
+ * "x / CHUNK_MIN_MMAP" or "x % ARENA_INITIAL_SIZE".
+ */
+#define CHUNK_ALIGNMENT 8
+#define CHUNK_MIN_SIZE 8
+#define CHUNK_MIN_MMAP (128*1024)
+/* initial commit has to hold at least one non-mmap chunk */
+#define ARENA_INITIAL_COMMIT CHUNK_MIN_MMAP
+#define ARENA_INITIAL_SIZE (4*1024*1024)
+
+/* we only support allocation sizes under 4GB */
+typedef uint heapsz_t;
+
+/* each free list bucket contains freed chunks of at least its bucket size
+ * XXX: add stats on searches to help in tuning these
+ */
+static const uint free_list_sizes[] = {+ 8, 16, 24, 32, 40, 64, 96, 128, 192, 256, 384, 512, 1024, 2048, 4096
+};
+#define NUM_FREE_LISTS (sizeof(free_list_sizes)/sizeof(free_list_sizes[0]))
+
+enum {+ CHUNK_FREED = MALLOC_RESERVED_1,
+ CHUNK_MMAP = MALLOC_RESERVED_2,
+ /* MALLOC_RESERVED_{3,4} are used for types */+ CHUNK_PRE_US = MALLOC_RESERVED_5,
+ /* to support iteration */
+ CHUNK_ARENA_FINAL = MALLOC_RESERVED_6,
+ /* MALLOC_RESERVED_7 could be used to indicate presence of prev
+ * free chunk for coalescing
+ */
+};
+
+#define HEADER_MAGIC 0x5244 /* "DR" */
+
+/* This header struct is used in both a traditional co-located header
+ * and as a hashtable payload (for alloc_ops.external_headers). Note
+ * that when using redzones there's no problem with a large header as
+ * it sits inside the redzone. But with the hashtable, and for
+ * Dr. Heapstat where we have no redzone, we want to make the header
+ * as compact as is reasonable.
+ */
+typedef struct _chunk_header_t {+ /* if we wanted to save space we could hand out sizes only equal to the buckets
+ * and remove one of these. we'd use a separate header for the largest bucket
+ * that had the alloc_size.
+ */
+ heapsz_t request_size;
+ heapsz_t alloc_size;
+ ushort flags;
+ ushort magic;
+#ifdef X64
+ /* compiler will add anyway: just making explicit. we need the header
+ * size to be aligned to 8 so we can't pack. for alloc_ops.external_headers
+ * we eat this overhead to provide runtime flexibility w/ the same
+ * data struct as we don't need it there.
+ */
+ uint pad;
+#endif
+ void *user_data;
+} chunk_header_t;
+
+#define HEADER_SIZE sizeof(chunk_header_t)
+
+/* if redzone is too small, header sticks beyond it */
+static heapsz_t header_beyond_redzone;
+
+/* free list header for both regular and var-size chunk. each chunk
+ * is at least 8 bytes so we can fit both the next pointer and the
+ * only-used-for-alloc_ops.external_headers chunk pointer, simplifying
+ * the code by having one header type.
+ *
+ * FIXME: for x64 chunk ptr doesn't fit: so either need a separate
+ * struct used for hashtable only that has the chunk ptr, or need
+ * to set CHUNK_MIN_SIZE to 16 for x64
+ */
+typedef struct _free_header_t {+ chunk_header_t head;
+ struct _free_header_t *next;
+ byte *chunk; /* only used for alloc_ops.external_headers */
+} free_header_t;
+
+/* a normal free list can be LIFO, but for more effective delayed frees
+ * we want FIFO. FIFO-per-bucket-size is sufficient.
+ */
+/* FIXME i#794: add rbtree and query routine for overlapping delayed free */
+static free_header_t *free_list_front[NUM_FREE_LISTS];
+static free_header_t *free_list_last[NUM_FREE_LISTS];
+
+/* counters for delayed frees. protected by malloc lock. */
+static uint delayed_chunks;
+static size_t delayed_bytes;
+
+static void *allocator_lock;
+
+#ifdef LINUX
+/* we assume we're the sole users of the brk (after pre-us allocs) */
+static byte *pre_us_brk;
+static byte *cur_brk;
+#endif
+
+/* these describe the current heap arena */
+static byte *arena_start;
+/* the end of reserved memory in the current heap arena */
+static byte *arena_next;
+static byte *arena_commit_end;
+static byte *arena_reserve_end;
+/* the furthest chunk in the current heap arena */
+static chunk_header_t *last_chunk;
+
+/* For handling pre-us mallocs for non-earliest injection or delayed/attach
+ * instrumentation. Contains chunk_header_t entries.
+ * We assume this table is only added to at init and only removed from
+ * at exit time and thus needs no external lock.
+ */
+#define PRE_US_TABLE_HASH_BITS 8
+static hashtable_t pre_us_table;
+
+/* XXX i#879: for pattern mode we don't want co-located headers and
+ * instead want a hashtable of live allocs (free are in free lists
+ * and/or rbtree).
+ * Cleaner to have own table here and not try to use the alloc.c malloc-wrap table
+ * though we do want the same hash tuning.
+ */
+
+/***************************************************************************
+ * utility routines
+ */
+
+static void *
+enter_client_code(void)
+{+ void *drcontext = dr_get_current_drcontext();
+ /* while we are using the app's stack and registers, we need to
+ * switch to the private peb/teb to avoid asserts in symbol
+ * routines.
+ * XXX: is it safe to do away w/ this and relax the asserts?
+ */
+ dr_switch_to_dr_state(drcontext);
+ return drcontext;
+}
+
+static void
+exit_client_code(void *drcontext)
+{+ dr_switch_to_app_state(drcontext);
+}
+
+/* Fills in just enough of mc for it to be handed to the reporting code:
+ * the size and flags fields are set and xsp/xbp are zeroed.
+ */
+static void
+initialize_mcontext_for_report(dr_mcontext_t *mc)
+{
+    /* assumption: we only need xsp and xbp initialized */
+    /* FIXME i#794: add asm support and asm routine to get xsp and xbp:
+     *   get_stack_registers(&mc->xsp, &mc->xbp);
+     * I don't see any cl intrinsic to get xbp (gcc has one): if there were
+     * could assume these routines don't have FPO and set xsp=xbp
+     */
+    mc->xbp = 0;
+    mc->xsp = 0;
+    mc->flags = DR_MC_CONTROL | DR_MC_INTEGER;
+    mc->size = sizeof(*mc);
+}
+
+/* Obtains commit_size bytes of fresh memory straight from the OS.
+ * commit_size must be a multiple of PAGE_SIZE.
+ * On Linux this is an anonymous private read-write mmap, and NULL is returned
+ * if the mapping fails. The Windows version is not yet implemented and
+ * always returns NULL.
+ */
+static byte *
+os_large_alloc(size_t commit_size _IF_WINDOWS(size_t reserve_size))
+{+ /* FIXME DRi#199: how notify DR about app mem alloc?
+ * provide general raw_syscall() interface,
+ * or dr_mmap_as_app() or sthg.
+ * for now using libc call...
+ */
+ ASSERT(ALIGNED(commit_size, PAGE_SIZE), "must align to at least page size");
+#ifdef LINUX
+ /* N.B.: mmap uses too many args for sysenter so we avoid the DR
+ * post-vsyscall hook in this particular case
+ */
+ byte *map = mmap(NULL, commit_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ /* yeah libc returns -1 but this will work w/ syscall too */
+ /* a result in the last page below zero is treated as an error code */
+ if ((ptr_int_t)map < 0 && (ptr_int_t)map > -PAGE_SIZE)
+ return NULL;
+ return map;
+#else
+ /* FIXME i#794: Windows NYI */
+ ASSERT(ALIGNED(reserve_size, PAGE_SIZE), "must align to at least page size");
+ return NULL;
+#endif
+}
+
+/* Tries to resize the mapping at map from cur_commit_size to new_commit_size
+ * bytes without moving it (mremap with no flags on Linux).
+ * Both sizes must be multiples of PAGE_SIZE.
+ * Returns true if the resize succeeded; always false on Windows (NYI).
+ */
+static bool
+os_large_alloc_extend(byte *map, size_t cur_commit_size, size_t new_commit_size)
+{+ ASSERT(ALIGNED(cur_commit_size, PAGE_SIZE), "must align to at least page size");
+ ASSERT(ALIGNED(new_commit_size, PAGE_SIZE), "must align to at least page size");
+#ifdef LINUX
+ byte *newmap = mremap(map, cur_commit_size, new_commit_size, 0/*can't move*/);
+ /* same error-range test as in os_large_alloc() */
+ if ((ptr_int_t)newmap < 0 && (ptr_int_t)newmap > -PAGE_SIZE)
+ return false;
+ return true;
+#else
+ /* FIXME i#794: Windows NYI */
+ return false;
+#endif
+}
+
+/* Releases a mapping of map_size bytes at map back to the OS.
+ * Returns whether the unmap succeeded; always false on Windows (NYI).
+ */
+static bool
+os_large_free(byte *map, size_t map_size)
+{
+#ifdef LINUX
+    ASSERT(ALIGNED(map, PAGE_SIZE), "invalid mmap base");
+    ASSERT(ALIGNED(map_size, PAGE_SIZE), "invalid mmap size");
+    return munmap(map, map_size) == 0;
+#else
+    /* FIXME i#794: Windows NYI */
+    return false;
+#endif
+}
+
+/* Tells the client about a new allocation whose user memory starts at ptr
+ * and whose sizes are recorded in head. The value returned by
+ * client_add_malloc_pre() becomes the chunk's user_data.
+ * client_handle_malloc() is only invoked when call_handle is true, in which
+ * case drcontext must be non-NULL.
+ */
+static void
+notify_client_alloc(bool call_handle, void *drcontext, byte *ptr,
+                    chunk_header_t *head, dr_mcontext_t *mc,
+                    bool zeroed, bool realloc, app_pc caller)
+{
+    byte *request_end = ptr + head->request_size;
+    byte *padded_end = ptr + head->alloc_size;
+
+    head->user_data = client_add_malloc_pre(ptr, request_end, padded_end,
+                                            head->user_data, mc, caller);
+    client_add_malloc_post(ptr, request_end, padded_end, head->user_data);
+    if (!call_handle)
+        return;
+
+    ASSERT(drcontext != NULL, "invalid arg");
+    /* XXX: pattern wants us to subtract redzone
+     * size for real_base but that would result in it clobbering
+     * our header: so we're just incompatible w/ pattern mode
+     * (checked up front in alloc_ops.c).
+     * xref i#879 on an allocator for pattern mode.
+     */
+    client_handle_malloc(drcontext, ptr, head->request_size,
+                         ptr, head->alloc_size, zeroed, realloc, mc);
+}
+
+/***************************************************************************
+ * core allocation routines
+ */
+
+/* Maps an app pointer to where its chunk header would be.
+ * With co-located headers that is HEADER_SIZE bytes before ptr, or NULL if
+ * ptr is too low for a header to precede it. External headers are NYI.
+ * The result is not validated: see is_valid_chunk().
+ */
+static inline chunk_header_t *
+header_from_ptr(void *ptr)
+{
+    if (alloc_ops.external_headers) {
+        /* XXX i#879: hashtable lookup */
+        ASSERT(false, "NYI");
+        return NULL;
+    }
+    return ((ptr_uint_t)ptr < HEADER_SIZE) ? NULL :
+        (chunk_header_t *) ((byte *)ptr - HEADER_SIZE);
+}
+
+/* Pass in result of header_from_ptr() as 2nd arg, but don't de-reference it!
+ * Returns true for both live mallocs and chunks in delay free lists.
+ * Returns false when head is NULL, which header_from_ptr() produces for
+ * pointers below HEADER_SIZE.
+ */
+static inline bool
+is_valid_chunk(void *ptr, chunk_header_t *head)
+{
+    /* Note that we can't be sure w/o using a hashtable, but for performance
+     * it's worth it to risk not identifying an invalid free so we use
+     * heuristics.
+     * XXX improvements:
+     * + should we have an option of using a hashtable to be sure,
+     *   even when !alloc_ops.external_headers?
+     *   app corrupting our allocator would be bad.
+     * + check whether in heap memory region(s) if that's cheap: if
+     *   need rbtree lookup then don't
+     * + could check that next header is a real header, or at end of arena
+     * + could have client_ callout that checks shadow memory
+     */
+    if (alloc_ops.external_headers) {
+        /* XXX i#879: need to look in delay free rbtree too */
+        return head != NULL;
+    } else {
+        /* XXX: we don't want to crash de-referencing head, but
+         * a TRY here has a noticeable perf hit!  live w/ risk of app
+         * crashing our allocator?  have a top-level crash handler
+         * that bails out w/ an error report about invalid arg?
+         * note that we do have a TRY in malloc_replace_size() (b/c rest of
+         * drmem passes us bad pointers during neighbor discovery)
+         * which should be removed if we put a TRY here.
+         */
+        /* The head != NULL test matters for small non-NULL aligned pointers
+         * (e.g. (void *)8): without it we would read head->magic through NULL.
+         */
+        return (ptr != NULL &&
+                head != NULL &&
+                ALIGNED(ptr, CHUNK_ALIGNMENT) &&
+                head->magic == HEADER_MAGIC);
+    }
+}
+
+/* Returns whether ptr (with head from header_from_ptr()) looks like one of
+ * our chunks that has not been freed.
+ */
+static bool
+is_live_alloc(void *ptr, chunk_header_t *head)
+{
+    if (alloc_ops.external_headers)
+        return head != NULL;
+    if (!is_valid_chunk(ptr, head))
+        return false;
+    return !TEST(CHUNK_FREED, head->flags);
+}
+
+/* Makes room for at least add_size more bytes of heap.
+ * First tries to grow the current arena in place (brk or mremap on Linux;
+ * within the reservation elsewhere). If that fails, a brand-new arena is
+ * allocated and becomes the current one, with arena_next pointing just past
+ * its initial redzone.
+ * Returns false, leaving the current arena untouched, if no memory could be
+ * obtained.
+ */
+static bool
+arena_extend(heapsz_t add_size)
+{
+    heapsz_t aligned_add = (heapsz_t) ALIGN_FORWARD(add_size, PAGE_SIZE);
+    byte *new_arena;
+#ifdef LINUX
+    if (arena_commit_end == cur_brk) {
+        byte *new_brk = set_brk(cur_brk + aligned_add);
+        if (new_brk >= cur_brk + add_size) {
+            LOG(2, "\tincreased brk from "PFX" to "PFX"\n", cur_brk, new_brk);
+            cur_brk = new_brk;
+            arena_commit_end = new_brk;
+            /* On Linux the arena is set up with reserve end == commit end:
+             * keep that true so alloc_replace_in_cur_arena() covers the
+             * newly added space.
+             */
+            arena_reserve_end = new_brk;
+            heap_region_adjust(arena_start, new_brk);
+            return true;
+        } else
+            LOG(1, "brk cannot expand: switching to mmap\n");
+    } else
+#else
+    if (arena_commit_end + aligned_add <= arena_reserve_end)
+#endif
+    { /* here to not confuse brace matching */
+        size_t cur_size = arena_commit_end - arena_start;
+        size_t new_size = cur_size + aligned_add;
+        if (os_large_alloc_extend(arena_start, cur_size, new_size)) {
+            arena_commit_end = arena_start + new_size;
+#ifdef LINUX /* windows already added whole reservation */
+            arena_reserve_end = arena_commit_end;
+            heap_region_adjust(arena_start, arena_start + new_size);
+#endif
+            return true;
+        }
+    }
+    /* XXX: add stranded space at end of arena to free list: but have to
+     * update last_chunk properly
+     */
+    LOG(1, "cur arena "PFX"-"PFX" out of space: creating new one\n",
+        arena_start, arena_reserve_end);
+    /* Use a temporary so that a failure here does not clobber arena_start
+     * while arena_next and arena_commit_end still describe the old arena.
+     */
+    new_arena = os_large_alloc(IF_WINDOWS_(ARENA_INITIAL_COMMIT) ARENA_INITIAL_SIZE);
+    if (new_arena == NULL)
+        return false;
+    arena_start = new_arena;
+#ifdef LINUX
+    arena_commit_end = arena_start + ARENA_INITIAL_SIZE;
+#else
+    arena_commit_end = arena_start + ARENA_INITIAL_COMMIT;
+#endif
+    arena_reserve_end = arena_start + ARENA_INITIAL_SIZE;
+    heap_region_add(arena_start, arena_reserve_end, HEAP_ARENA, NULL);
+    /* The old arena's last chunk must keep CHUNK_ARENA_FINAL so that walking
+     * that arena still knows where to stop: forget it here so that the first
+     * chunk carved from the new arena does not clear the flag.
+     */
+    last_chunk = NULL;
+    /* need to start with a redzone */
+    arena_next = arena_start + alloc_ops.redzone_size + header_beyond_redzone;
+    return true;
+}
+
+/* Walks free list number bucket from the front and unlinks the first chunk
+ * whose alloc_size is at least aligned_size.
+ * Returns that chunk, or NULL if the bucket holds nothing big enough.
+ * Caller must hold allocator_lock.
+ */
+static chunk_header_t *
+search_free_list_bucket(heapsz_t aligned_size, uint bucket)
+{
+    free_header_t *entry;
+    free_header_t *before = NULL;
+    ASSERT(dr_recurlock_self_owns(allocator_lock), "caller must hold lock");
+    ASSERT(bucket < NUM_FREE_LISTS, "invalid param");
+
+    /* search for large enough chunk */
+    entry = free_list_front[bucket];
+    while (entry != NULL && entry->head.alloc_size < aligned_size) {
+        before = entry;
+        entry = entry->next;
+    }
+    if (entry == NULL)
+        return NULL;
+
+    /* unlink, fixing up the front and tail pointers as needed */
+    if (before != NULL)
+        before->next = entry->next;
+    else
+        free_list_front[bucket] = entry->next;
+    if (free_list_last[bucket] == entry)
+        free_list_last[bucket] = before;
+    return (chunk_header_t *) entry;
+}
+
+/* Tries to satisfy an allocation of aligned_size bytes from the delayed-free
+ * lists. request_size is only used for logging.
+ * Returns NULL if both delay counters are still below their limits
+ * (alloc_ops.delay_frees chunks and alloc_ops.delay_frees_maxsz bytes) or if
+ * no suitable chunk is found.
+ * On success the chunk has been unlinked from its list, the delay counters
+ * have been decremented, its client data has been freed, and CHUNK_FREED has
+ * been cleared.
+ * Caller must hold allocator_lock.
+ */
+static chunk_header_t *
+find_free_list_entry(heapsz_t request_size, heapsz_t aligned_size)
+{+ chunk_header_t *head = NULL;
+ uint bucket;
+ ASSERT(dr_recurlock_self_owns(allocator_lock), "caller must hold lock");
+
+ /* don't use free list unless we hit max delay */
+ if (delayed_chunks < alloc_ops.delay_frees &&
+ delayed_bytes < alloc_ops.delay_frees_maxsz)
+ return NULL;
+
+ /* b/c we're delaying, we're not able to re-use a just-freed chunk.
+ * thus we go for time over space and use the guaranteed-size bucket
+ * before searching the maybe-big-enough bucket.
+ */
+ /* find the first bucket whose size is >= aligned_size (or the last bucket) */
+ for (bucket = 0;
+ bucket < NUM_FREE_LISTS - 1 && aligned_size > free_list_sizes[bucket];
+ bucket++)
+ ; /* nothing */
+ if (free_list_front[bucket] == NULL && bucket > 0 &&
+ aligned_size < free_list_sizes[bucket]) {+ /* next-bigger is not avail: search maybe-big-enough bucket before
+ * possibly going to even bigger buckets
+ */
+ bucket--;
+ head = search_free_list_bucket(aligned_size, bucket);
+ if (head == NULL)
+ bucket++;
+ }
+
+ /* if delay frees are piling up, use a larger bucket to avoid
+ * delaying a ton of allocs of a certain size and never re-using
+ * them for pathological app alloc sequences
+ */
+ if (head == NULL && free_list_front[bucket] == NULL &&
+ (delayed_chunks >= 2*alloc_ops.delay_frees ||
+ delayed_bytes >= 2*alloc_ops.delay_frees_maxsz)) {+ LOG(2, "\tallocating from larger bucket size to reduce delayed frees\n");
+ while (bucket < NUM_FREE_LISTS - 1 && free_list_front[bucket] == NULL)
+ bucket++;
+ }
+
+ if (head == NULL && free_list_front[bucket] != NULL) {+ if (bucket == NUM_FREE_LISTS - 1) {+ /* var-size bucket: have to search */
+ head = search_free_list_bucket(aligned_size, bucket);
+ } else {+ /* guaranteed to be big enough so take from front */
+ ASSERT(aligned_size <= free_list_sizes[bucket], "logic error");
+ head = (chunk_header_t *) free_list_front[bucket];
+ free_list_front[bucket] = free_list_front[bucket]->next;
+ /* if we took the only entry, the new front is NULL and so is the tail */
+ if (head == (chunk_header_t *) free_list_last[bucket])
+ free_list_last[bucket] = free_list_front[bucket];
+ }
+ }
+
+ if (head != NULL) {+ LOG(2, "\tusing free list size=%d for request=%d align=%d from bucket %d\n",
+ head->alloc_size, request_size, aligned_size, bucket);
+ ASSERT(delayed_chunks > 0, "delay counter off");
+ delayed_chunks--;
+ ASSERT(delayed_bytes >= head->alloc_size, "delay bytes counter off");
+ delayed_bytes -= head->alloc_size;
+ client_malloc_data_free(head->user_data);
+ head->flags &= ~CHUNK_FREED;
+ }
+ return head;
+}
+
+/* Shared implementation behind the replaced malloc/calloc/realloc routines.
+ * Allocates a chunk able to hold request_size bytes and notifies the client.
+ * zeroed and realloc are passed through to the client callbacks; caller is
+ * the replacement routine's address used for reporting.
+ * Large requests get their own mmap; others come from the delayed-free lists
+ * or are carved from the current arena.
+ * Returns the user pointer, or NULL after calling
+ * client_handle_alloc_failure() if the request cannot be satisfied.
+ * allocator_lock is acquired and released internally on every path.
+ */
+static byte *
+replace_alloc_common(size_t request_size, bool zeroed, bool realloc,
+                     void *drcontext, dr_mcontext_t *mc, app_pc caller)
+{
+    heapsz_t aligned_size;
+    byte *res = NULL;
+    chunk_header_t *head = NULL;
+
+    if (request_size > UINT_MAX) {
+        /* rather than have larger headers for 64-bit we just don't support
+         * enormous allocations
+         */
+        client_handle_alloc_failure(request_size, zeroed, realloc, caller, mc);
+        return NULL;
+    }
+
+    aligned_size = ALIGN_FORWARD(request_size, CHUNK_ALIGNMENT);
+    if (aligned_size < request_size || aligned_size + HEADER_SIZE < aligned_size) {
+        /* Rounding up (or adding the header below) wrapped around for a
+         * request near the size limit: fail rather than hand out a chunk
+         * that is smaller than what was asked for.
+         */
+        client_handle_alloc_failure(request_size, zeroed, realloc, caller, mc);
+        return NULL;
+    }
+    if (aligned_size < CHUNK_MIN_SIZE)
+        aligned_size = CHUNK_MIN_SIZE;
+
+    /* XXX: use per-thread free lists to avoid lock in common case */
+    dr_recurlock_lock(allocator_lock);
+
+    /* for large requests we do direct mmap with own redzones.
+     * we use the large malloc table to track them for iteration.
+     * XXX: for simplicity, not delay-freeing these for now
+     */
+    if (aligned_size + HEADER_SIZE >= CHUNK_MIN_MMAP) {
+        size_t map_size = (size_t)
+            ALIGN_FORWARD(aligned_size + alloc_ops.redzone_size*2 +
+                          header_beyond_redzone, PAGE_SIZE);
+        byte *map = NULL;
+        /* the redzone/page padding can also wrap: only map if it did not */
+        if (map_size >= aligned_size)
+            map = os_large_alloc(map_size _IF_WINDOWS(map_size));
+        LOG(2, "\tlarge alloc %d => mmap\n", request_size);
+        if (map == NULL) {
+            client_handle_alloc_failure(request_size, zeroed, realloc, caller, mc);
+            /* do not leave the allocator locked on the failure path */
+            dr_recurlock_unlock(allocator_lock);
+            return NULL;
+        }
+        ASSERT(!alloc_ops.external_headers, "NYI");
+        head = (chunk_header_t *) (map + alloc_ops.redzone_size +
+                                   header_beyond_redzone - HEADER_SIZE);
+        /* set every field explicitly rather than relying on zero-filled pages */
+        head->flags = CHUNK_MMAP;
+        head->user_data = NULL;
+        head->magic = HEADER_MAGIC;
+        head->alloc_size = map_size - alloc_ops.redzone_size*2 - header_beyond_redzone;
+        heap_region_add(map, map + map_size, 0, mc);
+    } else {
+        /* look for free list entry */
+        head = find_free_list_entry(request_size, aligned_size);
+    }
+
+    /* if no free list entry, get new memory */
+    if (head == NULL) {
+        heapsz_t add_size = aligned_size + alloc_ops.redzone_size + header_beyond_redzone;
+        if (arena_next + add_size > arena_commit_end) {
+            if (!arena_extend(add_size)) {
+                client_handle_alloc_failure(request_size, zeroed, realloc, caller, mc);
+                /* do not leave the allocator locked on the failure path */
+                dr_recurlock_unlock(allocator_lock);
+                return NULL;
+            }
+        }
+        /* remember that arena_next always has a redzone preceding it */
+        head = (chunk_header_t *) (arena_next - HEADER_SIZE);
+        LOG(2, "\tcarving out new chunk @"PFX"\n", head);
+        head->alloc_size = aligned_size;
+        head->magic = HEADER_MAGIC;
+        head->user_data = NULL; /* b/c we pass the old to client */
+        head->flags = 0;
+        arena_next += add_size;
+        /* ensure we know where to stop when iterating */
+        if (last_chunk != NULL)
+            last_chunk->flags &= ~CHUNK_ARENA_FINAL;
+        head->flags |= CHUNK_ARENA_FINAL;
+        last_chunk = head;
+    }
+
+    /* head->alloc_size, head->magic, and head->flags (except type) are already set */
+    ASSERT(head->magic == HEADER_MAGIC, "corrupted header");
+    head->request_size = request_size;
+    /* FIXME i#794: need to pass in TLS to get type since still wrapping.
+     * XXX i#882: replace operators.
+     * Need to move the MALLOC_ALLOCATOR_* defines to alloc_private.h.
+     */
+    res = (byte *)(head + 1);
+    LOG(2, "\treplace_alloc_common request=%d, alloc=%d => "PFX"\n",
+        head->request_size, head->alloc_size, res);
+
+    ASSERT(head->alloc_size >= request_size, "chunk too small");
+
+    notify_client_alloc(true/*handle*/, drcontext, (byte *)res, head, mc,
+                        zeroed, realloc, caller);
+
+    if (head->request_size >= LARGE_MALLOC_MIN_SIZE)
+        malloc_large_add(res, request_size);
+
+    dr_recurlock_unlock(allocator_lock);
+
+    return res;
+}
+
+/* Shared implementation of free for the replaced routines.
+ * ptr is the app pointer; caller is the replacement routine's address used
+ * for reporting.
+ * If ptr is not one of our live chunks: a live pre-us entry is deliberately
+ * left alone (leaked), and anything else is reported via
+ * client_invalid_heap_arg().
+ * Otherwise non-mmap chunks are marked CHUNK_FREED and appended to the tail
+ * of the free list for their size bucket, while mmapped chunks are unmapped
+ * right away. The client_* callbacks run while allocator_lock is held.
+ * NOTE(review): a NULL ptr also reaches client_invalid_heap_arg(); presumably
+ * that routine ignores NULL -- confirm.
+ */
+static void
+replace_free_common(void *ptr, void *drcontext, dr_mcontext_t *mc, app_pc caller)
+{+ chunk_header_t *head = header_from_ptr(ptr);
+ free_header_t *cur;
+ uint bucket;
+
+ if (!is_live_alloc(ptr, head)) { /* including NULL */+ /* w/o early inject, or w/ delayed instru, there are allocs in place
+ * before we took over
+ */
+ head = hashtable_lookup(&pre_us_table, (void *)ptr);
+ if (head != NULL && !TEST(CHUNK_FREED, head->flags)) {+ /* XXX: need to call the app's free routine.
+ * Xref DRi#497 for a mechanism to do this; or, we could call
+ * it natively (after swapping TLS back).
+ * For Windows we can assume Rtl since that's where we iterated.
+ * For now we're just leaking these, which we claim is a feature
+ * b/c we'll catch use-after-free :)
+ * FIXME: That's fine for the small # at late inject, but for
+ * attach at a random point that's not good enough: probably
+ * better to free immediately rather than have some extra code
+ * to delay pre-us frees. If we do that we may need an
+ * external table lock.
+ */
+ } else {+ client_invalid_heap_arg(caller, (byte *)ptr, mc,
+ /* XXX: we might be replacing RtlHeapFree or
+ * _free_dbg but it's not worth trying to
+ * store the exact name
+ */
+ "free", true/*free*/);
+ }
+ return;
+ }
+
+ dr_recurlock_lock(allocator_lock);
+
+ /* mmapped chunks are about to be unmapped so they are never marked freed */
+ if (!TEST(CHUNK_MMAP, head->flags))
+ head->flags |= CHUNK_FREED;
+ if (!TESTANY(CHUNK_MMAP | CHUNK_PRE_US, head->flags)) {+ cur = (free_header_t *) head;
+ /* our buckets guarantee that all allocs in that bucket have at least that size */
+ /* pick the largest bucket whose size does not exceed this chunk's size */
+ for (bucket = NUM_FREE_LISTS - 1; head->alloc_size < free_list_sizes[bucket];
+ bucket--)
+ ; /* nothing */
+ ASSERT(head->alloc_size >= free_list_sizes[bucket], "bucket invariant violated");
+ LOG(2, "\treplace_free_common "PFX" == request=%d, alloc=%d\n",
+ ptr, head->request_size, head->alloc_size);
+
+ /* add to the end for delayed free FIFO */
+ cur->next = NULL;
+ if (free_list_last[bucket] == NULL) {+ ASSERT(free_list_front[bucket] == NULL, "inconsistent free list");
+ free_list_front[bucket] = cur;
+ } else
+ free_list_last[bucket]->next = cur;
+ free_list_last[bucket] = cur;
+
+ delayed_chunks++;
+ delayed_bytes += head->alloc_size;
+
+ /* XXX: could add more sophisticated features like coalescing adjacent
+ * free entries which we may actually need for apps with corner-case
+ * alloc patterns. We may also want to implement negative sbrk to
+ * give memory back.
+ */
+ }
+
+ /* current model is to throw the data away when we put on free list.
+ * would we ever want to keep the alloc callstack for freed entries,
+ * or we always want to replace w/ free callstack?
+ */
+ client_remove_malloc_pre((byte *)ptr, (byte *)ptr + head->request_size,
+ (byte *)ptr + head->alloc_size, head->user_data);
+ if (TESTANY(CHUNK_MMAP | CHUNK_PRE_US, head->flags)) {+ client_malloc_data_free(head->user_data);
+ head->user_data = NULL;
+ } else
+ head->user_data = client_malloc_data_to_free_list(head->user_data, mc, caller);
+ client_remove_malloc_post((byte *)ptr, (byte *)ptr + head->request_size,
+ (byte *)ptr + head->alloc_size);
+
+ /* we ignore the return value */
+ client_handle_free((byte *)ptr, head->request_size,
+ /* XXX: real_base is regular base for us => no pattern */
+ (byte *)ptr, head->alloc_size,
+ mc, caller, head->user_data _IF_WINDOWS(NULL));
+
+ if (head->request_size >= LARGE_MALLOC_MIN_SIZE && !TEST(CHUNK_PRE_US, head->flags))
+ malloc_large_remove(ptr);
+
+ /* this must come last: head lives inside the mapping being released */
+ if (TEST(CHUNK_MMAP, head->flags)) {+ /* see comments in alloc routine about not delaying the free */
+ byte *map = (byte *)ptr - alloc_ops.redzone_size - header_beyond_redzone;
+ size_t map_size = head->alloc_size + alloc_ops.redzone_size*2 +
+ header_beyond_redzone;
+ heap_region_remove(map, map + map_size, mc);
+ if (!os_large_free(map, map_size))
+ ASSERT(false, "munmap failed");
+ }
+
+ dr_recurlock_unlock(allocator_lock);
+}
+
+/***************************************************************************
+ * iterator
+ */
+
+/* Bundles alloc_iterate()'s arguments so they can be passed through the
+ * single void* taken by the heap-region and large-malloc iterators.
+ */
+typedef struct _alloc_iter_data_t {+ bool only_live;
+ /* the user's per-chunk callback */
+ malloc_iter_cb_t cb;
+ /* opaque value handed to cb as its final argument */
+ void *data;
+} alloc_iter_data_t;
+
+/* Large-malloc-table callback used by alloc_iterate(): reports only chunks
+ * that live in their own mmap (CHUNK_MMAP).
+ * Entries that are in pre_us_table are skipped: they were added by
+ * malloc_replace__add(), have no co-located header of ours in front of them,
+ * and are reported by alloc_iterate()'s pre_us_table walk instead.
+ * Returns false to stop iterating when the user's callback does.
+ */
+static bool
+alloc_large_iter_cb(byte *start, size_t size, void *iter_data)
+{
+    alloc_iter_data_t *data = (alloc_iter_data_t *) iter_data;
+    chunk_header_t *head;
+    /* reading header_from_ptr() for a pre-us alloc would inspect memory that
+     * we never wrote
+     */
+    if (hashtable_lookup(&pre_us_table, (void *)start) != NULL)
+        return true;
+    head = header_from_ptr(start);
+    if (head != NULL && TEST(CHUNK_MMAP, head->flags)) {
+        return data->cb(start, start + head->request_size, start + head->alloc_size, false,
+                        head->flags & MALLOC_POSSIBLE_CLIENT_FLAGS, head->user_data,
+                        data->data);
+    } /* else already covered in main heap walk */
+    return true;
+}
+
+/* Heap-region callback used by alloc_iterate(): walks every chunk of one of
+ * our own arenas [start, end), invoking the user's callback on each one
+ * (skipping freed chunks when only_live is set).
+ * Pre-us regions and non-arena regions are ignored.
+ * Returns false to stop iterating when the user's callback does.
+ */
+static bool
+alloc_iter_own_arena(byte *start, byte *end, uint flags
+                     _IF_WINDOWS(HANDLE heap), void *iter_data)
+{
+    alloc_iter_data_t *info = (alloc_iter_data_t *) iter_data;
+    chunk_header_t *chunk;
+    byte *pc;
+
+    if (TEST(HEAP_PRE_US, flags) || !TEST(HEAP_ARENA, flags))
+        return true;
+
+    LOG(2, "%s: "PFX"-"PFX"\n", __FUNCTION__, start, end);
+    /* each chunk's user memory is preceded by a redzone holding its header */
+    for (pc = start + alloc_ops.redzone_size + header_beyond_redzone;
+         pc < end;
+         pc += chunk->alloc_size + alloc_ops.redzone_size + header_beyond_redzone) {
+        bool skip;
+        chunk = header_from_ptr(pc);
+        LOG(3, "\tchunk %s "PFX"-"PFX"\n", TEST(CHUNK_FREED, chunk->flags) ? "freed" : "",
+            (chunk + 1), (byte *)(chunk + 1) + chunk->alloc_size);
+        skip = info->only_live && TEST(CHUNK_FREED, chunk->flags);
+        if (!skip) {
+            byte *user_start = (byte *)(chunk + 1);
+            if (!info->cb(user_start, user_start + chunk->request_size,
+                          user_start + chunk->alloc_size,
+                          false/*!pre_us*/, chunk->flags & MALLOC_POSSIBLE_CLIENT_FLAGS,
+                          chunk->user_data, info->data))
+                return false;
+        }
+        /* don't try to walk over un-allocated extra space at end of arena */
+        if (TEST(CHUNK_ARENA_FINAL, chunk->flags))
+            break;
+    }
+    return true;
+}
+
+
+/* Invokes cb on every allocation we know about: chunks in our own arenas,
+ * then mmapped large chunks, then pre-us allocations.
+ * If only_live is set, freed chunks are skipped.
+ * Once cb returns false during the pre-us walk, iteration ends immediately.
+ * NOTE(review): whether a false return during the first two phases ends them
+ * early depends on heap_region_iterate()/malloc_large_iterate(); the later
+ * phases still run afterward.
+ */
+static void
+alloc_iterate(malloc_iter_cb_t cb, void *iter_data, bool only_live)
+{
+    /* Strategy:
+     * + can iterate arenas via heap rbtree
+     *   - each arena of ours can be walked straight through
+     * + ignore pre-us arenas and instead iterate pre_us_table
+     * + for large mallocs can iterate the large_malloc_tree
+     */
+    alloc_iter_data_t data = {only_live, cb, iter_data};
+    uint i;
+
+    LOG(2, "%s\n", __FUNCTION__);
+
+    heap_region_iterate(alloc_iter_own_arena, &data);
+
+    malloc_large_iterate(alloc_large_iter_cb, &data);
+
+    /* XXX: should add hashtable_iterate() to drcontainers */
+    for (i = 0; i < HASHTABLE_SIZE(pre_us_table.table_bits); i++) {
+        /* we do NOT support removal while iterating.  we don't even hold a lock. */
+        hash_entry_t *he;
+        for (he = pre_us_table.table[i]; he != NULL; he = he->next) {
+            chunk_header_t *head = (chunk_header_t *) he->payload;
+            byte *start = he->key;
+            if (!only_live || !TEST(CHUNK_FREED, head->flags)) {
+                if (!cb(start, start + head->request_size, start + head->alloc_size,
+                        true/*pre_us*/, head->flags & MALLOC_POSSIBLE_CLIENT_FLAGS,
+                        head->user_data, iter_data)) {
+                    /* A plain break would only leave this bucket's chain and
+                     * the walk would resume with the next bucket.
+                     */
+                    return;
+                }
+            }
+        }
+    }
+}
+
+/***************************************************************************
+ * app-facing interface
+ */
+
+/* Replacement for malloc(): returns a new chunk of at least size bytes, or
+ * NULL on failure.
+ */
+void *
+replace_malloc(size_t size)
+{
+    dr_mcontext_t mc;
+    byte *chunk;
+    void *dc = enter_client_code();
+    /* XXX: should we make mc a debug-only param for perf? */
+    initialize_mcontext_for_report(&mc);
+    LOG(2, "replace_malloc %d\n", size);
+    chunk = replace_alloc_common(size, false/*!zeroed*/, false/*!realloc*/,
+                                 dc, &mc, (app_pc)replace_malloc);
+    LOG(2, "\treplace_malloc %d => "PFX"\n", size, chunk);
+    exit_client_code(dc);
+    return (void *) chunk;
+}
+
+/* Replacement for calloc(): returns zeroed memory for nmemb elements of
+ * size bytes each.
+ * Returns NULL, after reporting an allocation failure to the client, if
+ * nmemb * size overflows size_t or if the allocation itself fails.
+ */
+void *
+replace_calloc(size_t nmemb, size_t size)
+{
+    void *drcontext = enter_client_code();
+    byte *res = NULL;
+    size_t total = nmemb * size;
+    dr_mcontext_t mc;
+    initialize_mcontext_for_report(&mc);
+    LOG(2, "replace_calloc %d %d\n", nmemb, size);
+    if (size != 0 && total / size != nmemb) {
+        /* The product wrapped around: allocating the wrapped size would hand
+         * the app a buffer far smaller than it expects. The true size is
+         * not representable so report the largest size_t instead.
+         */
+        client_handle_alloc_failure((size_t)-1, true/*zeroed*/, false/*!realloc*/,
+                                    (app_pc)replace_calloc, &mc);
+    } else {
+        res = replace_alloc_common(total, true/*zeroed*/, false/*!realloc*/,
+                                   drcontext, &mc, (app_pc)replace_calloc);
+        /* replace_alloc_common() returns NULL on failure: do not write to it */
+        if (res != NULL)
+            memset(res, 0, total);
+    }
+    LOG(2, "\treplace_calloc %d %d => "PFX"\n", nmemb, size, res);
+    exit_client_code(drcontext);
+    return (void *) res;
+}
+
+/* Replacement for realloc().
+ * - ptr == NULL: notifies the client and then behaves like malloc(size).
+ * - size == 0: frees ptr and returns NULL.
+ * - ptr is not a live chunk of ours: reports an invalid heap argument and
+ *   returns NULL.
+ * - otherwise: resizes in place when the existing chunk is big enough (and
+ *   is not flagged pre-us); else allocates a new chunk, copies the old
+ *   requested bytes over, and frees the old chunk. If that new allocation
+ *   fails, NULL is returned and ptr is left untouched.
+ * NOTE(review): the in-place path updates the header and the large malloc
+ * table without taking allocator_lock in this routine -- confirm intended.
+ */
+void *
+replace_realloc(void *ptr, size_t size)
+{+ void *drcontext = enter_client_code();
+ void *res = NULL;
+ dr_mcontext_t mc;
+ chunk_header_t *head = header_from_ptr(ptr);
+ initialize_mcontext_for_report(&mc);
+ LOG(2, "replace_realloc "PFX" %d\n", ptr, size);
+ if (ptr == NULL) {+ client_handle_realloc_null((app_pc)replace_realloc, &mc);
+ res = (void *) replace_alloc_common(size, false/*!zeroed*/, true/*realloc*/,
+ drcontext, &mc, (app_pc)replace_realloc);
+ } else if (size == 0) {+ replace_free_common(ptr, drcontext, &mc, (app_pc)replace_realloc);
+ } else if (!is_live_alloc(ptr, head)) {+ client_invalid_heap_arg((app_pc)replace_realloc, (byte *)ptr, &mc,
+ /* XXX: we might be replacing RtlReallocateHeap or
+ * _realloc_dbg but it's not worth trying to
+ * store the exact name
+ */
+ "realloc", false/*!free*/);
+ } else {+ if (head->alloc_size >= size && !TEST(CHUNK_PRE_US, head->flags)) {+ /* XXX: if shrinking a lot, should free and re-malloc to save space */
+ client_handle_realloc(drcontext, (byte *)ptr, head->request_size,
+ (byte *)ptr, size,
+ /* XXX: real_base is regular base for us => no pattern */
+ (byte *)ptr, &mc);
+ /* membership in the large malloc table depends on request_size, so
+ * remove under the old size and re-add under the new one
+ */
+ if (head->request_size >= LARGE_MALLOC_MIN_SIZE)
+ malloc_large_remove(ptr);
+ head->request_size = size;
+ if (head->request_size >= LARGE_MALLOC_MIN_SIZE)
+ malloc_large_add(ptr, head->request_size);
+ res = ptr;
+ } else {+ /* XXX: use mremap for mmapped alloc! */
+ /* XXX: if final chunk in arena, extend in-place */
+ res = (void *) replace_alloc_common(size, false/*!zeroed*/, true/*realloc*/,
+ drcontext, &mc, (app_pc)replace_realloc);
+ if (res != NULL) {+ memcpy(res, ptr, head->request_size);
+ replace_free_common(ptr, drcontext, &mc, (app_pc)replace_realloc);
+ }
+ }
+ }
+ LOG(2, "\treplace_realloc %d => "PFX"\n", size, res);
+ exit_client_code(drcontext);
+ return res;
+}
+
+/* Replacement for free(): hands ptr to replace_free_common(). */
+void
+replace_free(void *ptr)
+{
+    dr_mcontext_t mc;
+    void *dc = enter_client_code();
+    initialize_mcontext_for_report(&mc);
+    LOG(2, "replace_free "PFX"\n", ptr);
+    replace_free_common(ptr, dc, &mc, (app_pc)replace_free);
+    exit_client_code(dc);
+}
+
+/* Replacement for malloc_usable_size() (_msize on Windows).
+ * Returns the size originally requested for ptr: the padding beyond it is
+ * not considered usable.
+ * Returns 0, after reporting an invalid heap argument, if ptr is not a live
+ * chunk of ours.
+ */
+size_t
+replace_malloc_usable_size(void *ptr)
+{
+    void *drcontext = enter_client_code();
+    chunk_header_t *head = header_from_ptr(ptr);
+    size_t res = 0;
+    dr_mcontext_t mc;
+    initialize_mcontext_for_report(&mc);
+    LOG(2, "replace_malloc_usable_size "PFX"\n", ptr);
+    if (!is_live_alloc(ptr, head)) {
+        client_invalid_heap_arg((app_pc)replace_malloc_usable_size, (byte *)ptr, &mc,
+                                IF_WINDOWS_ELSE("_msize", "malloc_usable_size"),
+                                false/*!free*/);
+        /* fall through to the common exit so that we always switch back to
+         * app state before returning to the app
+         */
+    } else {
+        res = head->request_size; /* we do not allow using padding */
+        LOG(2, "\treplace_malloc_usable_size "PFX" => "PIFX"\n", ptr, res);
+    }
+    exit_client_code(drcontext);
+    return res;
+}
+
+/* XXX i#882: replace operator new/delete known to be non-placement to
+ * avoid wrap cost and to support redzones on debug CRT.
+ * We will also be able to pass in the allocation type rather than
+ * reading it from CLS.
+ */
+
+/* XXX i#94: replace mallopt(), mallinfo(), valloc(), memalign(), etc. */
+
+/***************************************************************************
+ * drmem-facing interface
+ */
+
+#ifdef LINUX
+/* Returns the program break recorded by alloc_replace_init() before the
+ * replacement allocator moved it. Only valid with alloc_ops.replace_malloc.
+ */
+byte *
+alloc_replace_orig_brk(void)
+{+ ASSERT(alloc_ops.replace_malloc, "shouldn't call");
+ return pre_us_brk;
+}
+#endif
+
+/* Returns whether addr lies within [arena_start, arena_reserve_end) of the
+ * current arena. Only valid with alloc_ops.replace_malloc.
+ */
+bool
+alloc_replace_in_cur_arena(byte *addr)
+{+ ASSERT(alloc_ops.replace_malloc, "shouldn't call");
+ return (addr >= arena_start && addr < arena_reserve_end);
+}
+
+/* Returns the result of drwrap_is_replaced_native() for pc. */
+bool
+alloc_entering_replace_routine(app_pc pc)
+{+ return drwrap_is_replaced_native(pc);
+}
+
+/* Maps a routine type to the replace_* routine that takes its place.
+ * Returns NULL for routine types we do not replace.
+ */
+static void *
+func_interceptor(routine_type_t type)
+{
+    if (is_malloc_routine(type))
+        return (void *) replace_malloc;
+    if (is_calloc_routine(type))
+        return (void *) replace_calloc;
+    if (is_realloc_routine(type))
+        return (void *) replace_realloc;
+    if (is_free_routine(type))
+        return (void *) replace_free;
+    if (is_size_routine(type))
+        return (void *) replace_malloc_usable_size;
+    return NULL;
+}
+
+/* malloc_interface.malloc_intercept implementation: replaces the routine at
+ * pc with our own version when we have one for its type, and otherwise
+ * falls back to wrapping it.
+ */
+static void
+malloc_replace__intercept(app_pc pc, routine_type_t type, alloc_routine_entry_t *e)
+{
+    void *replacement = func_interceptor(type);
+    if (replacement == NULL) {
+        /* else wrap: operators in particular.
+         * XXX i#882: replace operators.
+         */
+        /* FIXME i#794: Windows NYI: want to replace
+         * create/destroy/validate/etc., along with all other
+         * heap-related routines currenly not intercepted, w/ nops
+         */
+        malloc_wrap__intercept(pc, type, e);
+        return;
+    }
+    if (!drwrap_replace_native(pc, replacement, false))
+        ASSERT(false, "failed to replace alloc routine");
+}
+
+/* malloc_interface.malloc_unintercept implementation: undoes
+ * malloc_replace__intercept() for the routine at pc.
+ */
+static void
+malloc_replace__unintercept(app_pc pc, routine_type_t type, alloc_routine_entry_t *e)
+{
+    if (func_interceptor(type) == NULL) {
+        /* this type was wrapped rather than replaced */
+        malloc_wrap__unintercept(pc, type, e);
+        return;
+    }
+    if (!drwrap_replace_native(pc, NULL, true))
+        ASSERT(false, "failed to un-replace alloc routine");
+}
+
+/* malloc_interface.malloc_add implementation: records a pre-us allocation
+ * [start, end) whose padded extent reaches real_end.
+ * A separately allocated header is stored in pre_us_table keyed by start,
+ * and the client is notified via notify_client_alloc() without invoking
+ * client_handle_malloc(). client_flags is not used here.
+ */
+static void
+malloc_replace__add(app_pc start, app_pc end, app_pc real_end,
+ bool pre_us, uint client_flags, dr_mcontext_t *mc, app_pc post_call)
+{+ IF_DEBUG(bool new_entry;)
+ chunk_header_t *head = global_alloc(sizeof(*head), HEAPSTAT_HASHTABLE);
+ head->request_size = (end - start);
+ if (head->request_size >= LARGE_MALLOC_MIN_SIZE)
+ malloc_large_add(start, head->request_size);
+ head->alloc_size = (real_end - start);
+ head->flags = CHUNK_PRE_US;
+ head->magic = HEADER_MAGIC;
+ head->user_data = NULL;
+ /* we assume only called for pre_us and only during init when no lock is needed */
+ ASSERT(pre_us, "malloc add from outside must be pre_us");
+ /* the IF_DEBUG prefix and the call below form one statement in debug builds */
+ IF_DEBUG(new_entry =)
+ hashtable_add(&pre_us_table, (void *)start, (void *)head);
+ ASSERT(new_entry, "should be no pre-us dups");
+ notify_client_alloc(false/*no handle: caller can do that on its own*/,
+ NULL, start, head, mc,
+ false/*zeroed? dunno*/, false/*!realloc*/, post_call);
+}
+
+/* Returns whether start is the base of a recorded pre-us allocation.
+ * Entries marked CHUNK_FREED only count when ok_if_invalid is true.
+ */
+static bool
+malloc_replace__is_pre_us_ex(app_pc start, bool ok_if_invalid)
+{
+    /* see notes up top about not needing an external lock */
+    chunk_header_t *entry = hashtable_lookup(&pre_us_table, (void *)start);
+    if (entry == NULL)
+        return false;
+    return (ok_if_invalid || !TEST(CHUNK_FREED, entry->flags));
+}
+
+/* Returns whether start is the base of a pre-us allocation that is not
+ * marked freed.
+ */
+static bool
+malloc_replace__is_pre_us(app_pc start)
+{+ return malloc_replace__is_pre_us_ex(start, false);
+}
+
+/* Returns the end of the requested size of the live allocation based at
+ * start, or NULL if start is not a live allocation of ours.
+ */
+static app_pc
+malloc_replace__end(app_pc start)
+{
+    chunk_header_t *head = header_from_ptr(start);
+    if (is_live_alloc(start, head))
+        return start + head->request_size;
+    return NULL;
+}
+
+/* Returns the requested size of the live allocation based at start.
+ * Returns -1 on failure, which includes start not being a live allocation
+ * of ours and a fault while inspecting its header.
+ */
+static ssize_t
+malloc_replace__size(app_pc start)
+{+ chunk_header_t *head = header_from_ptr(start);
+ /* avoid crashing when drmem does neighbor discovery queries.
+ * see comment under is_valid_chunk() on why TRY isn't up there.
+ */
+ ssize_t res = -1;
+ DR_TRY_EXCEPT(dr_get_current_drcontext(), {+ if (is_live_alloc(start, head))
+ res = head->request_size;
+ }, { /* EXCEPT */+ res = -1;
+ });
+ return res;
+}
+
+/* Returns the requested size of the chunk based at start only if it is one
+ * of our chunks and has been freed; returns -1 otherwise.
+ */
+static ssize_t
+malloc_replace__size_invalid_only(app_pc start)
+{
+    chunk_header_t *head = header_from_ptr(start);
+    if (is_valid_chunk(start, head) && TEST(CHUNK_FREED, head->flags))
+        return head->request_size;
+    return -1;
+}
+
+/* Returns the client data stored in the header of the chunk based at
+ * start, or NULL if start is not one of our chunks.
+ */
+static void *
+malloc_replace__get_client_data(app_pc start)
+{
+    chunk_header_t *head = header_from_ptr(start);
+    if (is_valid_chunk(start, head))
+        return head->user_data;
+    return NULL;
+}
+
+/* Returns the client-owned flag bits of the chunk based at start, or 0 if
+ * start is not one of our chunks.
+ */
+static uint
+malloc_replace__get_client_flags(app_pc start)
+{
+    chunk_header_t *head = header_from_ptr(start);
+    if (is_valid_chunk(start, head))
+        return (head->flags & MALLOC_POSSIBLE_CLIENT_FLAGS);
+    return 0;
+}
+
+/* Sets the client-owned bits of client_flag on the chunk based at start.
+ * Returns false, changing nothing, if start is not one of our chunks.
+ */
+static bool
+malloc_replace__set_client_flag(app_pc start, uint client_flag)
+{
+    chunk_header_t *head = header_from_ptr(start);
+    bool valid = is_valid_chunk(start, head);
+    if (valid) {
+        /* bits outside the client mask are dropped */
+        head->flags |= (client_flag & MALLOC_POSSIBLE_CLIENT_FLAGS);
+    }
+    return valid;
+}
+
+/* Clears the client-owned bits of client_flag on the chunk based at start.
+ * Returns false, changing nothing, if start is not one of our chunks.
+ */
+static bool
+malloc_replace__clear_client_flag(app_pc start, uint client_flag)
+{
+    chunk_header_t *head = header_from_ptr(start);
+    bool valid = is_valid_chunk(start, head);
+    if (valid) {
+        /* bits outside the client mask are left alone */
+        head->flags &= ~(client_flag & MALLOC_POSSIBLE_CLIENT_FLAGS);
+    }
+    return valid;
+}
+
+/* malloc_interface.malloc_iterate implementation: runs cb over live
+ * allocations only, passing iter_data through to each call.
+ */
+static void
+malloc_replace__iterate(bool (*cb)(app_pc start, app_pc end, app_pc real_end,
+ bool pre_us, uint client_flags,
+ void *client_data, void *iter_data), void *iter_data)
+{+ alloc_iterate(cb, iter_data, true/*live only*/);
+}
+
+/* malloc_interface.malloc_lock implementation: acquires allocator_lock. */
+static void
+malloc_replace__lock(void)
+{+ dr_recurlock_lock(allocator_lock);
+}
+
+/* malloc_interface.malloc_unlock implementation: releases allocator_lock. */
+static void
+malloc_replace__unlock(void)
+{+ dr_recurlock_unlock(allocator_lock);
+}
+
+/* Initializes the replacement allocator: sanity-checks the size constants,
+ * computes header_beyond_redzone, creates allocator_lock and pre_us_table,
+ * sets up the initial arena (at the current brk on Linux, a large alloc
+ * elsewhere), and points malloc_interface at the malloc_replace__* routines.
+ */
+void
+alloc_replace_init(void)
+{+ ASSERT(sizeof(free_header_t) <=
+ (alloc_ops.external_headers ? 0 : sizeof(chunk_header_t)) + CHUNK_MIN_SIZE,
+ "min size too small");
+ ASSERT(ALIGNED(sizeof(chunk_header_t), CHUNK_ALIGNMENT), "alignment off");
+
+ ASSERT(CHUNK_MIN_MMAP >= LARGE_MALLOC_MIN_SIZE,
+ "we rely on mmapped chunks being in large malloc table");
+
+ ASSERT(ARENA_INITIAL_SIZE >= CHUNK_MIN_MMAP, "arena must hold at least 1 chunk");
+
+ ASSERT(ALIGNED(alloc_ops.redzone_size, CHUNK_ALIGNMENT), "redzone alignment off");
+
+ /* when the redzone cannot hold the whole header, this is the extra space
+ * that gets added in front of each chunk
+ */
+ if (alloc_ops.redzone_size < HEADER_SIZE)
+ header_beyond_redzone = HEADER_SIZE - alloc_ops.redzone_size;
+
+ allocator_lock = dr_recurlock_create();
+
+ hashtable_init(&pre_us_table, PRE_US_TABLE_HASH_BITS, HASH_INTPTR, false/*!strdup*/);
+
+#ifdef LINUX
+ /* we waste pre-brk space of pre-us allocator, and we assume we're
+ * now completely replacing the pre-us allocator.
+ * XXX: better to not use brk and solely use mmap instead?
+ */
+ cur_brk = get_brk(false);
+ pre_us_brk = cur_brk;
+ arena_start = pre_us_brk;
+ cur_brk = set_brk(cur_brk + PAGE_SIZE);
+ arena_commit_end = cur_brk;
+ arena_reserve_end = arena_commit_end;
+ /* XXX: for delayed instru we will need to handle this; for now we assert */
+ ASSERT(cur_brk > arena_start, "failed to increase brk at init");
+ LOG(2, "heap orig brk="PFX"\n", pre_us_brk);
+#else
+ arena_start = os_large_alloc(ARENA_INITIAL_COMMIT, ARENA_INITIAL_SIZE);
+ ASSERT(arena_start != NULL, "can't allocate initial heap: fatal");
+ arena_commit_end = arena_start + ARENA_INITIAL_COMMIT;
+ arena_reserve_end = arena_start + ARENA_INITIAL_SIZE;
+#endif
+ heap_region_add(arena_start, arena_reserve_end, HEAP_ARENA, NULL);
+ /* need to start with a redzone */
+ arena_next = arena_start + alloc_ops.redzone_size + header_beyond_redzone;
+
+ /* set up pointers for per-malloc API */
+ malloc_interface.malloc_lock = malloc_replace__lock;
+ malloc_interface.malloc_unlock = malloc_replace__unlock;
+ malloc_interface.malloc_end = malloc_replace__end;
+ malloc_interface.malloc_add = malloc_replace__add;
+ malloc_interface.malloc_is_pre_us = malloc_replace__is_pre_us;
+ malloc_interface.malloc_is_pre_us_ex = malloc_replace__is_pre_us_ex;
+ malloc_interface.malloc_size = malloc_replace__size;
+ malloc_interface.malloc_size_invalid_only = malloc_replace__size_invalid_only;
+ malloc_interface.malloc_get_client_data = malloc_replace__get_client_data;
+ malloc_interface.malloc_get_client_flags = malloc_replace__get_client_flags;
+ malloc_interface.malloc_set_client_flag = malloc_replace__set_client_flag;
+ malloc_interface.malloc_clear_client_flag = malloc_replace__clear_client_flag;
+ malloc_interface.malloc_iterate = malloc_replace__iterate;
+ malloc_interface.malloc_intercept = malloc_replace__intercept;
+ malloc_interface.malloc_unintercept = malloc_replace__unintercept;
+}
+
+/* Heap-region callback used at exit: releases each of our own arenas via
+ * os_large_free(). On Linux the region ending at cur_brk is left alone
+ * (that is the brk-based arena, which was not obtained from os_large_alloc()).
+ * Always returns true to keep iterating.
+ */
+static bool
+free_arena_at_exit(byte *start, byte *end, uint flags
+ _IF_WINDOWS(HANDLE heap), void *iter_data)
+{+ if (TEST(HEAP_ARENA, flags) && !TEST(HEAP_PRE_US, flags)) {+#ifdef LINUX
+ if (end != cur_brk)
+#endif
+ os_large_free(start, end - start);
+ }
+ return true;
+}
+
+/* alloc_iterate() callback used at exit: frees the client data attached to
+ * each chunk that is not pre-us. Pre-us entries are left for the caller.
+ */
+static bool
+free_user_data_at_exit(app_pc start, app_pc end, app_pc real_end,
+                       bool pre_us, uint client_flags,
+                       void *client_data, void *iter_data)
+{
+    chunk_header_t *head;
+    if (pre_us)
+        return true; /* keep iterating */
+    head = header_from_ptr(start);
+    client_malloc_data_free(head->user_data);
+    return true; /* keep iterating */
+}
+
+/* Tears down the replacement allocator: frees the client data of every
+ * non-pre-us chunk reached by alloc_iterate() (live or freed), frees the
+ * pre-us headers along with any client data they hold, deletes pre_us_table,
+ * releases our arenas via free_arena_at_exit(), and destroys allocator_lock.
+ */
+void
+alloc_replace_exit(void)
+{+ uint i;
+ alloc_iterate(free_user_data_at_exit, NULL, false/*free too*/);
+ /* XXX: should add hashtable_iterate() to drcontainers */
+ for (i = 0; i < HASHTABLE_SIZE(pre_us_table.table_bits); i++) {+ hash_entry_t *he, *next;
+ for (he = pre_us_table.table[i]; he != NULL; he = next) {+ chunk_header_t *head = (chunk_header_t *) he->payload;
+ next = he->next;
+ if (head->user_data != NULL)
+ client_malloc_data_free(head->user_data);
+ global_free(head, sizeof(*head), HEAPSTAT_HASHTABLE);
+ }
+ }
+ hashtable_delete_with_stats(&pre_us_table, "pre_us");
+
+ heap_region_iterate(free_arena_at_exit, NULL);
+
+ dr_recurlock_destroy(allocator_lock);
+}
--- common/alloc_replace.h 9f1df662fe27fd4fb700f723f512855920ed6969
+++ common/alloc_replace.h f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -1,0 +1,65 @@
+/* **********************************************************
+ * Copyright (c) 2012 Google, Inc. All rights reserved.
+ * **********************************************************/
+
+/* Dr. Memory: the memory debugger
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License, and no later version.
+
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/***************************************************************************
+ * alloc_replace.h: Dr. Memory heap tracking internal header
+ */
+
+#ifndef _ALLOC_REPLACE_H_
+#define _ALLOC_REPLACE_H_ 1
+
+/***************************************************************************
+ * app-facing interface
+ *
+ * Each routine has the same signature as the C library routine it is named
+ * after.
+ * NOTE(review): presumably each also follows that routine's contract for
+ * return values and for NULL/zero arguments -- confirm in alloc_replace.c.
+ */
+
+/* Stand-in for the application's malloc(). */
+void *
+replace_malloc(size_t size);
+
+/* Stand-in for the application's calloc(). */
+void *
+replace_calloc(size_t nmemb, size_t size);
+
+/* Stand-in for the application's realloc(). */
+void *
+replace_realloc(void *ptr, size_t size);
+
+/* Stand-in for the application's free(). */
+void
+replace_free(void *ptr);
+
+/* Stand-in for the application's malloc_usable_size(). */
+size_t
+replace_malloc_usable_size(void *ptr);
+
+/***************************************************************************
+ * drmem-facing interface
+ */
+
+/* Sets up the replacement allocator.
+ * NOTE(review): expected to run once, before any replace_* routine is
+ * invoked -- confirm against the caller.
+ */
+void
+alloc_replace_init(void);
+
+/* Tears down the replacement allocator: frees remaining client data and the
+ * pre-us table, releases arenas, and destroys the allocator lock.
+ */
+void
+alloc_replace_exit(void);
+
+#ifdef LINUX
+/* NOTE(review): presumably returns the program break as it stood before this
+ * allocator extended it -- confirm in alloc_replace.c.
+ */
+byte *
+alloc_replace_orig_brk(void);
+#endif
+
+/* rest is in malloc_interface_t */
+
+#endif /* _ALLOC_REPLACE_H_ */
--- common/heap.c 9f1df662fe27fd4fb700f723f512855920ed6969
+++ common/heap.c f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -1,5 +1,5 @@
- * Copyright (c) 2011 Google, Inc. All rights reserved.
+ * Copyright (c) 2011-2012 Google, Inc. All rights reserved.
@@ -97,7 +97,7 @@
- app_pc cur_brk = get_brk();
+ app_pc cur_brk = get_brk(true/*pre-us*/);
@@ -107,7 +107,10 @@
- ASSERT(info.base_pc + info.size == cur_brk, "heap location error");
+ /* we no longer assert that these are equal b/c -replace_malloc
+ * has extended the brk already
+ */
+ ASSERT(info.base_pc + info.size >= cur_brk, "heap location error");
@@ -343,7 +346,7 @@
- app_pc cur_brk = get_brk();
+ app_pc cur_brk = get_brk(true/*pre-us*/);
--- drheapstat/drheapstat.c 9f1df662fe27fd4fb700f723f512855920ed6969
+++ drheapstat/drheapstat.c f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -340,6 +340,13 @@
+}
+
+void *
+client_malloc_data_to_free_list(void *cur_data, dr_mcontext_t *mc, app_pc post_call)
+{+ /* nothing to do since we persist our callstacks in alloc_stack_table */
+ return cur_data;
--- drmemory/alloc_drmem.c 9f1df662fe27fd4fb700f723f512855920ed6969
+++ drmemory/alloc_drmem.c f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -164,6 +164,11 @@
+ /* replace vs wrap */
+ alloc_ops.replace_malloc = options.replace_malloc;
+ alloc_ops.external_headers = (options.pattern != 0);
+ alloc_ops.delay_frees = options.delay_frees;
+ alloc_ops.delay_frees_maxsz = options.delay_frees_maxsz;
@@ -560,7 +565,7 @@
- sz, get_heap_start(), get_brk());
+ sz, get_heap_start(), get_brk(false/*want full extent*/));
@@ -699,7 +704,7 @@
- if (INSTRUMENT_MEMREFS() && options.delay_frees > 0) {+ if (INSTRUMENT_MEMREFS() && !options.replace_malloc && options.delay_frees > 0) {@@ -841,6 +846,20 @@
+/* Called when a chunk's client data is carried over to the free list.
+ * Only valid under -replace_malloc (asserted).
+ *
+ * cur_data:  the client data currently attached to the chunk; treated as a
+ *            packed_callstack_t* when options.delay_frees_stack is set.
+ * mc, post_call: passed through to get_shared_callstack().
+ *
+ * Returns the client data to store from now on: a fresh shared callstack
+ * when options.delay_frees_stack is set, otherwise cur_data unchanged.
+ */
+void *
+client_malloc_data_to_free_list(void *cur_data, dr_mcontext_t *mc, app_pc post_call)
+{
+    ASSERT(options.replace_malloc, "should not be called");
+    /* replace malloc callstack with free callstack */
+    if (options.delay_frees_stack) {
+        packed_callstack_t *pcs = (packed_callstack_t *) cur_data;
+        ASSERT(pcs != NULL || !options.count_leaks, "malloc data must exist");
+        /* The assert above permits NULL when leaks are not being counted,
+         * so do not hand a NULL callstack to the free routine.
+         */
+        if (pcs != NULL)
+            shared_callstack_free(pcs);
+        return (void *) get_shared_callstack(NULL, mc, post_call);
+    }
+    return cur_data;
+}
+
@@ -887,6 +906,7 @@
+ /* XXX i#794: query allocator for options.replace_malloc */
@@ -1069,6 +1089,7 @@
+ LOG(2, "Entering windows callback handler\n");
@@ -2259,4 +2280,3 @@
-
--- drmemory/drmemory.c 9f1df662fe27fd4fb700f723f512855920ed6969
+++ drmemory/drmemory.c f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -779,7 +779,7 @@
- app_pc cur_brk = get_brk();
+ app_pc cur_brk = get_brk(true/*pre-us*/);
@@ -842,6 +842,11 @@
+ } else if (options.replace_malloc &&
+ /* base won't be b/c it will be pre-us heap */
+ alloc_replace_in_cur_arena(info.base_pc + info.size - 1)) {+ /* ignore replace-heap: leave unaddressable */
+ LOG(2, " => replacement heap\n");
--- drmemory/leak.c 9f1df662fe27fd4fb700f723f512855920ed6969
+++ drmemory/leak.c f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -36,7 +36,7 @@
-/* We claim 3 of the malloc table's client flags */
+/* We claim 4 of the malloc table's client flags */
--- drmemory/options.c 9f1df662fe27fd4fb700f723f512855920ed6969
+++ drmemory/options.c f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -375,6 +375,15 @@
+ if (options.replace_malloc) {+ /* XXX i#879: we need a custom malloc w/ no headers */
+ usage_error("pattern mode incompatible with replacing malloc", "");+ }
+ }
+ if (options.replace_malloc) {+ options.replace_realloc = false; /* no need for it */
+ /* whole header is in redzone, but supports redzone being smaller than header */
+ options.size_in_redzone = false;
--- drmemory/optionsx.h 9f1df662fe27fd4fb700f723f512855920ed6969
+++ drmemory/optionsx.h f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -595,3 +595,6 @@
+OPTION_CLIENT_BOOL(internal, replace_malloc, false,
+ "Replace malloc rather than wrapping existing routines",
+ "Replace malloc with custom routines rather than wrapping existing routines. Replacing is more efficient but can be less transparent.")
--- drmemory/readwrite.c 9f1df662fe27fd4fb700f723f512855920ed6969
+++ drmemory/readwrite.c f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -2429,7 +2429,10 @@
- xl8 == decode_next_pc(drcontext, loc_to_pc(&loc))),
+ xl8 == decode_next_pc(drcontext, loc_to_pc(&loc))) ||
+ /* for native ret we changed pc */
+ (options.replace_malloc && opc == OP_ret &&
+ alloc_entering_replace_routine(xl8)),
@@ -2461,6 +2464,21 @@
+ else if (instr_is_return(inst) && options.replace_malloc &&
+ alloc_entering_replace_routine(instr_get_app_pc(inst))) {+ /* drwrap_replace_native() uses a generated ret.
+ * if we don't do anything here, we'll use its raw bits
+ * as the decode pc, yet they're temporary and we'll
+ * have trouble.
+ * XXX: this could happen w/ any generated instr: how can we tell
+ * raw bits are temporary vs a hook trampoline?
+ * XXX: drwrap_replace_native() can't set xl8 to a fake ret
+ * b/c of DR's constraints on xl8 being within original bb bounds!
+ */
+ static byte ret_to_decode[1] = {RET_NOIMM_OPCODE};+ return (app_pc) &ret_to_decode;
+ }
+
@@ -4146,7 +4164,7 @@
- if (bi->first_instr)
+ if (bi->first_instr && instr_ok_to_mangle(inst))
--- drmemory/readwrite.h 9f1df662fe27fd4fb700f723f512855920ed6969
+++ drmemory/readwrite.h f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -272,6 +272,7 @@
+#define RET_NOIMM_OPCODE 0xc3
--- tests/CMakeLists.txt 9f1df662fe27fd4fb700f723f512855920ed6969
+++ tests/CMakeLists.txt f6dc6910105f7a2bd25cd93732b95ece28a1c3d3
@@ -404,6 +404,9 @@
+ # test redzone sizes
+ newtest_nobuild(redzone8 malloc "" "-redzone_size;8" "" OFF "malloc")
+ newtest_nobuild(redzone1024 malloc "" "-redzone_size;1024" "" OFF "malloc")

