diff --git a/.autom4te.cfg b/.autom4te.cfg new file mode 100644 index 0000000000..fe2424db5e --- /dev/null +++ b/.autom4te.cfg @@ -0,0 +1,3 @@ +begin-language: "Autoconf-without-aclocal-m4" +args: --no-cache +end-language: "Autoconf-without-aclocal-m4" diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..6313b56c57 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/.gitignore b/.gitignore index 4c408ec2c2..d0e393619f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ /*.gcov.* -/autom4te.cache/ - +/bin/jemalloc-config /bin/jemalloc.sh +/bin/jeprof /config.stamp /config.log @@ -15,6 +15,8 @@ /doc/jemalloc.html /doc/jemalloc.3 +/jemalloc.pc + /lib/ /Makefile @@ -35,6 +37,7 @@ /include/jemalloc/jemalloc_protos.h /include/jemalloc/jemalloc_protos_jet.h /include/jemalloc/jemalloc_rename.h +/include/jemalloc/jemalloc_typedefs.h /src/*.[od] /src/*.gcda diff --git a/COPYING b/COPYING index bdda0feb9e..611968cda5 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2014 Jason Evans . +Copyright (C) 2002-2015 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2014 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2015 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/ChangeLog b/ChangeLog index d56ee999e6..e3b0a5190d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,250 @@ Following are change highlights associated with official releases. Important -bug fixes are all mentioned, but internal enhancements are omitted here for -brevity (even though they are more fun to write about). Much more detail can be -found in the git revision history: +bug fixes are all mentioned, but some internal enhancements are omitted here for +brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.0.3 (September 24, 2015) + + This bugfix release continues the trend of xallocx() and heap profiling fixes. + + Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large + allocations when --enable-cache-oblivious configure option is enabled. + - Fix xallocx(..., MALLOCX_ZERO) to zero trailing bytes of huge allocations + when resizing from/to a size class that is not a multiple of the chunk size. + - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap + profile dumping started. + - Work around a potentially bad thread-specific data initialization + interaction with NPTL (glibc's pthreads implementation). + +* 4.0.2 (September 21, 2015) + + This bugfix release addresses a few bugs specific to heap profiling. + + Bug fixes: + - Fix ixallocx_prof_sample() to never modify nor create sampled small + allocations. xallocx() is in general incapable of moving small allocations, + so this fix removes buggy code without loss of generality. + - Fix irallocx_prof_sample() to always allocate large regions, even when + alignment is non-zero. + - Fix prof_alloc_rollback() to read tdata from thread-specific data rather + than dereferencing a potentially invalid tctx. + +* 4.0.1 (September 15, 2015) + + This is a bugfix release that is somewhat high risk due to the amount of + refactoring required to address deep xallocx() problems. As a side effect of + these fixes, xallocx() now tries harder to partially fulfill requests for + optional extra space. Note that a couple of minor heap profiling + optimizations are included, but these are better thought of as performance + fixes that were integral to disovering most of the other bugs. + + Optimizations: + - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the + fast path when heap profiling is enabled. Additionally, split a special + case out into arena_prof_tctx_reset(), which also avoids chunk metadata + reads. + - Optimize irallocx_prof() to optimistically update the sampler state. The + prior implementation appears to have been a holdover from when + rallocx()/xallocx() functionality was combined as rallocm(). + + Bug fixes: + - Fix TLS configuration such that it is enabled by default for platforms on + which it works correctly. + - Fix arenas_cache_cleanup() and arena_get_hard() to handle + allocation/deallocation within the application's thread-specific data + cleanup functions even after arenas_cache is torn down. + - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS. + - Fix chunk purge hook calls for in-place huge shrinking reallocation to + specify the old chunk size rather than the new chunk size. This bug caused + no correctness issues for the default chunk purge function, but was + visible to custom functions set via the "arena..chunk_hooks" mallctl. + - Fix heap profiling bugs: + + Fix heap profiling to distinguish among otherwise identical sample sites + with interposed resets (triggered via the "prof.reset" mallctl). This bug + could cause data structure corruption that would most likely result in a + segfault. + + Fix irealloc_prof() to prof_alloc_rollback() on OOM. + + Make one call to prof_active_get_unlocked() per allocation event, and use + the result throughout the relevant functions that handle an allocation + event. Also add a missing check in prof_realloc(). These fixes protect + allocation events against concurrent prof_active changes. + + Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() + in the correct order. + + Fix prof_realloc() to call prof_free_sampled_object() after calling + prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were + the same, the tctx could have been prematurely destroyed. + - Fix portability bugs: + + Don't bitshift by negative amounts when encoding/decoding run sizes in + chunk header maps. This affected systems with page sizes greater than 8 + KiB. + + Rename index_t to szind_t to avoid an existing type on Solaris. + + Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to + match glibc and avoid compilation errors when including both + jemalloc/jemalloc.h and malloc.h in C++ code. + + Don't assume that /bin/sh is appropriate when running size_classes.sh + during configuration. + + Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM. + + Link tests to librt if it contains clock_gettime(2). + +* 4.0.0 (August 17, 2015) + + This version contains many speed and space optimizations, both minor and + major. The major themes are generalization, unification, and simplification. + Although many of these optimizations cause no visible behavior change, their + cumulative effect is substantial. + + New features: + - Normalize size class spacing to be consistent across the complete size + range. By default there are four size classes per size doubling, but this + is now configurable via the --with-lg-size-class-group option. Also add the + --with-lg-page, --with-lg-page-sizes, --with-lg-quantum, and + --with-lg-tiny-min options, which can be used to tweak page and size class + settings. Impacts: + + Worst case performance for incrementally growing/shrinking reallocation + is improved because there are far fewer size classes, and therefore + copying happens less often. + + Internal fragmentation is limited to 20% for all but the smallest size + classes (those less than four times the quantum). (1B + 4 KiB) + and (1B + 4 MiB) previously suffered nearly 50% internal fragmentation. + + Chunk fragmentation tends to be lower because there are fewer distinct run + sizes to pack. + - Add support for explicit tcaches. The "tcache.create", "tcache.flush", and + "tcache.destroy" mallctls control tcache lifetime and flushing, and the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to the *allocx() API + control which tcache is used for each operation. + - Implement per thread heap profiling, as well as the ability to + enable/disable heap profiling on a per thread basis. Add the "prof.reset", + "prof.lg_sample", "thread.prof.name", "thread.prof.active", + "opt.prof_thread_active_init", "prof.thread_active_init", and + "thread.prof.active" mallctls. + - Add support for per arena application-specified chunk allocators, configured + via the "arena..chunk_hooks" mallctl. + - Refactor huge allocation to be managed by arenas, so that arenas now + function as general purpose independent allocators. This is important in + the context of user-specified chunk allocators, aside from the scalability + benefits. Related new statistics: + + The "stats.arenas..huge.allocated", "stats.arenas..huge.nmalloc", + "stats.arenas..huge.ndalloc", and "stats.arenas..huge.nrequests" + mallctls provide high level per arena huge allocation statistics. + + The "arenas.nhchunks", "arenas.hchunk..size", + "stats.arenas..hchunks..nmalloc", + "stats.arenas..hchunks..ndalloc", + "stats.arenas..hchunks..nrequests", and + "stats.arenas..hchunks..curhchunks" mallctls provide per size class + statistics. + - Add the 'util' column to malloc_stats_print() output, which reports the + proportion of available regions that are currently in use for each small + size class. + - Add "alloc" and "free" modes for for junk filling (see the "opt.junk" + mallctl), so that it is possible to separately enable junk filling for + allocation versus deallocation. + - Add the jemalloc-config script, which provides information about how + jemalloc was configured, and how to integrate it into application builds. + - Add metadata statistics, which are accessible via the "stats.metadata", + "stats.arenas..metadata.mapped", and + "stats.arenas..metadata.allocated" mallctls. + - Add the "stats.resident" mallctl, which reports the upper limit of + physically resident memory mapped by the allocator. + - Add per arena control over unused dirty page purging, via the + "arenas.lg_dirty_mult", "arena..lg_dirty_mult", and + "stats.arenas..lg_dirty_mult" mallctls. + - Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump + feature on/off during program execution. + - Add sdallocx(), which implements sized deallocation. The primary + optimization over dallocx() is the removal of a metadata read, which often + suffers an L1 cache miss. + - Add missing header includes in jemalloc/jemalloc.h, so that applications + only have to #include . + - Add support for additional platforms: + + Bitrig + + Cygwin + + DragonFlyBSD + + iOS + + OpenBSD + + OpenRISC/or1k + + Optimizations: + - Maintain dirty runs in per arena LRUs rather than in per arena trees of + dirty-run-containing chunks. In practice this change significantly reduces + dirty page purging volume. + - Integrate whole chunks into the unused dirty page purging machinery. This + reduces the cost of repeated huge allocation/deallocation, because it + effectively introduces a cache of chunks. + - Split the arena chunk map into two separate arrays, in order to increase + cache locality for the frequently accessed bits. + - Move small run metadata out of runs, into arena chunk headers. This reduces + run fragmentation, smaller runs reduce external fragmentation for small size + classes, and packed (less uniformly aligned) metadata layout improves CPU + cache set distribution. + - Randomly distribute large allocation base pointer alignment relative to page + boundaries in order to more uniformly utilize CPU cache sets. This can be + disabled via the --disable-cache-oblivious configure option, and queried via + the "config.cache_oblivious" mallctl. + - Micro-optimize the fast paths for the public API functions. + - Refactor thread-specific data to reside in a single structure. This assures + that only a single TLS read is necessary per call into the public API. + - Implement in-place huge allocation growing and shrinking. + - Refactor rtree (radix tree for chunk lookups) to be lock-free, and make + additional optimizations that reduce maximum lookup depth to one or two + levels. This resolves what was a concurrency bottleneck for per arena huge + allocation, because a global data structure is critical for determining + which arenas own which huge allocations. + + Incompatible changes: + - Replace --enable-cc-silence with --disable-cc-silence to suppress spurious + warnings by default. + - Assure that the constness of malloc_usable_size()'s return type matches that + of the system implementation. + - Change the heap profile dump format to support per thread heap profiling, + rename pprof to jeprof, and enhance it with the --thread= option. As a + result, the bundled jeprof must now be used rather than the upstream + (gperftools) pprof. + - Disable "opt.prof_final" by default, in order to avoid atexit(3), which can + internally deadlock on some platforms. + - Change the "arenas.nlruns" mallctl type from size_t to unsigned. + - Replace the "stats.arenas..bins..allocated" mallctl with + "stats.arenas..bins..curregs". + - Ignore MALLOC_CONF in set{uid,gid,cap} binaries. + - Ignore MALLOCX_ARENA(a) in dallocx(), in favor of using the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to control tcache usage. + + Removed features: + - Remove the *allocm() API, which is superseded by the *allocx() API. + - Remove the --enable-dss options, and make dss non-optional on all platforms + which support sbrk(2). + - Remove the "arenas.purge" mallctl, which was obsoleted by the + "arena..purge" mallctl in 3.1.0. + - Remove the unnecessary "opt.valgrind" mallctl; jemalloc automatically + detects whether it is running inside Valgrind. + - Remove the "stats.huge.allocated", "stats.huge.nmalloc", and + "stats.huge.ndalloc" mallctls. + - Remove the --enable-mremap option. + - Remove the "stats.chunks.current", "stats.chunks.total", and + "stats.chunks.high" mallctls. + + Bug fixes: + - Fix the cactive statistic to decrease (rather than increase) when active + memory decreases. This regression was first released in 3.5.0. + - Fix OOM handling in memalign() and valloc(). A variant of this bug existed + in all releases since 2.0.0, which introduced these functions. + - Fix an OOM-related regression in arena_tcache_fill_small(), which could + cause cache corruption on OOM. This regression was present in all releases + from 2.2.0 through 3.6.0. + - Fix size class overflow handling for malloc(), posix_memalign(), memalign(), + calloc(), and realloc() when profiling is enabled. + - Fix the "arena..dss" mallctl to return an error if "primary" or + "secondary" precedence is specified, but sbrk(2) is not supported. + - Fix fallback lg_floor() implementations to handle extremely large inputs. + - Ensure the default purgeable zone is after the default zone on OS X. + - Fix latent bugs in atomic_*(). + - Fix the "arena..dss" mallctl to handle read-only calls. + - Fix tls_model configuration to enable the initial-exec model when possible. + - Mark malloc_conf as a weak symbol so that the application can override it. + - Correctly detect glibc's adaptive pthread mutexes. + - Fix the --without-export configure option. + * 3.6.0 (March 31, 2014) This version contains a critical bug fix for a regression present in 3.5.0 and @@ -21,7 +261,7 @@ found in the git revision history: backtracing to be reliable. - Use dss allocation precedence for huge allocations as well as small/large allocations. - - Fix test assertion failure message formatting. This bug did not manifect on + - Fix test assertion failure message formatting. This bug did not manifest on x86_64 systems because of implementation subtleties in va_list. - Fix inconsequential test failures for hash and SFMT code. @@ -516,7 +756,7 @@ found in the git revision history: - Make it possible for the application to manually flush a thread's cache, via the "tcache.flush" mallctl. - Base maximum dirty page count on proportion of active memory. - - Compute various addtional run-time statistics, including per size class + - Compute various additional run-time statistics, including per size class statistics for large objects. - Expose malloc_stats_print(), which can be called repeatedly by the application. diff --git a/INSTALL b/INSTALL index 07f51d1e25..8d3968745e 100644 --- a/INSTALL +++ b/INSTALL @@ -1,10 +1,23 @@ -Building and installing jemalloc can be as simple as typing the following while -in the root directory of the source tree: +Building and installing a packaged release of jemalloc can be as simple as +typing the following while in the root directory of the source tree: ./configure make make install +If building from unpackaged developer sources, the simplest command sequence +that might work is: + + ./autogen.sh + make dist + make + make install + +Note that documentation is not built by the default target because doing so +would create a dependency on xsltproc in packaged releases, hence the +requirement to either run 'make dist' or avoid installing docs via the various +install_* targets documented below. + === Advanced configuration ===================================================== The 'configure' script supports numerous options that allow control of which @@ -56,7 +69,7 @@ any of the following arguments (not a definitive list) to 'configure': replace the "malloc", "calloc", etc. symbols. --without-export - Don't export public APIs. This can be useful when building jemalloc as a + Don't export public APIs. This can be useful when building jemalloc as a static library, or to avoid exporting public APIs when using the zone allocator on OSX. @@ -94,15 +107,15 @@ any of the following arguments (not a definitive list) to 'configure': there are interactions between the various coverage targets, so it is usually advisable to run 'make clean' between repeated code coverage runs. ---enable-ivsalloc - Enable validation code, which verifies that pointers reside within - jemalloc-owned chunks before dereferencing them. This incurs a substantial - performance hit. - --disable-stats Disable statistics gathering functionality. See the "opt.stats_print" option documentation for usage details. +--enable-ivsalloc + Enable validation code, which verifies that pointers reside within + jemalloc-owned chunks before dereferencing them. This incurs a minor + performance hit. + --enable-prof Enable heap profiling and leak detection functionality. See the "opt.prof" option documentation for usage details. When enabled, there are several @@ -132,12 +145,6 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. ---enable-mremap - Enable huge realloc() via mremap(2). mremap() is disabled by default - because the flavor used is specific to Linux, which has a quirk in its - virtual memory allocation algorithm that causes semi-permanent VM map holes - under normal jemalloc operation. - --disable-munmap Disable virtual memory deallocation via munmap(2); instead keep track of the virtual memory for later use. munmap() is disabled by default (i.e. @@ -154,7 +161,7 @@ any of the following arguments (not a definitive list) to 'configure': Disable support for Valgrind. --disable-zone-allocator - Disable zone allocator for Darwin. This means jemalloc won't be hooked as + Disable zone allocator for Darwin. This means jemalloc won't be hooked as the default allocator on OSX/iOS. --enable-utrace @@ -178,10 +185,106 @@ any of the following arguments (not a definitive list) to 'configure': thread-local variables via the __thread keyword. If TLS is available, jemalloc uses it for several purposes. +--disable-cache-oblivious + Disable cache-oblivious large allocation alignment for large allocation + requests with no alignment constraints. If this feature is disabled, all + large allocations are page-aligned as an implementation artifact, which can + severely harm CPU cache utilization. However, the cache-oblivious layout + comes at the cost of one extra page per large allocation, which in the + most extreme case increases physical memory usage for the 16 KiB size class + to 20 KiB. + --with-xslroot= Specify where to find DocBook XSL stylesheets when building the documentation. +--with-lg-page= + Specify the base 2 log of the system page size. This option is only useful + when cross compiling, since the configure script automatically determines + the host's page size by default. + +--with-lg-page-sizes= + Specify the comma-separated base 2 logs of the page sizes to support. This + option may be useful when cross-compiling in combination with + --with-lg-page, but its primary use case is for integration with FreeBSD's + libc, wherein jemalloc is embedded. + +--with-lg-size-class-group= + Specify the base 2 log of how many size classes to use for each doubling in + size. By default jemalloc uses =2, which results in + e.g. the following size classes: + + [...], 64, + 80, 96, 112, 128, + 160, [...] + + =3 results in e.g. the following size classes: + + [...], 64, + 72, 80, 88, 96, 104, 112, 120, 128, + 144, [...] + + The minimal =0 causes jemalloc to only provide size + classes that are powers of 2: + + [...], + 64, + 128, + 256, + [...] + + An implementation detail currently limits the total number of small size + classes to 255, and a compilation error will result if the + you specify cannot be supported. The limit is + roughly =4, depending on page size. + +--with-lg-quantum= + Specify the base 2 log of the minimum allocation alignment. jemalloc needs + to know the minimum alignment that meets the following C standard + requirement (quoted from the April 12, 2011 draft of the C11 standard): + + The pointer returned if the allocation succeeds is suitably aligned so + that it may be assigned to a pointer to any type of object with a + fundamental alignment requirement and then used to access such an object + or an array of such objects in the space allocated [...] + + This setting is architecture-specific, and although jemalloc includes known + safe values for the most commonly used modern architectures, there is a + wrinkle related to GNU libc (glibc) that may impact your choice of + . On most modern architectures, this mandates 16-byte alignment + (=4), but the glibc developers chose not to meet this + requirement for performance reasons. An old discussion can be found at + https://sourceware.org/bugzilla/show_bug.cgi?id=206 . Unlike glibc, + jemalloc does follow the C standard by default (caveat: jemalloc + technically cheats if --with-lg-tiny-min is smaller than + --with-lg-quantum), but the fact that Linux systems already work around + this allocator noncompliance means that it is generally safe in practice to + let jemalloc's minimum alignment follow glibc's lead. If you specify + --with-lg-quantum=3 during configuration, jemalloc will provide additional + size classes that are not 16-byte-aligned (24, 40, and 56, assuming + --with-lg-size-class-group=2). + +--with-lg-tiny-min= + Specify the base 2 log of the minimum tiny size class to support. Tiny + size classes are powers of 2 less than the quantum, and are only + incorporated if is less than (see + --with-lg-quantum). Tiny size classes technically violate the C standard + requirement for minimum alignment, and crashes could conceivably result if + the compiler were to generate instructions that made alignment assumptions, + both because illegal instruction traps could result, and because accesses + could straddle page boundaries and cause segmentation faults due to + accessing unmapped addresses. + + The default of =3 works well in practice even on architectures + that technically require 16-byte alignment, probably for the same reason + --with-lg-quantum=3 works. Smaller tiny size classes can, and will, cause + crashes (see https://bugzilla.mozilla.org/show_bug.cgi?id=691003 for an + example). + + This option is rarely useful, and is mainly provided as documentation of a + subtle implementation detail. If you do use this option, specify a + value in [3, ..., ]. + The following environment variables (not a definitive list) impact configure's behavior: diff --git a/Makefile.in b/Makefile.in index e411804a2d..1ac6f2926d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -28,6 +28,7 @@ CFLAGS := @CFLAGS@ LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ +TESTLIBS := @TESTLIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ IMPORTLIB := @importlib@ @@ -42,14 +43,16 @@ XSLTPROC := @XSLTPROC@ AUTOCONF := @AUTOCONF@ _RPATH = @RPATH@ RPATH = $(if $(1),$(call _RPATH,$(1))) -cfghdrs_in := @cfghdrs_in@ +cfghdrs_in := $(addprefix $(srcroot),@cfghdrs_in@) cfghdrs_out := @cfghdrs_out@ -cfgoutputs_in := @cfgoutputs_in@ +cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@) cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_code_coverage := @enable_code_coverage@ +enable_prof := @enable_prof@ enable_valgrind := @enable_valgrind@ enable_zone_allocator := @enable_zone_allocator@ +MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ @@ -73,16 +76,17 @@ endif LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. -BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh +BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ $(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \ $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ - $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/prof.c \ - $(srcroot)src/quarantine.c $(srcroot)src/rtree.c $(srcroot)src/stats.c \ - $(srcroot)src/tcache.c $(srcroot)src/util.c $(srcroot)src/tsd.c + $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ + $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ + $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/util.c \ + $(srcroot)src/tsd.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -101,49 +105,60 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif +PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml -DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) -DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) +DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html) +DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ +C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ + $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \ + $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ - $(srcroot)test/src/thd.c + $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/bitmap.c \ +TESTS_UNIT := $(srcroot)test/unit/atomic.c \ + $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ + $(srcroot)test/unit/junk_alloc.c \ + $(srcroot)test/unit/junk_free.c \ + $(srcroot)test/unit/lg_chunk.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/prof_accum.c \ + $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ + $(srcroot)test/unit/prof_reset.c \ + $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/quarantine.c \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ $(srcroot)test/unit/zero.c -TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \ - $(srcroot)test/unit/prof_accum_b.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ + $(srcroot)test/integration/sdallocx.c \ $(srcroot)test/integration/mallocx.c \ $(srcroot)test/integration/MALLOCX_ARENA.c \ - $(srcroot)test/integration/mremap.c \ + $(srcroot)test/integration/overflow.c \ $(srcroot)test/integration/posix_memalign.c \ $(srcroot)test/integration/rallocx.c \ $(srcroot)test/integration/thread_arena.c \ $(srcroot)test/integration/thread_tcache_enabled.c \ - $(srcroot)test/integration/xallocx.c -TESTS_STRESS := + $(srcroot)test/integration/xallocx.c \ + $(srcroot)test/integration/chunk.c +TESTS_STRESS := $(srcroot)test/stress/microbench.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS) C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O)) @@ -156,10 +171,9 @@ C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O)) C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS) TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_UNIT_AUX_OBJS := $(TESTS_UNIT_AUX:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) +TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib @@ -173,10 +187,10 @@ all: build_lib dist: build_doc -$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl +$(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< -$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl +$(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< build_doc_html: $(DOCS_HTML) @@ -208,18 +222,12 @@ $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/% $(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB $(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST -$(TESTS_UNIT_AUX_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST -define make-unit-link-dep -$(1): TESTS_UNIT_LINK_OBJS += $(2) -$(1): $(2) -endef -$(foreach test, $(TESTS_UNIT:$(srcroot)test/unit/%.c=$(objroot)test/unit/%$(EXE)), $(eval $(call make-unit-link-dep,$(test),$(filter $(test:%=%_a.$(O)) $(test:%=%_b.$(O)),$(TESTS_UNIT_AUX_OBJS))))) $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include ifneq ($(IMPORTLIB),$(SO)) -$(C_OBJS): CPPFLAGS += -DDLLEXPORT +$(C_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT endif ifndef CC_MM @@ -228,7 +236,7 @@ HEADER_DIRS = $(srcroot)include/jemalloc/internal \ $(objroot)include/jemalloc $(objroot)include/jemalloc/internal HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): $(HEADERS) -$(TESTS_OBJS): $(objroot)test/unit/jemalloc_test.h +$(TESTS_OBJS): $(objroot)test/include/test/jemalloc_test.h endif $(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @@ -258,15 +266,15 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) @@ -300,7 +308,14 @@ install_lib_static: $(STATIC_LIBS) install -m 755 $$l $(LIBDIR); \ done -install_lib: install_lib_shared install_lib_static +install_lib_pc: $(PC) + install -d $(LIBDIR)/pkgconfig + @for l in $(PC); do \ + echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \ + install -m 644 $$l $(LIBDIR)/pkgconfig; \ +done + +install_lib: install_lib_shared install_lib_static install_lib_pc install_doc_html: install -d $(DATADIR)/doc/jemalloc$(install_suffix) @@ -329,18 +344,23 @@ check_unit_dir: @mkdir -p $(objroot)test/unit check_integration_dir: @mkdir -p $(objroot)test/integration -check_stress_dir: +stress_dir: @mkdir -p $(objroot)test/stress -check_dir: check_unit_dir check_integration_dir check_stress_dir +check_dir: check_unit_dir check_integration_dir check_unit: tests_unit check_unit_dir $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) +check_integration_prof: tests_integration check_integration_dir +ifeq ($(enable_prof), 1) + $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) +endif check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) -check_stress: tests_stress check_stress_dir +stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) -check: tests check_dir - $(SHELL) $(objroot)test/test.sh $(TESTS:$(srcroot)%.c=$(objroot)%) +check: tests check_dir check_integration_prof + $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) ifeq ($(enable_code_coverage), 1) coverage_unit: check_unit @@ -354,7 +374,7 @@ coverage_integration: check_integration $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) -coverage_stress: check_stress +coverage_stress: stress $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) @@ -399,8 +419,9 @@ clean: rm -f $(objroot)*.gcov.* distclean: clean - rm -rf $(objroot)autom4te.cache + rm -f $(objroot)bin/jemalloc-config rm -f $(objroot)bin/jemalloc.sh + rm -f $(objroot)bin/jeprof rm -f $(objroot)config.log rm -f $(objroot)config.status rm -f $(objroot)config.stamp @@ -409,7 +430,7 @@ distclean: clean relclean: distclean rm -f $(objroot)configure - rm -f $(srcroot)VERSION + rm -f $(objroot)VERSION rm -f $(DOCS_HTML) rm -f $(DOCS_MAN3) diff --git a/bin/jemalloc-config.in b/bin/jemalloc-config.in new file mode 100644 index 0000000000..b016c8d331 --- /dev/null +++ b/bin/jemalloc-config.in @@ -0,0 +1,79 @@ +#!/bin/sh + +usage() { + cat < +Options: + --help | -h : Print usage. + --version : Print jemalloc version. + --revision : Print shared library revision number. + --config : Print configure options used to build jemalloc. + --prefix : Print installation directory prefix. + --bindir : Print binary installation directory. + --datadir : Print data installation directory. + --includedir : Print include installation directory. + --libdir : Print library installation directory. + --mandir : Print manual page installation directory. + --cc : Print compiler used to build jemalloc. + --cflags : Print compiler flags used to build jemalloc. + --cppflags : Print preprocessor flags used to build jemalloc. + --ldflags : Print library flags used to build jemalloc. + --libs : Print libraries jemalloc was linked against. +EOF +} + +prefix="@prefix@" +exec_prefix="@exec_prefix@" + +case "$1" in +--help | -h) + usage + exit 0 + ;; +--version) + echo "@jemalloc_version@" + ;; +--revision) + echo "@rev@" + ;; +--config) + echo "@CONFIG@" + ;; +--prefix) + echo "@PREFIX@" + ;; +--bindir) + echo "@BINDIR@" + ;; +--datadir) + echo "@DATADIR@" + ;; +--includedir) + echo "@INCLUDEDIR@" + ;; +--libdir) + echo "@LIBDIR@" + ;; +--mandir) + echo "@MANDIR@" + ;; +--cc) + echo "@CC@" + ;; +--cflags) + echo "@CFLAGS@" + ;; +--cppflags) + echo "@CPPFLAGS@" + ;; +--ldflags) + echo "@LDFLAGS@ @EXTRA_LDFLAGS@" + ;; +--libs) + echo "@LIBS@" + ;; +*) + usage + exit 1 +esac diff --git a/bin/pprof b/bin/jeprof.in old mode 100755 new mode 100644 similarity index 95% rename from bin/pprof rename to bin/jeprof.in index 328138cd98..e7178078ae --- a/bin/pprof +++ b/bin/jeprof.in @@ -2,11 +2,11 @@ # Copyright (c) 1998-2007, Google Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above @@ -16,7 +16,7 @@ # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. -# +# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -40,28 +40,28 @@ # # Examples: # -# % tools/pprof "program" "profile" +# % tools/jeprof "program" "profile" # Enters "interactive" mode # -# % tools/pprof --text "program" "profile" +# % tools/jeprof --text "program" "profile" # Generates one line per procedure # -# % tools/pprof --gv "program" "profile" +# % tools/jeprof --gv "program" "profile" # Generates annotated call-graph and displays via "gv" # -# % tools/pprof --gv --focus=Mutex "program" "profile" +# % tools/jeprof --gv --focus=Mutex "program" "profile" # Restrict to code paths that involve an entry that matches "Mutex" # -# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" +# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile" # Restrict to code paths that involve an entry that matches "Mutex" # and does not match "string" # -# % tools/pprof --list=IBF_CheckDocid "program" "profile" +# % tools/jeprof --list=IBF_CheckDocid "program" "profile" # Generates disassembly listing of all routines with at least one # sample that match the --list= pattern. The listing is # annotated with the flat and cumulative sample counts at each line. # -# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" +# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile" # Generates disassembly listing of all routines with at least one # sample that match the --disasm= pattern. The listing is # annotated with the flat and cumulative sample counts at each PC value. @@ -72,10 +72,11 @@ use strict; use warnings; use Getopt::Long; +my $JEPROF_VERSION = "@jemalloc_version@"; my $PPROF_VERSION = "2.0"; # These are the object tools we use which can come from a -# user-specified location using --tools, from the PPROF_TOOLS +# user-specified location using --tools, from the JEPROF_TOOLS # environment variable, or from the environment. my %obj_tool_map = ( "objdump" => "objdump", @@ -144,13 +145,13 @@ my $sep_address = undef; sub usage_string { return < +jeprof [options] is a space separated list of profile names. -pprof [options] +jeprof [options] is a list of profile files where each file contains the necessary symbol mappings as well as profile data (likely generated with --raw). -pprof [options] +jeprof [options] is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE Each name can be: @@ -161,9 +162,9 @@ pprof [options] $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. For instance: - pprof http://myserver.com:80$HEAP_PAGE + jeprof http://myserver.com:80$HEAP_PAGE If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). -pprof --symbols +jeprof --symbols Maps addresses to symbol names. In this mode, stdin should be a list of library mappings, in the same format as is found in the heap- and cpu-profile files (this loosely matches that of /proc/self/maps @@ -202,7 +203,7 @@ Output type: --pdf Generate PDF to stdout --svg Generate SVG to stdout --gif Generate GIF to stdout - --raw Generate symbolized pprof data (useful with remote fetch) + --raw Generate symbolized jeprof data (useful with remote fetch) Heap-Profile Options: --inuse_space Display in-use (mega)bytes [default] @@ -223,6 +224,7 @@ Call-graph Options: --edgefraction= Hide edges below *total [default=.001] --maxdegree= Max incoming/outgoing edges per node [default=8] --focus= Focus on nodes matching + --thread= Show profile for thread --ignore= Ignore nodes matching --scale= Set GV scaling [default=0] --heapcheck Make nodes with non-0 object counts @@ -235,34 +237,34 @@ Miscellaneous: --version Version information Environment Variables: - PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof - PPROF_TOOLS Prefix for object tools pathnames + JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof + JEPROF_TOOLS Prefix for object tools pathnames Examples: -pprof /bin/ls ls.prof +jeprof /bin/ls ls.prof Enters "interactive" mode -pprof --text /bin/ls ls.prof +jeprof --text /bin/ls ls.prof Outputs one line per procedure -pprof --web /bin/ls ls.prof +jeprof --web /bin/ls ls.prof Displays annotated call-graph in web browser -pprof --gv /bin/ls ls.prof +jeprof --gv /bin/ls ls.prof Displays annotated call-graph via 'gv' -pprof --gv --focus=Mutex /bin/ls ls.prof +jeprof --gv --focus=Mutex /bin/ls ls.prof Restricts to code paths including a .*Mutex.* entry -pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof +jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof Code paths including Mutex but not string -pprof --list=getdir /bin/ls ls.prof +jeprof --list=getdir /bin/ls ls.prof (Per-line) annotated source listing for getdir() -pprof --disasm=getdir /bin/ls ls.prof +jeprof --disasm=getdir /bin/ls ls.prof (Per-PC) annotated disassembly for getdir() -pprof http://localhost:1234/ +jeprof http://localhost:1234/ Enters "interactive" mode -pprof --text localhost:1234 +jeprof --text localhost:1234 Outputs one line per procedure for localhost:1234 -pprof --raw localhost:1234 > ./local.raw -pprof --text ./local.raw +jeprof --raw localhost:1234 > ./local.raw +jeprof --text ./local.raw Fetches a remote profile for later analysis and then analyzes it in text mode. EOF @@ -270,7 +272,8 @@ EOF sub version_string { return < \$main::opt_edgefraction, "maxdegree=i" => \$main::opt_maxdegree, "focus=s" => \$main::opt_focus, + "thread=s" => \$main::opt_thread, "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, "heapcheck" => \$main::opt_heapcheck, @@ -562,66 +567,12 @@ sub Init() { } } -sub Main() { - Init(); - $main::collected_profile = undef; - @main::profile_files = (); - $main::op_time = time(); - - # Printing symbols is special and requires a lot less info that most. - if ($main::opt_symbols) { - PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin - return; - } - - # Fetch all profile data - FetchDynamicProfiles(); - - # this will hold symbols that we read from the profile files - my $symbol_map = {}; - - # Read one profile, pick the last item on the list - my $data = ReadProfile($main::prog, pop(@main::profile_files)); - my $profile = $data->{profile}; - my $pcs = $data->{pcs}; - my $libs = $data->{libs}; # Info about main program and shared libraries - $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); - - # Add additional profiles, if available. - if (scalar(@main::profile_files) > 0) { - foreach my $pname (@main::profile_files) { - my $data2 = ReadProfile($main::prog, $pname); - $profile = AddProfile($profile, $data2->{profile}); - $pcs = AddPcs($pcs, $data2->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); - } - } - - # Subtract base from profile, if specified - if ($main::opt_base ne '') { - my $base = ReadProfile($main::prog, $main::opt_base); - $profile = SubtractProfile($profile, $base->{profile}); - $pcs = AddPcs($pcs, $base->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); - } +sub FilterAndPrint { + my ($profile, $symbols, $libs, $thread) = @_; # Get total data in profile my $total = TotalProfile($profile); - # Collect symbols - my $symbols; - if ($main::use_symbolized_profile) { - $symbols = FetchSymbols($pcs, $symbol_map); - } elsif ($main::use_symbol_page) { - $symbols = FetchSymbols($pcs); - } else { - # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, - # which may differ from the data from subsequent profiles, especially - # if they were run on different machines. Use appropriate libs for - # each pc somehow. - $symbols = ExtractSymbols($libs, $pcs); - } - # Remove uniniteresting stack items $profile = RemoveUninterestingFrames($symbols, $profile); @@ -656,7 +607,9 @@ sub Main() { # (only matters when --heapcheck is given but we must be # compatible with old branches that did not pass --heapcheck always): if ($total != 0) { - printf("Total: %s %s\n", Unparse($total), Units()); + printf("Total%s: %s %s\n", + (defined($thread) ? " (t$thread)" : ""), + Unparse($total), Units()); } PrintText($symbols, $flat, $cumulative, -1); } elsif ($main::opt_raw) { @@ -692,6 +645,77 @@ sub Main() { } else { InteractiveMode($profile, $symbols, $libs, $total); } +} + +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + + # Printing symbols is special and requires a lot less info that most. + if ($main::opt_symbols) { + PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin + return; + } + + # Fetch all profile data + FetchDynamicProfiles(); + + # this will hold symbols that we read from the profile files + my $symbol_map = {}; + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $profile = $data->{profile}; + my $pcs = $data->{pcs}; + my $libs = $data->{libs}; # Info about main program and shared libraries + $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); + + # Add additional profiles, if available. + if (scalar(@main::profile_files) > 0) { + foreach my $pname (@main::profile_files) { + my $data2 = ReadProfile($main::prog, $pname); + $profile = AddProfile($profile, $data2->{profile}); + $pcs = AddPcs($pcs, $data2->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); + } + + # Collect symbols + my $symbols; + if ($main::use_symbolized_profile) { + $symbols = FetchSymbols($pcs, $symbol_map); + } elsif ($main::use_symbol_page) { + $symbols = FetchSymbols($pcs); + } else { + # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, + # which may differ from the data from subsequent profiles, especially + # if they were run on different machines. Use appropriate libs for + # each pc somehow. + $symbols = ExtractSymbols($libs, $pcs); + } + + if (!defined($main::opt_thread)) { + FilterAndPrint($profile, $symbols, $libs); + } + if (defined($data->{threads})) { + foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) { + if (defined($main::opt_thread) && + ($main::opt_thread eq '*' || $main::opt_thread == $thread)) { + my $thread_profile = $data->{threads}{$thread}; + FilterAndPrint($thread_profile, $symbols, $libs, $thread); + } + } + } cleanup(); exit(0); @@ -780,14 +804,14 @@ sub InteractiveMode { $| = 1; # Make output unbuffered for interactive mode my ($orig_profile, $symbols, $libs, $total) = @_; - print STDERR "Welcome to pprof! For help, type 'help'.\n"; + print STDERR "Welcome to jeprof! For help, type 'help'.\n"; # Use ReadLine if it's installed and input comes from a console. if ( -t STDIN && !ReadlineMightFail() && defined(eval {require Term::ReadLine}) ) { - my $term = new Term::ReadLine 'pprof'; - while ( defined ($_ = $term->readline('(pprof) '))) { + my $term = new Term::ReadLine 'jeprof'; + while ( defined ($_ = $term->readline('(jeprof) '))) { $term->addhistory($_) if /\S/; if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { last; # exit when we get an interactive command to quit @@ -795,7 +819,7 @@ sub InteractiveMode { } } else { # don't have readline while (1) { - print STDERR "(pprof) "; + print STDERR "(jeprof) "; $_ = ; last if ! defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines @@ -988,7 +1012,7 @@ sub ProcessProfile { sub InteractiveHelpMessage { print STDERR <