Java Heap 内存分析
Perfetto工具下载: https://github.com/google/perfetto
heap_profile
首先确保刷机的版本是userdebug,否则无法抓到三方app的Java Heap信息

Java heap sampling
主要解决内存抖动问题
Java heap sampling抓取的是创建对象堆栈的采样(采样大小默认是4KB)
https://perfetto.dev/docs/data-sources/native-heap-profiler#java-heap-sampling
tools/heap_profile --name com.ss.android.ugc.aweme --heaps com.android.art --continuous-dump 1000 --duration 10000
- --name com.ss.android.ugc.aweme 指定抓取的应用
- --heaps com.android.art 指定Java Heap
- --continuous-dump 1000 每1000ms(1s)抓一次采样
- --duration 10000 连续抓10000ms(10s)
- --interval 采样间隔(字节),默认是4096 (4KiB)


Java heap dump
主要解决内存泄漏问题
Java heap dump抓取的是当前Java heap的快照,不包括堆栈信息,slice堆叠表现为对象间引用关系,注意和Java heap sampling区分
https://perfetto.dev/docs/data-sources/java-heap-profiler
tools/java_heap_dump -n com.ss.android.ugc.aweme
- --continuous-dump 5000 每5000ms(5s)抓一次dump
- --wait-for-oom 保持tracing开启直到发生OOM
- --output FILE 指定输出文件

No profiles generated
抓不到Java heap sampling可能存在的问题:
- 确保刷的包是userdebug
- 使用 adb shell su root setenforce 0 关闭SELinux
- 去应用商店下载一个新的app
ReportSample
在AllocWithNewTLAB打点
// Excerpt of Heap::AllocWithNewTLAB showing only the Java heap profiler (JHP)
// hook. The allocation logic itself is elided ("..."); that elided part is
// where `ret`, `take_sample` and `bytes_until_sample` get their final values.
// Returns `ret`, which is initialised to nullptr in the visible part.
mirror::Object* Heap::AllocWithNewTLAB(Thread* self,
AllocatorType allocator_type,
size_t alloc_size,
bool grow,
size_t* bytes_allocated,
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
mirror::Object* ret = nullptr;
bool take_sample = false;
size_t bytes_until_sample = 0;
// Query the sampler once up front; the result gates the reporting below.
bool jhp_enabled = GetHeapSampler().IsEnabled();
...
// JavaHeapProfiler: Send the thread information about this allocation in case a sample is
// requested.
// This is the fallthrough from both the if and else if above cases => Cases that use TLAB.
if (jhp_enabled) {
if (take_sample) {
// The arguments are the allocated object and alloc_size.
GetHeapSampler().ReportSample(ret, alloc_size);
// Update the bytes_until_sample now that the allocation is already done.
GetHeapSampler().SetBytesUntilSample(bytes_until_sample);
}
VLOG(heap) << "JHP:Fallthrough Tlab allocation";
}
return ret;
}
// Reports one sampled allocation to the Perfetto heap profiler.
//
// obj:             the allocated object; its address is used as the allocation id.
// allocation_size: the size passed on to the profiler for this sample.
//
// The call into AHeapProfile_reportSample is only compiled in when
// ART_TARGET_ANDROID is defined; without it this function only emits the
// verbose log lines.
void HeapSampler::ReportSample(art::mirror::Object* obj, size_t allocation_size) {
  // The object address doubles as the id handed to the profiler.
  const uint64_t allocation_id = reinterpret_cast<uint64_t>(obj);

  VLOG(heap) << "JHP:***Report Perfetto Allocation: alloc_size: " << allocation_size;
  VLOG(heap) << "JHP:***Report Perfetto Allocation: obj: " << allocation_id;

#ifdef ART_TARGET_ANDROID
  AHeapProfile_reportSample(perfetto_heap_id_, allocation_id, allocation_size);
#endif
}
// Excerpt of the exported entry point that reports one sample for `heap_id`.
// `id` identifies the allocation and `size` is its size.
// Returns false when the heap is not enabled or when recording fails,
// true otherwise. Part of the body is elided ("..."); `client` is obtained
// in that elided part.
__attribute__((visibility("default"))) bool
AHeapProfile_reportSample(uint32_t heap_id, uint64_t id, uint64_t size) {
const AHeapInfo& heap = GetHeap(heap_id);
// Bail out early if this heap is not currently enabled.
if (!heap.enabled.load(std::memory_order_acquire)) {
return false;
}
...
// `size` is passed for both size arguments of RecordMalloc and `id` as the
// address argument. A failed record shuts the client down lazily.
if (!client->RecordMalloc(heap_id, size, size, id)) {
ShutdownLazy(client);
return false;
}
return true;
}
// external/perfetto/src/profiling/memory/client.cc
// The stack grows towards numerically smaller addresses, so the stack layout
// of main calling malloc is as follows.
//
// +------------+
// |SendWireMsg |
// stackptr +--> +------------+ 0x1000
// |RecordMalloc| +
// +------------+ |
// | malloc | |
// +------------+ |
// | main | v
// stackend +-> +------------+ 0xffff
// Records one allocation sample: fills an AllocMetadata, uses the raw stack
// bytes between the captured stack/frame pointer and the stack end as the
// payload, and sends both as a WireMessage.
//
// Returns:
//   - postfork_return_value_ when IsPostFork() is true;
//   - false when the stack bounds cannot be determined or sending fails;
//   - true when the message was sent and GetAndResetReaderPaused() is false;
//   - otherwise the result of SendControlSocketByte().
bool Client::RecordMalloc(uint32_t heap_id,
uint64_t sample_size,
uint64_t alloc_size,
uint64_t alloc_address) {
if (PERFETTO_UNLIKELY(IsPostFork())) {
return postfork_return_value_;
}
AllocMetadata metadata;
// By the difference between calling conventions, the frame pointer might
// include the current frame or not. So, using __builtin_frame_address()
// on specific architectures such as riscv can make stack unwinding failed.
// Thus, using __builtin_stack_address() or reading the stack pointer in
// register data directly instead of using __builtin_frame_address() on riscv.
#if PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_RISCV)
#if PERFETTO_HAS_BUILTIN_STACK_ADDRESS()
const char* stackptr = reinterpret_cast<char*>(__builtin_stack_address());
unwindstack::AsmGetRegs(metadata.register_data);
#else
// No stack-address builtin: capture the registers first, then derive the
// stack address from the captured register data.
char* register_data = metadata.register_data;
unwindstack::AsmGetRegs(register_data);
const char* stackptr = reinterpret_cast<char*>(
GetStackAddress(register_data, unwindstack::Regs::CurrentArch()));
if (!stackptr) {
PERFETTO_ELOG("Failed to get stack address.");
shmem_.SetErrorState(SharedRingBuffer::kInvalidStackBounds);
return false;
}
#endif /* PERFETTO_HAS_BUILTIN_STACK_ADDRESS() */
#else
// The frame address and register state captured here are the starting point
// for the call stack information (the original note said "callback";
// presumably "call stack" was meant).
const char* stackptr = reinterpret_cast<char*>(__builtin_frame_address(0));
unwindstack::AsmGetRegs(metadata.register_data);
#endif /* PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_RISCV) */
const char* stackend = GetStackEnd(stackptr);
if (!stackend) {
PERFETTO_ELOG("Failed to find stackend.");
shmem_.SetErrorState(SharedRingBuffer::kInvalidStackBounds);
return false;
}
// Number of stack bytes to send: from the captured pointer up to the stack end.
uint64_t stack_size = static_cast<uint64_t>(stackend - stackptr);
metadata.sample_size = sample_size;
metadata.alloc_size = alloc_size;
metadata.alloc_address = alloc_address;
metadata.stack_pointer = reinterpret_cast<uint64_t>(stackptr);
metadata.arch = unwindstack::Regs::CurrentArch();
// The counter is kept per heap_id; fetch_add returns the pre-increment
// value, hence the +1.
metadata.sequence_number =
1 + sequence_number_[heap_id].fetch_add(1, std::memory_order_acq_rel);
metadata.heap_id = heap_id;
struct timespec ts;
// A failing clock_gettime does not fail the sample; the timestamp is set to 0.
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0) {
metadata.clock_monotonic_coarse_timestamp =
static_cast<uint64_t>(base::FromPosixTimespec(ts).count());
} else {
metadata.clock_monotonic_coarse_timestamp = 0;
}
WireMessage msg{};
msg.record_type = RecordType::Malloc;
msg.alloc_header = &metadata;
// The payload is the stack memory itself, starting at stackptr.
msg.payload = const_cast<char*>(stackptr);
msg.payload_size = static_cast<size_t>(stack_size);
if (SendWireMessageWithRetriesIfBlocking(msg) == -1)
return false;
// NOTE(review): presumably a paused reader has to be woken through the
// control socket - confirm against SharedRingBuffer.
if (!shmem_.GetAndResetReaderPaused())
return true;
return SendControlSocketByte();
}