Java Heap 内存分析

Perfetto工具下载: https://github.com/google/perfetto

heap_profile

首先确保刷机的版本是userdebug,否则无法抓到三方app的Java Heap信息

Java heap sampling

主要解决内存抖动问题

Java heap sampling抓取的是创建对象堆栈的采样(采样大小默认是4KB)

https://perfetto.dev/docs/data-sources/native-heap-profiler#java-heap-sampling

tools/heap_profile --name com.ss.android.ugc.aweme --heaps com.android.art --continuous-dump 1000 --duration 10000
  • --name com.ss.android.ugc.aweme 指定抓取的应用
  • --heaps com.android.art 指定Java Heap
  • --continuous-dump 1000 1s抓一次采样
  • --duration 10000 连续抓10s
  • --interval 采样间隔(字节),默认是4096 (4KiB)

Java heap dump

主要解决内存泄漏问题

Java heap dump抓取的是当前Java heap的快照,不包括堆栈信息,slice堆叠表现为对象间引用关系,注意和Java heap sampling区分

https://perfetto.dev/docs/data-sources/java-heap-profiler

tools/java_heap_dump -n com.ss.android.ugc.aweme
  • --continuous-dump 5000 5s抓一次dump
  • --wait-for-oom 打开tracing直到发生OOM
  • --output FILE

No profiles generated

抓不到Java heap sampling可能存在的问题:

  1. 确保刷的包是userdebug
  2. 使用 adb shell su root setenforce 0 关闭SELinux
  3. 去应用商店下载一个新的app

ReportSample

在AllocWithNewTLAB打点

// Excerpt of Heap::AllocWithNewTLAB. The middle of the body is elided ("...") in these
// notes, so only the Java heap profiler (JHP) hook at the end of the function is shown.
//
// Returns `ret`, which starts out as nullptr. The elided section is presumably where the
// allocation happens and where `ret`, `take_sample` and `bytes_until_sample` receive their
// final values -- confirm against the full ART source.
mirror::Object* Heap::AllocWithNewTLAB(Thread* self,
                                       AllocatorType allocator_type,
                                       size_t alloc_size,
                                       bool grow,
                                       size_t* bytes_allocated,
                                       size_t* usable_size,
                                       size_t* bytes_tl_bulk_allocated) {
  mirror::Object* ret = nullptr;
  // JHP bookkeeping: whether this allocation should be reported as a sample, and the
  // byte budget to store back into the sampler once it has been reported.
  bool take_sample = false;
  size_t bytes_until_sample = 0;
  // The sampler's enabled state is read once up front and reused by the hook below.
  bool jhp_enabled = GetHeapSampler().IsEnabled();
  ...
  // JavaHeapProfiler: Send the thread information about this allocation in case a sample is
  // requested.
  // This is the fallthrough from both the if and else if above cases => Cases that use TLAB.
  if (jhp_enabled) {
    if (take_sample) {
      // The arguments are the allocated object and alloc_size.
      GetHeapSampler().ReportSample(ret, alloc_size);
      // Update the bytes_until_sample now that the allocation is already done.
      GetHeapSampler().SetBytesUntilSample(bytes_until_sample);
    }
    // Logged whenever JHP is enabled, whether or not a sample was taken.
    VLOG(heap) << "JHP:Fallthrough Tlab allocation";
  }
  return ret;
}
 
// Reports one sampled allocation to the Perfetto heap profiler.
//
// obj:             the sampled object; its address, widened to 64 bits, is used as the
//                  allocation id passed to Perfetto.
// allocation_size: size value forwarded unchanged as the sample's size.
//
// Both values are always written to the verbose heap log. The Perfetto call itself is
// compiled in only when ART_TARGET_ANDROID is defined.
void HeapSampler::ReportSample(art::mirror::Object* obj, size_t allocation_size) {
  const uint64_t allocation_id = reinterpret_cast<uint64_t>(obj);
  VLOG(heap) << "JHP:***Report Perfetto Allocation: alloc_size: " << allocation_size;
  VLOG(heap) << "JHP:***Report Perfetto Allocation: obj: " << allocation_id;
#ifdef ART_TARGET_ANDROID
  AHeapProfile_reportSample(perfetto_heap_id_, allocation_id, allocation_size);
#endif
}
 
 
// Excerpt of AHeapProfile_reportSample, exported with default visibility. Part of the body
// is elided ("...") in these notes; `client` is obtained in that hidden section.
//
// heap_id: identifies the heap the sample belongs to (looked up through GetHeap).
// id:      caller-supplied allocation identifier, forwarded as the last argument of
//          RecordMalloc.
// size:    forwarded to RecordMalloc twice, as both its second and third argument.
//
// Returns false when the heap is not enabled or when RecordMalloc fails; returns true
// after RecordMalloc succeeds.
__attribute__((visibility("default"))) bool
AHeapProfile_reportSample(uint32_t heap_id, uint64_t id, uint64_t size) {
  const AHeapInfo& heap = GetHeap(heap_id);
  // Bail out early if this heap is not currently enabled.
  if (!heap.enabled.load(std::memory_order_acquire)) {
    return false;
  }
  ...
  // A failed record triggers ShutdownLazy on the client before reporting failure.
  if (!client->RecordMalloc(heap_id, size, size, id)) {
    ShutdownLazy(client);
    return false;
  }
  return true;
}
 
// external/perfetto/src/profiling/memory/client.cc
// The stack grows towards numerically smaller addresses, so the stack layout
// of main calling malloc is as follows.
//
//               +------------+
//               |SendWireMsg |
// stackptr +--> +------------+ 0x1000
//               |RecordMalloc|    +
//               +------------+    |
//               | malloc     |    |
//               +------------+    |
//               |  main      |    v
// stackend  +-> +------------+ 0xffff
//
// Records one sampled allocation: fills an AllocMetadata (sizes, address, registers,
// stack pointer, architecture, sequence number, heap id, coarse timestamp) and sends it in
// a WireMessage whose payload is the raw stack memory in [stackptr, stackend).
//
// Returns:
//   - postfork_return_value_ when IsPostFork() is true;
//   - false when the stack address or stack end cannot be determined (the shared buffer
//     is put into the kInvalidStackBounds error state) or when sending the message fails;
//   - true when the message was sent and the reader was not flagged as paused;
//   - otherwise the result of SendControlSocketByte().
bool Client::RecordMalloc(uint32_t heap_id,
                          uint64_t sample_size,
                          uint64_t alloc_size,
                          uint64_t alloc_address) {
  if (PERFETTO_UNLIKELY(IsPostFork())) {
    return postfork_return_value_;
  }
  AllocMetadata metadata;
  // By the difference between calling conventions, the frame pointer might
  // include the current frame or not. So, using __builtin_frame_address()
  // on specific architectures such as riscv can make stack unwinding failed.
  // Thus, using __builtin_stack_address() or reading the stack pointer in
  // register data directly instead of using __builtin_frame_address() on riscv.
#if PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_RISCV)
#if PERFETTO_HAS_BUILTIN_STACK_ADDRESS()
  const char* stackptr = reinterpret_cast<char*>(__builtin_stack_address());
  unwindstack::AsmGetRegs(metadata.register_data);
#else
  // No builtin available: capture the registers first, then derive the stack address
  // from the captured register data.
  char* register_data = metadata.register_data;
  unwindstack::AsmGetRegs(register_data);
  const char* stackptr = reinterpret_cast<char*>(
      GetStackAddress(register_data, unwindstack::Regs::CurrentArch()));
  if (!stackptr) {
    PERFETTO_ELOG("Failed to get stack address.");
    shmem_.SetErrorState(SharedRingBuffer::kInvalidStackBounds);
    return false;
  }
#endif /* PERFETTO_HAS_BUILTIN_STACK_ADDRESS() */
#else
  // The call-stack information is captured here: the current frame address becomes the
  // start of the stack range that is sent, and the registers are saved into the metadata.
  const char* stackptr = reinterpret_cast<char*>(__builtin_frame_address(0));
  unwindstack::AsmGetRegs(metadata.register_data);
#endif /* PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_RISCV) */
  const char* stackend = GetStackEnd(stackptr);
  if (!stackend) {
    PERFETTO_ELOG("Failed to find stackend.");
    shmem_.SetErrorState(SharedRingBuffer::kInvalidStackBounds);
    return false;
  }
  // Number of stack bytes between the capture point and the end of the stack; this is the
  // payload size sent below.
  uint64_t stack_size = static_cast<uint64_t>(stackend - stackptr);
  metadata.sample_size = sample_size;
  metadata.alloc_size = alloc_size;
  metadata.alloc_address = alloc_address;
  metadata.stack_pointer = reinterpret_cast<uint64_t>(stackptr);
  metadata.arch = unwindstack::Regs::CurrentArch();
  // fetch_add returns the previous value, so the recorded number is the post-increment
  // value of the per-heap counter.
  metadata.sequence_number =
      1 + sequence_number_[heap_id].fetch_add(1, std::memory_order_acq_rel);
  metadata.heap_id = heap_id;
 
  // Timestamp is best-effort: it is recorded as 0 if the clock cannot be read.
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0) {
    metadata.clock_monotonic_coarse_timestamp =
        static_cast<uint64_t>(base::FromPosixTimespec(ts).count());
  } else {
    metadata.clock_monotonic_coarse_timestamp = 0;
  }
 
  // The message points at the metadata and directly at the live stack memory; the stack
  // contents are presumably unwound by the receiving side -- confirm against heapprofd.
  WireMessage msg{};
  msg.record_type = RecordType::Malloc;
  msg.alloc_header = &metadata;
  msg.payload = const_cast<char*>(stackptr);
  msg.payload_size = static_cast<size_t>(stack_size);
 
  if (SendWireMessageWithRetriesIfBlocking(msg) == -1)
    return false;
 
  // Only when the reader was flagged as paused is a control-socket byte sent; the flag is
  // reset by the same call that reads it.
  if (!shmem_.GetAndResetReaderPaused())
    return true;
  return SendControlSocketByte();
}