渲染线程flush layer耗时问题

问题背景:

相机场景下调出控制中心时,RenderThread有一帧在执行renderFrame时running耗时较长。

原因分析:

打开trace开关(adb shell setprop persist.sys.hwui.skia_atrace_enabled true)后,

可以看到异常的一帧flush layer在循环执行 virtual bool GrOpsTask::onExecute(GrOpFlushState *)函数;

// Runs one flush over the pending render tasks.
//
// Order of work: (1) prepare() every instantiated task in fDAG, (2) call
// flushState->preExecuteDraws(), (3) execute and release the tasks in fOnFlushRenderTasks,
// (4) execute every instantiated task in fDAG, (5) reset flushState.
// Each instantiated entry of fDAG receives exactly one execute() call, so the number of
// execute() calls per flush equals the number of instantiated tasks in fDAG (plus onFlush tasks).
//
// flushState - state object passed to prepare()/execute() of every task; reset before returning.
// Returns true if execute() of at least one fDAG task returned true. The results of the onFlush
// tasks do not influence the return value.
bool GrDrawingManager::executeRenderTasks(GrOpFlushState* flushState) {
#if GR_FLUSH_TIME_OP_SPEW
    // Debug-only output: dump every non-null task in fDAG before flushing.
    SkDebugf("Flushing %d opsTasks\n", fDAG.count());
    for (int i = 0; i < fDAG.count(); ++i) {
        if (fDAG[i]) {
            SkString label;
            label.printf("task %d/%d", i, fDAG.count());
            fDAG[i]->dump(label, {}, true, true);
        }
    }
#endif
 
 
    bool anyRenderTasksExecuted = false;
 
 
    // Pass 1: let every instantiated task prepare itself. Null or uninstantiated entries are
    // skipped.
    for (const auto& renderTask : fDAG) {
        if (!renderTask || !renderTask->isInstantiated()) {
             continue;
        }
 
 
        SkASSERT(renderTask->deferredProxiesAreInstantiated());
 
 
        renderTask->prepare(flushState);
    }
 
 
    // Upload all data to the GPU
    flushState->preExecuteDraws();
 
 
    // For Vulkan, if we have too many oplists to be flushed we end up allocating a lot of resources
    // for each command buffer associated with the oplists. If this gets too large we can cause the
    // devices to go OOM. In practice we usually only hit this case in our tests, but to be safe we
    // put a cap on the number of oplists we will execute before flushing to the GPU to relieve some
    // memory pressure.
    static constexpr int kMaxRenderTasksBeforeFlush = 100;
    // Counts executions since the last submitToGpu(); shared by the onFlush loop and the fDAG loop.
    int numRenderTasksExecuted = 0;
 
 
    // Execute the onFlush renderTasks first, if any.
    // A failed execute() is only logged; the task is still disowned and its reference cleared.
    for (sk_sp<GrRenderTask>& onFlushRenderTask : fOnFlushRenderTasks) {
        if (!onFlushRenderTask->execute(flushState)) {
            SkDebugf("WARNING: onFlushRenderTask failed to execute.\n");
        }
        SkASSERT(onFlushRenderTask->unique());
        onFlushRenderTask->disown(this);
        onFlushRenderTask = nullptr;
        if (++numRenderTasksExecuted >= kMaxRenderTasksBeforeFlush) {
            flushState->gpu()->submitToGpu(false);
            numRenderTasksExecuted = 0;
        }
    }
    fOnFlushRenderTasks.reset();
 
 
    // Execute the normal op lists.
    // Unlike the prepare pass above, a null entry is asserted against here instead of skipped.
    for (const auto& renderTask : fDAG) {
        SkASSERT(renderTask);
        if (!renderTask->isInstantiated()) {
            continue;
        }
 
 
        if (renderTask->execute(flushState)) {
            anyRenderTasksExecuted = true;
        }
        // The counter advances even when execute() returned false.
        if (++numRenderTasksExecuted >= kMaxRenderTasksBeforeFlush) {
            flushState->gpu()->submitToGpu(false);
            numRenderTasksExecuted = 0;
        }
    }
 
 
    SkASSERT(!flushState->opsRenderPass());
    SkASSERT(fTokenTracker.nextDrawToken() == fTokenTracker.nextTokenToFlush());
 
 
    // We reset the flush state before the RenderTasks so that the last resources to be freed are
    // those that are written to in the RenderTasks. This helps to make sure the most recently used
    // resources are the last to be purged by the resource cache.
    flushState->reset();
 
 
    return anyRenderTasksExecuted;
}

drawPaint()、drawPath()、drawTextBlob()、drawRRect()等多个函数都会增加fDAG中的任务数量,fDAG的添加路径如下:

异常一帧确定在GrDrawingManager::newOpsTask中添加了fDAG数量。

// Creates a new GrOpsTask for the given surface view and registers it with this manager.
//
// surfaceView      - target view, moved into the new task.
// arenas           - arenas object, moved into the new task.
// flushTimeOpsTask - when true the task is queued in fOnFlushRenderTasks; otherwise it is
//                    appended via appendTask() and becomes fActiveOpsTask.
// Returns the newly created task.
sk_sp<GrOpsTask> GrDrawingManager::newOpsTask(GrSurfaceProxyView surfaceView,
                                              sk_sp<GrArenas> arenas,
                                              bool flushTimeOpsTask) {
    SkDEBUGCODE(this->validate());
    SkASSERT(fContext);

    // Any currently active ops task is closed before a new one is created.
    closeActiveOpsTask();

    auto* auditTrail = fContext->priv().auditTrail();
    sk_sp<GrOpsTask> createdTask(
            new GrOpsTask(this, std::move(surfaceView), auditTrail, std::move(arenas)));
    SkASSERT(this->getLastRenderTask(createdTask->target(0)) == createdTask.get());

    if (!flushTimeOpsTask) {
        // Regular task: appended through appendTask() and remembered as the active ops task.
        appendTask(createdTask);
        fActiveOpsTask = createdTask.get();
    } else {
        // Flush-time task: kept in the separate onFlush list.
        fOnFlushRenderTasks.push_back(createdTask);
    }

    SkDEBUGCODE(this->validate());
    return createdTask;
}

耗时较长的这一帧共执行了11次onExecute:其中int SkCanvas::saveLayer(const SkCanvas::SaveLayerRec &)执行2次,SkGpuDevice::drawDevice执行2次,void SkCanvas::drawPaint(const SkPaint &)执行7次,合计通过newOpsTask调用appendTask 11次,因此onExecute执行11次,running时间较长。

1.drawLayer [QSControlCenterHeaderView] 1080.0 x 381.0

2.drawLayer [QSControlExpandTileView] 407.0 x 210.0

void SkCanvas::drawPaint(const SkPaint &) 添加一个

3.drawLayer [QSBigTileView] 407.0 x 210.0

void SkCanvas::drawPaint(const SkPaint &) 添加一个

4.drawLayer [QSBigTileView] 407.0 x 210.0

void SkCanvas::drawPaint(const SkPaint &) 添加一个

5.drawLayer [QSBigTileView] 407.0 x 210.0

void SkCanvas::drawPaint(const SkPaint &) 添加一个

6.drawLayer [AutoBrightnessView] 170.0 x 170.0

void SkCanvas::drawPaint(const SkPaint &) 添加一个

7.drawLayer [QCToggleSliderView] 625.0 x 170.0

void SkCanvas::drawPaint(const SkPaint &) 添加一个

int SkCanvas::saveLayer(const SkCanvas::SaveLayerRec &) 添加两个

SkGpuDevice::drawDevice 添加两个

8.drawLayer [LinearLayout] 842.0 x 399.0

void SkCanvas::drawPaint(const SkPaint &) 添加一个

共添加11次,执行11次virtual bool GrOpsTask::onExecute(GrOpFlushState *)。

void SkCanvas::drawPaint的调用时机:

堆栈信息:

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #00 pc 0000000000266e70 /system/lib64/libhwui.so (SkCanvas::drawColor(SkRGBA4f<(SkAlphaType)3> const&, SkBlendMode)+216)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #01 pc 00000000005027f8 /system/lib64/libhwui.so (android::uirenderer::skiapipeline::SkiaPipeline::renderFrameImpl(SkRect const&, std::__1::vector<android::sp<android::uirenderer::RenderNode>, std::__1::allocator<android::sp<android::uirenderer::RenderNode> > > const&, bool, android::uirenderer::Rect const&, SkCanvas*, SkMatrix const&)+368)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #02 pc 0000000000347b64 /system/lib64/libhwui.so (android::uirenderer::skiapipeline::SkiaPipeline::renderFrame(android::uirenderer::LayerUpdateQueue const&, SkRect const&, std::__1::vector<android::sp<android::uirenderer::RenderNode>, std::__1::allocator<android::sp<android::uirenderer::RenderNode> > > const&, bool, android::uirenderer::Rect const&, sk_sp<SkSurface>, SkMatrix const&)+184)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #03 pc 000000000034768c /system/lib64/libhwui.so (android::uirenderer::skiapipeline::SkiaOpenGLPipeline::draw(android::uirenderer::renderthread::Frame const&, SkRect const&, SkRect const&, android::uirenderer::LightGeometry const&, android::uirenderer::LayerUpdateQueue*, android::uirenderer::Rect const&, bool, android::uirenderer::LightInfo const&, std::__1::vector<android::sp<android::uirenderer::RenderNode>, std::__1::allocator<android::sp<android::uirenderer::RenderNode> > > const&, android::uirenderer::FrameInfoVisualizer*)+436)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #04 pc 00000000003c58b4 /system/lib64/libhwui.so (android::uirenderer::renderthread::CanvasContext::draw()+1176)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #05 pc 00000000003c4350 /system/lib64/libhwui.so (_ZNSt3__110__function6__funcIZN7android10uirenderer12renderthread13DrawFrameTask11postAndWaitEvE3c1671e787f244890c877724752face20+784)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #06 pc 00000000003d5280 /system/lib64/libhwui.so (android::uirenderer::WorkQueue::process()+160)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #07 pc 00000000003d4fe0 /system/lib64/libhwui.so (android::uirenderer::renderthread::RenderThread::threadLoop()+88)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #08 pc 000000000001358c /system/lib64/libutils.so (android::Thread::_threadLoop(void)+264)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #09 pc 0000000000012de8 /system/lib64/libutils.so (thread_data_t::trampoline(thread_data_t const)+408)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #10 pc 00000000000f0d34 /apex/com.android.runtime/lib64/bionic/libc.so (__pthread_start(void)+264)

03-07 15:04:51.302 5043 7711 D gwq:drawPaint: #11 pc 000000000008d57c /apex/com.android.runtime/lib64/bionic/libc.so (__start_thread+68)

ui 线程被阻塞以后,渲染线程会调用到上一次插入到队列里的 DrawFrameTask.run 方法,run() 这里会先调用 syncFrameState,这个方法主要是用于同步 java 层的各类数据。

调用 CanvasContext::prepareTree 来将前面在 java 层构建的 DrawOp 树同步到 c++ 层,以便后续运行 OpenGL 的命令。这里关键的调用链是:CanvasContext::prepareTree——>RenderNode::prepareTree——>RenderNode::prepareTreeImpl

// Prepares every root RenderNode for the upcoming frame and decides whether the frame may be
// drawn; the decision is reported through info.out.canDrawThisFrame.
//
// info        - in/out traversal state handed to each node's prepareTree().
// uiFrameInfo - frame data imported into mCurrentFrameInfo via importUiThreadInfo().
// syncQueued  - stored in mCurrentFrameInfo under FrameInfoIndex::SyncQueued.
// target      - the node prepared in MODE_FULL; all other nodes are prepared in MODE_RT_ONLY.
//
// The frame is flagged SkippedFrame (and canDrawThisFrame is false) when there is no surface,
// when swap history exists, forceDrawFrame is off and the last swap's vsync is within 2 ms of the
// latest vsync, when more than two render nodes exist and mRenderNodes[1] is not renderable, or
// when reserveNext() on the native surface fails.
// Before returning normally, a frame callback may be re-posted for pending animations, an
// undrawable frame, or an animated-image delay.
void CanvasContext::prepareTree(TreeInfo& info, int64_t* uiFrameInfo, int64_t syncQueued,
                                RenderNode* target) {
    mRenderThread.removeFrameCallback(this);
 
 
    // If the previous frame was dropped we don't need to hold onto it, so
    // just keep using the previous frame's structure instead
    if (!wasSkipped(mCurrentFrameInfo)) {
        mCurrentFrameInfo = mJankTracker.startFrame();
    }
 
 
    mCurrentFrameInfo->importUiThreadInfo(uiFrameInfo);
    mCurrentFrameInfo->set(FrameInfoIndex::SyncQueued) = syncQueued;
    mCurrentFrameInfo->markSyncStart();
 
 
    // Hand this context's damage accumulator and layer update queue to the traversal.
    info.damageAccumulator = &mDamageAccumulator;
    info.layerUpdateQueue = &mLayerUpdateQueue;
    info.damageGenerationId = mDamageId++;
    info.out.canDrawThisFrame = true;
 
 
    mAnimationContext->startFrame(info.mode);
    // Prepare each root RenderNode; this walks the node tree.
    for (const sp<RenderNode>& node : mRenderNodes) {
        // Only the primary target node will be drawn full - all other nodes would get drawn in
        // real time mode. In case of a window, the primary node is the window content and the other
        // node(s) are non client / filler nodes.
        info.mode = (node.get() == target ? TreeInfo::MODE_FULL : TreeInfo::MODE_RT_ONLY);
        node->prepareTree(info);
        GL_CHECKPOINT(MODERATE);
    }
    mAnimationContext->runRemainingAnimations(info);
    GL_CHECKPOINT(MODERATE);
 
 
    freePrefetchedLayers();
    GL_CHECKPOINT(MODERATE);
 
 
    mIsDirty = true;
 
 
    // Without a surface nothing can be drawn: flag the frame as skipped and stop here.
    if (CC_UNLIKELY(!hasSurface())) {
        mCurrentFrameInfo->addFlag(FrameInfoFlags::SkippedFrame);
        info.out.canDrawThisFrame = false;
        return;
    }
 
 
    if (CC_LIKELY(mSwapHistory.size() && !Properties::forceDrawFrame)) {
        nsecs_t latestVsync = mRenderThread.timeLord().latestVsync();
        SwapHistory& lastSwap = mSwapHistory.back();
        nsecs_t vsyncDelta = std::abs(lastSwap.vsyncTime - latestVsync);
        // The slight fudge-factor is to deal with cases where
        // the vsync was estimated due to being slow handling the signal.
        // See the logic in TimeLord#computeFrameTimeNanos or in
        // Choreographer.java for details on when this happens
        if (vsyncDelta < 2_ms) {
            // Already drew for this vsync pulse, UI draw request missed
            // the deadline for RT animations
            info.out.canDrawThisFrame = false;
        }
    } else {
        info.out.canDrawThisFrame = true;
    }
 
 
    // TODO: Do we need to abort out if the backdrop is added but not ready? Should that even
    // be an allowable combination?
    if (mRenderNodes.size() > 2 && !mRenderNodes[1]->isRenderable()) {
        info.out.canDrawThisFrame = false;
    }
 
 
    if (info.out.canDrawThisFrame) {
        // reserveNext() is called on the native surface only when the frame is still drawable;
        // any failure turns it into a skipped frame.
        int err = mNativeSurface->reserveNext();
        if (err != OK) {
            mCurrentFrameInfo->addFlag(FrameInfoFlags::SkippedFrame);
            info.out.canDrawThisFrame = false;
            ALOGW("reserveNext failed, error = %d (%s)", err, strerror(-err));
            if (err != TIMED_OUT) {
                // A timed out surface can still recover, but assume others are permanently dead.
                setSurface(nullptr);
                return;
            }
        }
    } else {
        mCurrentFrameInfo->addFlag(FrameInfoFlags::SkippedFrame);
    }
 
 
    // Re-post a frame callback when animations are pending or this frame could not be drawn,
    // unless a UI redraw is required.
    bool postedFrameCallback = false;
    if (info.out.hasAnimations || !info.out.canDrawThisFrame) {
        if (CC_UNLIKELY(!Properties::enableRTAnimations)) {
            info.out.requiresUiRedraw = true;
        }
        if (!info.out.requiresUiRedraw) {
            // If animationsNeedsRedraw is set don't bother posting for an RT anim
            // as we will just end up fighting the UI thread.
            mRenderThread.postFrameCallback(this);
            postedFrameCallback = true;
        }
    }
 
 
    // No callback posted yet, but an animated image reported a delay: schedule a callback for it.
    if (!postedFrameCallback &&
        info.out.animatedImageDelay != TreeInfo::Out::kNoAnimatedImageDelay) {
        // Subtract the time of one frame so it can be displayed on time.
        const nsecs_t kFrameTime = mRenderThread.timeLord().frameIntervalNanos();
        if (info.out.animatedImageDelay <= kFrameTime) {
            mRenderThread.postFrameCallback(this);
        } else {
            const auto delay = info.out.animatedImageDelay - kFrameTime;
            // Capture the current generation id; the delayed callback is posted only if
            // mGenerationID still matches when the delay expires.
            int genId = mGenerationID;
            mRenderThread.queue().postDelayed(delay, [this, genId]() {
                if (mGenerationID == genId) {
                    mRenderThread.postFrameCallback(this);
                }
            });
        }
    }
}

解决方法: