VP9 Video Corrupt/Black Screen After Merging (Stream Copy) in some Android devices

6 hours ago 1
ARTICLE AD BOX

Is there a specific bitstream filter required for VP9 in an MP4 container that I am missing, or is AVFMT_FLAG_AUTO_BSF actually required for VP9 but failing for another reason? Why would this stream copy work for H.264 but produce a black screen for VP9 on Android? Where could the real problem be — or is the code correct and the problem elsewhere?

I am using the FFmpeg libraries (libavformat/libavcodec), built as native .so files and called via JNI in an Android app, to merge separate video and audio tracks (remuxing/stream copying).

While H.264 and H.265 files merge perfectly, VP9 videos (specifically from Instagram/YouTube sources) result in a black screen or playback errors on some Android players after merging.

The Symptoms:

H.264/AAC merges and plays fine.

VP9/Opus or VP9/AAC results in a file that has the correct duration but no visible video, or fails to initialize the decoder on some Android devices.

I suspect the issue relates to Bitstream Filters (BSF) or timestamp handling.

What I've tried: I updated my code to include three specific fixes:

Skipping AVFMT_FLAG_AUTO_BSF for VP9: I noticed auto-bsf might be corrupting VP9 headers.

Setting avoid_negative_ts: To handle sources with negative start times.

Filtering out ATTACHED_PIC: To ensure I don't accidentally mux a thumbnail as the main video stream.

My JNI Implementation:

#include <jni.h>
#include <libavformat/avformat.h>
#include <android/log.h>

#ifdef NDEBUG
/* NDEBUG is automatically defined in release builds: compile logging out. */
#define LOGD(...)
#define LOGE(...)
#else
#define TAG "Downloader"
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
#endif

#include <libavutil/cpu.h>

/**
 * Merges a video-only input file and an audio-only input file into a single
 * output container by stream copy (remux) — no decoding or re-encoding.
 *
 * @param jVideoPath path of the input file providing the video track
 * @param jAudioPath path of the input file providing the audio track
 * @param jOutPath   path of the merged output; the container format is
 *                   inferred from its extension by avformat_alloc_output_context2
 * @return 0 on success, a negative step-specific error code on failure
 */
JNIEXPORT jint JNICALL Java_com_harrshbermann_SocialMate_google_logEvent(
        JNIEnv *env, jobject thiz,
        jstring jVideoPath, jstring jAudioPath, jstring jOutPath)
{
    /* All declarations at the top (C89 style, matches the NDK build flags). */
    AVFormatContext *ifmt_ctx_v = NULL, *ifmt_ctx_a = NULL, *ofmt_ctx = NULL;
    AVStream *in_v = NULL, *in_a = NULL, *out_v = NULL, *out_a = NULL;
    AVPacket *pkt = NULL;
    int ret = 0;
    int v_idx, a_idx;
    int v_done = 0, a_done = 0;
    int64_t v_dts = 0, a_dts = 0;   /* last valid DTS per input, in input time_base */
    int file_opened = 0;            /* avio_open succeeded -> must avio_closep */
    const char *vPath = NULL, *aPath = NULL, *oPath = NULL;

    /* BUG FIX: GetStringUTFChars may return NULL (OOM / pending exception);
     * the original used the results unchecked and released them
     * unconditionally in cleanup. */
    vPath = (*env)->GetStringUTFChars(env, jVideoPath, 0);
    aPath = (*env)->GetStringUTFChars(env, jAudioPath, 0);
    oPath = (*env)->GetStringUTFChars(env, jOutPath, 0);
    if (!vPath || !aPath || !oPath) {
        LOGE("GetStringUTFChars failed");
        ret = -1;
        goto cleanup;
    }

    pkt = av_packet_alloc();
    if (!pkt) {
        LOGE("av_packet_alloc failed");
        ret = -1;
        goto cleanup;
    }

    /* ── Open both inputs and probe their streams ──────────────────────── */
    if (avformat_open_input(&ifmt_ctx_v, vPath, NULL, NULL) < 0) {
        LOGE("Failed to open video");
        ret = -2;
        goto cleanup;
    }
    if (avformat_open_input(&ifmt_ctx_a, aPath, NULL, NULL) < 0) {
        LOGE("Failed to open audio");
        ret = -3;
        goto cleanup;
    }
    if (avformat_find_stream_info(ifmt_ctx_v, NULL) < 0 ||
        avformat_find_stream_info(ifmt_ctx_a, NULL) < 0) {
        LOGE("Failed to find stream info");
        ret = -4;
        goto cleanup;
    }

    /* av_find_best_stream already skips AV_DISPOSITION_ATTACHED_PIC streams,
     * so a cover-art thumbnail cannot be selected as "the" video stream. */
    v_idx = av_find_best_stream(ifmt_ctx_v, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
    a_idx = av_find_best_stream(ifmt_ctx_a, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (v_idx < 0 || a_idx < 0) {
        LOGE("Could not find video/audio stream (v=%d, a=%d)", v_idx, a_idx);
        ret = -5;
        goto cleanup;
    }
    in_v = ifmt_ctx_v->streams[v_idx];
    in_a = ifmt_ctx_a->streams[a_idx];

    /* ── Output context and streams ────────────────────────────────────── */
    if (avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, oPath) < 0 || !ofmt_ctx) {
        LOGE("Failed to alloc output context");
        ret = -6;
        goto cleanup;
    }
    out_v = avformat_new_stream(ofmt_ctx, NULL);
    out_a = avformat_new_stream(ofmt_ctx, NULL);
    if (!out_v || !out_a) {
        LOGE("Failed to create output streams");
        ret = -7;
        goto cleanup;
    }

    /* BUG FIX: the original ignored avcodec_parameters_copy() failures; on
     * failure the muxer would write streams with empty codec parameters
     * (extradata lost -> undecodable video, one plausible cause of the
     * "black screen" symptom). */
    if (avcodec_parameters_copy(out_v->codecpar, in_v->codecpar) < 0 ||
        avcodec_parameters_copy(out_a->codecpar, in_a->codecpar) < 0) {
        LOGE("Failed to copy codec parameters");
        ret = -7;
        goto cleanup;
    }
    out_v->codecpar->codec_tag = 0;  /* let the output muxer pick a valid tag */
    out_a->codecpar->codec_tag = 0;

    /* ── Open the output file (unless the format needs no file) ────────── */
    if (!(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        if (avio_open(&ofmt_ctx->pb, oPath, AVIO_FLAG_WRITE) < 0) {
            LOGE("Failed to open output file");
            ret = -8;
            goto cleanup;
        }
        file_opened = 1;
    }

    /* AUTO_BSF (on by default) lets the muxer insert whatever bitstream
     * filter it requires — h264_mp4toannexb for H.264, vp9_superframe for
     * VP9-in-MP4, etc. Keep it enabled for every codec. */
    ofmt_ctx->flags |= AVFMT_FLAG_AUTO_BSF;
    /* BUG FIX: web-downloaded sources often start with negative timestamps
     * (edit lists); without this some players drop or black-screen the
     * first GOP. Shift everything so the output starts at ts >= 0. */
    ofmt_ctx->avoid_negative_ts = AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE;

    if (avformat_write_header(ofmt_ctx, NULL) < 0) {
        LOGE("Failed to write header");
        ret = -9;
        goto cleanup;
    }

    /* ── Interleaved copy loop: always pull from the input whose last seen
     * DTS is earliest so packets reach the muxer roughly in order ──────── */
    while (!v_done || !a_done) {
        AVFormatContext *src;
        AVStream *in_st, *out_st;
        int target_idx, pick_video, got;
        int64_t ts;

        pick_video = !v_done &&
                     (a_done || av_compare_ts(v_dts, in_v->time_base,
                                              a_dts, in_a->time_base) <= 0);
        src        = pick_video ? ifmt_ctx_v : ifmt_ctx_a;
        in_st      = pick_video ? in_v : in_a;
        out_st     = pick_video ? out_v : out_a;
        target_idx = pick_video ? v_idx : a_idx;

        /* Skip packets from other streams in the same container
         * (subtitles, attached pictures, data tracks). */
        got = 0;
        while (av_read_frame(src, pkt) >= 0) {
            if (pkt->stream_index == target_idx) {
                got = 1;
                break;
            }
            av_packet_unref(pkt);
        }
        if (!got) {  /* EOF on this input */
            if (pick_video) v_done = 1; else a_done = 1;
            continue;
        }

        /* BUG FIX: pkt->dts can be AV_NOPTS_VALUE (INT64_MIN). The original
         * stored it unchecked, poisoning the av_compare_ts interleaving
         * decision, and rescaled it with av_rescale_q_rnd, producing a
         * garbage DTS in the output. Fall back to pts, and keep the previous
         * tracker value when neither timestamp is set. */
        ts = (pkt->dts != AV_NOPTS_VALUE) ? pkt->dts : pkt->pts;
        if (ts != AV_NOPTS_VALUE) {
            if (pick_video) v_dts = ts; else a_dts = ts;
        }

        /* Rescale pts/dts/duration in one call; av_packet_rescale_ts uses
         * AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX internally and leaves
         * AV_NOPTS_VALUE untouched. */
        av_packet_rescale_ts(pkt, in_st->time_base, out_st->time_base);
        pkt->pos = -1;
        pkt->stream_index = out_st->index;  /* actual index, not hardcoded 0/1 */

        if (av_interleaved_write_frame(ofmt_ctx, pkt) < 0) {
            /* Best effort: log and keep copying the remaining packets. */
            LOGE("Error writing %s packet", pick_video ? "video" : "audio");
        }
        av_packet_unref(pkt);
    }

    /* BUG FIX: av_write_trailer() finalizes the index/moov box; the original
     * ignored its result and logged success even for a truncated file. */
    if (av_write_trailer(ofmt_ctx) < 0) {
        LOGE("Failed to write trailer");
        ret = -10;
    } else {
        LOGD("mergeAV finished successfully");
    }

cleanup:
    av_packet_free(&pkt);  /* NULL-safe */
    if (ifmt_ctx_v) avformat_close_input(&ifmt_ctx_v);
    if (ifmt_ctx_a) avformat_close_input(&ifmt_ctx_a);
    if (ofmt_ctx) {
        if (file_opened) avio_closep(&ofmt_ctx->pb);
        avformat_free_context(ofmt_ctx);
    }
    if (vPath) (*env)->ReleaseStringUTFChars(env, jVideoPath, vPath);
    if (aPath) (*env)->ReleaseStringUTFChars(env, jAudioPath, aPath);
    if (oPath) (*env)->ReleaseStringUTFChars(env, jOutPath, oPath);
    return ret;
}

Changes I made:

/*
 * Modified entry point with the author's three fixes applied.
 * Elided sections ("...") are unchanged from the full implementation.
 */
JNIEXPORT jint JNICALL Java_com_example_app_NativeMuxer_mergeAV(
        JNIEnv *env, jobject thiz,
        jstring jVideoPath, jstring jAudioPath, jstring jOutPath)
{
    AVFormatContext *ifmt_ctx_v = NULL, *ifmt_ctx_a = NULL, *ofmt_ctx = NULL;
    AVPacket *pkt = av_packet_alloc();
    int v_idx = -1, a_idx = -1, ret = 0;

    // ... [Opening inputs and finding stream info] ...

    // Fix 1: pick the first real video stream, skipping thumbnail
    // (attached-picture) streams so cover art is never muxed as video.
    for (int i = 0; i < (int)ifmt_ctx_v->nb_streams; i++) {
        if (ifmt_ctx_v->streams[i]->disposition & AV_DISPOSITION_ATTACHED_PIC)
            continue;
        if (ifmt_ctx_v->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            v_idx = i;
            break;
        }
    }
    // BUG FIX: the original indexed streams[v_idx] with v_idx still -1 when
    // no non-thumbnail video stream existed — an out-of-bounds read (UB).
    if (v_idx < 0) {
        av_packet_free(&pkt);
        return -5;
    }

    // Detect VP9 so the BSF decision below can branch on it.
    int is_vp = (ifmt_ctx_v->streams[v_idx]->codecpar->codec_id == AV_CODEC_ID_VP9);

    // BUG FIX: check the output-context allocation (result was ignored).
    if (avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, oPath) < 0 || !ofmt_ctx) {
        av_packet_free(&pkt);
        return -6;
    }

    // Create streams and copy parameters
    // ... [avformat_new_stream & avcodec_parameters_copy] ...
    out_v->codecpar->codec_tag = 0;
    out_a->codecpar->codec_tag = 0;

    // Fix 2: Skip AUTO_BSF for VP9 to avoid bitstream corruption.
    // NOTE(review): AVFMT_FLAG_AUTO_BSF is enabled by default, so |= only
    // matters if it was cleared earlier — and VP9-in-MP4 normally *needs*
    // the auto-inserted vp9_superframe BSF to merge superframes. Confirm
    // that disabling it for VP9 actually helps rather than causing the
    // black screen.
    if (!is_vp) {
        ofmt_ctx->flags |= AVFMT_FLAG_AUTO_BSF;
    }

    // Fix 3: shift negative input timestamps so the output starts at ts >= 0.
    ofmt_ctx->avoid_negative_ts = AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE;

    // BUG FIX: check the header write (result was ignored).
    if (avformat_write_header(ofmt_ctx, NULL) < 0) {
        ret = -9;
        goto done;
    }

    // ... [Interleaved muxing loop with av_rescale_q_rnd] ...

    av_write_trailer(ofmt_ctx);
done:
    // ... [Cleanup] ...
    return ret;
}
Read Entire Article