diff --git a/RELEASENOTES.md b/RELEASENOTES.md index 62a0143a876..5e9ea278126 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -50,6 +50,8 @@ `ColorInfo.colorSpace`, `ColorInfo.colorTransfer`, and `ColorInfo.colorRange` values ([#692](https://github.com/androidx/media/pull/692)). + * Fix wrong keyframe detection for TS H264 streams + ([#864](https://github.com/androidx/media/pull/864)). * Audio: * Video: * Add workaround for a device issue on Galaxy Tab S7 FE, Chromecast with diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/ts/H264Reader.java b/libraries/extractor/src/main/java/androidx/media3/extractor/ts/H264Reader.java index c6cb9376162..07786a4ba5e 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/ts/H264Reader.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/ts/H264Reader.java @@ -183,7 +183,7 @@ private void startNalUnit(long position, int nalUnitType, long pesTimeUs) { pps.startNalUnit(nalUnitType); } sei.startNalUnit(nalUnitType); - sampleReader.startNalUnit(position, nalUnitType, pesTimeUs); + sampleReader.startNalUnit(position, nalUnitType, pesTimeUs, randomAccessIndicator); } @RequiresNonNull("sampleReader") @@ -253,8 +253,7 @@ private void endNalUnit(long position, int offset, int discardPadding, long pesT seiWrapper.setPosition(4); // NAL prefix and nal_unit() header. seiReader.consume(pesTimeUs, seiWrapper); } - boolean sampleIsKeyFrame = - sampleReader.endNalUnit(position, offset, hasOutputFormat, randomAccessIndicator); + boolean sampleIsKeyFrame = sampleReader.endNalUnit(position, offset, hasOutputFormat); if (sampleIsKeyFrame) { // This is either an IDR frame or the first I-frame since the random access indicator, so mark // it as a keyframe. Clear the flag so that subsequent non-IDR I-frames are not marked as @@ -297,6 +296,7 @@ private static final class SampleReader { private long samplePosition; private long sampleTimeUs; private boolean sampleIsKeyframe; + private boolean randomAccessIndicator; public SampleReader( TrackOutput output, boolean allowNonIdrKeyframes, boolean detectAccessUnits) { @@ -330,10 +330,12 @@ public void reset() { sliceHeader.clear(); } - public void startNalUnit(long position, int type, long pesTimeUs) { + public void startNalUnit( + long position, int type, long pesTimeUs, boolean randomAccessIndicator) { nalUnitType = type; nalUnitTimeUs = pesTimeUs; nalUnitStartPosition = position; + this.randomAccessIndicator = randomAccessIndicator; if ((allowNonIdrKeyframes && nalUnitType == NalUnitUtil.NAL_UNIT_TYPE_NON_IDR) || (detectAccessUnits && (nalUnitType == NalUnitUtil.NAL_UNIT_TYPE_IDR @@ -481,8 +483,7 @@ public void appendToNalUnit(byte[] data, int offset, int limit) { isFilling = false; } - public boolean endNalUnit( - long position, int offset, boolean hasOutputFormat, boolean randomAccessIndicator) { + public boolean endNalUnit(long position, int offset, boolean hasOutputFormat) { if (nalUnitType == NalUnitUtil.NAL_UNIT_TYPE_AUD || (detectAccessUnits && sliceHeader.isFirstVclNalUnitOfPicture(previousSliceHeader))) { // If the NAL unit ending is the start of a new sample, output the previous one. diff --git a/libraries/extractor/src/test/java/androidx/media3/extractor/ts/TsExtractorTest.java b/libraries/extractor/src/test/java/androidx/media3/extractor/ts/TsExtractorTest.java index c0d4a00ec97..4ee83354a39 100644 --- a/libraries/extractor/src/test/java/androidx/media3/extractor/ts/TsExtractorTest.java +++ b/libraries/extractor/src/test/java/androidx/media3/extractor/ts/TsExtractorTest.java @@ -65,6 +65,11 @@ public void sampleWithH263() throws Exception { ExtractorAsserts.assertBehavior(TsExtractor::new, "media/ts/sample_h263.ts", simulationConfig); } + @Test + public void sampleWithH264() throws Exception { + ExtractorAsserts.assertBehavior(TsExtractor::new, "media/ts/sample_h264.ts", simulationConfig); + } + @Test public void sampleWithH264AndMpegAudio() throws Exception { ExtractorAsserts.assertBehavior( diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.0.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.0.dump new file mode 100644 index 00000000000..366338fe96c --- /dev/null +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.0.dump @@ -0,0 +1,144 @@ +seekMap: + isSeekable = true + duration = 900000 + getPosition(0) = [[timeUs=0, position=0]] + getPosition(1) = [[timeUs=1, position=0]] + getPosition(450000) = [[timeUs=450000, position=23030]] + getPosition(900000) = [[timeUs=900000, position=46248]] +numberOfTracks = 1 +track 256: + total output bytes = 41614 + sample count = 30 + format 0: + id = 1/256 + sampleMimeType = video/avc + codecs = avc1.64001F + width = 854 + height = 480 + colorInfo: + lumaBitdepth = 8 + chromaBitdepth = 8 + initializationData: + data = length 29, hash A220FDE0 + data = length 9, hash D971CD89 + sample 0: + time = 66666 + flags = 1 + data = length 856, hash E3FC14FA + sample 1: + time = 100000 + flags = 0 + data = length 3330, hash ACE433CC + sample 2: + time = 166666 + flags = 0 + data = length 6849, hash 2762E298 + sample 3: + time = 133333 + flags = 0 + data = length 794, hash 16489833 + sample 4: + time = 233333 + flags = 0 + data = length 1874, hash 24DF8A2D + sample 5: + time = 200000 + flags = 0 + data = length 1209, hash B137200F + sample 6: + time = 366666 + flags = 0 + data = length 476, hash 1D70B27E + sample 7: + time = 300000 + flags = 0 + data = length 325, hash D3FB2CC2 + sample 8: + time = 266666 + flags = 0 + data = length 89, hash F66C5D60 + sample 9: + time = 333333 + flags = 0 + data = length 177, hash 7E6DB5B7 + sample 10: + time = 400000 + flags = 1 + data = length 11054, hash 1FC7442D + sample 11: + time = 533333 + flags = 0 + data = length 302, hash 5432E85C + sample 12: + time = 466666 + flags = 0 + data = length 302, hash 95A21C90 + sample 13: + time = 433333 + flags = 0 + data = length 96, hash 8C98DBB8 + sample 14: + time = 500000 + flags = 0 + data = length 198, hash 114E22D9 + sample 15: + time = 666666 + flags = 0 + data = length 303, hash C2C45716 + sample 16: + time = 600000 + flags = 0 + data = length 168, hash 1B048DCA + sample 17: + time = 566666 + flags = 0 + data = length 98, hash 3AA3B8B4 + sample 18: + time = 633333 + flags = 0 + data = length 108, hash 4F67E6F8 + sample 19: + time = 700000 + flags = 0 + data = length 110, hash 625A9B91 + sample 20: + time = 733333 + flags = 1 + data = length 11235, hash 8EDDEB9 + sample 21: + time = 866666 + flags = 0 + data = length 358, hash 797276E4 + sample 22: + time = 800000 + flags = 0 + data = length 336, hash B92DBED9 + sample 23: + time = 766666 + flags = 0 + data = length 105, hash 223F3D12 + sample 24: + time = 833333 + flags = 0 + data = length 106, hash DF8D1552 + sample 25: + time = 1000000 + flags = 0 + data = length 235, hash 6127527D + sample 26: + time = 933333 + flags = 0 + data = length 109, hash E8E69F9F + sample 27: + time = 900000 + flags = 0 + data = length 84, hash 54E55CB4 + sample 28: + time = 966666 + flags = 0 + data = length 93, hash B6B6263C + sample 29: + time = 1033333 + flags = 0 + data = length 234, hash A48D3D90 +tracksEnded = true diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.1.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.1.dump new file mode 100644 index 00000000000..afbcb23e5d1 --- /dev/null +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.1.dump @@ -0,0 +1,108 @@ +seekMap: + isSeekable = true + duration = 900000 + getPosition(0) = [[timeUs=0, position=0]] + getPosition(1) = [[timeUs=1, position=0]] + getPosition(450000) = [[timeUs=450000, position=23030]] + getPosition(900000) = [[timeUs=900000, position=46248]] +numberOfTracks = 1 +track 256: + total output bytes = 25812 + sample count = 21 + format 0: + id = 1/256 + sampleMimeType = video/avc + codecs = avc1.64001F + width = 854 + height = 480 + colorInfo: + lumaBitdepth = 8 + chromaBitdepth = 8 + initializationData: + data = length 29, hash A220FDE0 + data = length 9, hash D971CD89 + sample 0: + time = 333333 + flags = 0 + data = length 177, hash 7E6DB5B7 + sample 1: + time = 400000 + flags = 1 + data = length 11054, hash 1FC7442D + sample 2: + time = 533333 + flags = 0 + data = length 302, hash 5432E85C + sample 3: + time = 466666 + flags = 0 + data = length 302, hash 95A21C90 + sample 4: + time = 433333 + flags = 0 + data = length 96, hash 8C98DBB8 + sample 5: + time = 500000 + flags = 0 + data = length 198, hash 114E22D9 + sample 6: + time = 666666 + flags = 0 + data = length 303, hash C2C45716 + sample 7: + time = 600000 + flags = 0 + data = length 168, hash 1B048DCA + sample 8: + time = 566666 + flags = 0 + data = length 98, hash 3AA3B8B4 + sample 9: + time = 633333 + flags = 0 + data = length 108, hash 4F67E6F8 + sample 10: + time = 700000 + flags = 0 + data = length 110, hash 625A9B91 + sample 11: + time = 733333 + flags = 1 + data = length 11235, hash 8EDDEB9 + sample 12: + time = 866666 + flags = 0 + data = length 358, hash 797276E4 + sample 13: + time = 800000 + flags = 0 + data = length 336, hash B92DBED9 + sample 14: + time = 766666 + flags = 0 + data = length 105, hash 223F3D12 + sample 15: + time = 833333 + flags = 0 + data = length 106, hash DF8D1552 + sample 16: + time = 1000000 + flags = 0 + data = length 235, hash 6127527D + sample 17: + time = 933333 + flags = 0 + data = length 109, hash E8E69F9F + sample 18: + time = 900000 + flags = 0 + data = length 84, hash 54E55CB4 + sample 19: + time = 966666 + flags = 0 + data = length 93, hash B6B6263C + sample 20: + time = 1033333 + flags = 0 + data = length 234, hash A48D3D90 +tracksEnded = true diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.2.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.2.dump new file mode 100644 index 00000000000..ce18263dd80 --- /dev/null +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.2.dump @@ -0,0 +1,72 @@ +seekMap: + isSeekable = true + duration = 900000 + getPosition(0) = [[timeUs=0, position=0]] + getPosition(1) = [[timeUs=1, position=0]] + getPosition(450000) = [[timeUs=450000, position=23030]] + getPosition(900000) = [[timeUs=900000, position=46248]] +numberOfTracks = 1 +track 256: + total output bytes = 13114 + sample count = 12 + format 0: + id = 1/256 + sampleMimeType = video/avc + codecs = avc1.64001F + width = 854 + height = 480 + colorInfo: + lumaBitdepth = 8 + chromaBitdepth = 8 + initializationData: + data = length 29, hash A220FDE0 + data = length 9, hash D971CD89 + sample 0: + time = 633333 + flags = 0 + data = length 108, hash 4F67E6F8 + sample 1: + time = 700000 + flags = 0 + data = length 110, hash 625A9B91 + sample 2: + time = 733333 + flags = 1 + data = length 11235, hash 8EDDEB9 + sample 3: + time = 866666 + flags = 0 + data = length 358, hash 797276E4 + sample 4: + time = 800000 + flags = 0 + data = length 336, hash B92DBED9 + sample 5: + time = 766666 + flags = 0 + data = length 105, hash 223F3D12 + sample 6: + time = 833333 + flags = 0 + data = length 106, hash DF8D1552 + sample 7: + time = 1000000 + flags = 0 + data = length 235, hash 6127527D + sample 8: + time = 933333 + flags = 0 + data = length 109, hash E8E69F9F + sample 9: + time = 900000 + flags = 0 + data = length 84, hash 54E55CB4 + sample 10: + time = 966666 + flags = 0 + data = length 93, hash B6B6263C + sample 11: + time = 1033333 + flags = 0 + data = length 234, hash A48D3D90 +tracksEnded = true diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.3.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.3.dump new file mode 100644 index 00000000000..f8372491d6e --- /dev/null +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.3.dump @@ -0,0 +1,36 @@ +seekMap: + isSeekable = true + duration = 900000 + getPosition(0) = [[timeUs=0, position=0]] + getPosition(1) = [[timeUs=1, position=0]] + getPosition(450000) = [[timeUs=450000, position=23030]] + getPosition(900000) = [[timeUs=900000, position=46248]] +numberOfTracks = 1 +track 256: + total output bytes = 412 + sample count = 3 + format 0: + id = 1/256 + sampleMimeType = video/avc + codecs = avc1.64001F + width = 854 + height = 480 + colorInfo: + lumaBitdepth = 8 + chromaBitdepth = 8 + initializationData: + data = length 29, hash A220FDE0 + data = length 9, hash D971CD89 + sample 0: + time = 900000 + flags = 0 + data = length 84, hash 54E55CB4 + sample 1: + time = 966666 + flags = 0 + data = length 93, hash B6B6263C + sample 2: + time = 1033333 + flags = 0 + data = length 234, hash A48D3D90 +tracksEnded = true diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.unknown_length.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.unknown_length.dump new file mode 100644 index 00000000000..8406fb34de5 --- /dev/null +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_h264.ts.unknown_length.dump @@ -0,0 +1,141 @@ +seekMap: + isSeekable = false + duration = UNSET TIME + getPosition(0) = [[timeUs=0, position=0]] +numberOfTracks = 1 +track 256: + total output bytes = 41614 + sample count = 30 + format 0: + id = 1/256 + sampleMimeType = video/avc + codecs = avc1.64001F + width = 854 + height = 480 + colorInfo: + lumaBitdepth = 8 + chromaBitdepth = 8 + initializationData: + data = length 29, hash A220FDE0 + data = length 9, hash D971CD89 + sample 0: + time = 66666 + flags = 1 + data = length 856, hash E3FC14FA + sample 1: + time = 100000 + flags = 0 + data = length 3330, hash ACE433CC + sample 2: + time = 166666 + flags = 0 + data = length 6849, hash 2762E298 + sample 3: + time = 133333 + flags = 0 + data = length 794, hash 16489833 + sample 4: + time = 233333 + flags = 0 + data = length 1874, hash 24DF8A2D + sample 5: + time = 200000 + flags = 0 + data = length 1209, hash B137200F + sample 6: + time = 366666 + flags = 0 + data = length 476, hash 1D70B27E + sample 7: + time = 300000 + flags = 0 + data = length 325, hash D3FB2CC2 + sample 8: + time = 266666 + flags = 0 + data = length 89, hash F66C5D60 + sample 9: + time = 333333 + flags = 0 + data = length 177, hash 7E6DB5B7 + sample 10: + time = 400000 + flags = 1 + data = length 11054, hash 1FC7442D + sample 11: + time = 533333 + flags = 0 + data = length 302, hash 5432E85C + sample 12: + time = 466666 + flags = 0 + data = length 302, hash 95A21C90 + sample 13: + time = 433333 + flags = 0 + data = length 96, hash 8C98DBB8 + sample 14: + time = 500000 + flags = 0 + data = length 198, hash 114E22D9 + sample 15: + time = 666666 + flags = 0 + data = length 303, hash C2C45716 + sample 16: + time = 600000 + flags = 0 + data = length 168, hash 1B048DCA + sample 17: + time = 566666 + flags = 0 + data = length 98, hash 3AA3B8B4 + sample 18: + time = 633333 + flags = 0 + data = length 108, hash 4F67E6F8 + sample 19: + time = 700000 + flags = 0 + data = length 110, hash 625A9B91 + sample 20: + time = 733333 + flags = 1 + data = length 11235, hash 8EDDEB9 + sample 21: + time = 866666 + flags = 0 + data = length 358, hash 797276E4 + sample 22: + time = 800000 + flags = 0 + data = length 336, hash B92DBED9 + sample 23: + time = 766666 + flags = 0 + data = length 105, hash 223F3D12 + sample 24: + time = 833333 + flags = 0 + data = length 106, hash DF8D1552 + sample 25: + time = 1000000 + flags = 0 + data = length 235, hash 6127527D + sample 26: + time = 933333 + flags = 0 + data = length 109, hash E8E69F9F + sample 27: + time = 900000 + flags = 0 + data = length 84, hash 54E55CB4 + sample 28: + time = 966666 + flags = 0 + data = length 93, hash B6B6263C + sample 29: + time = 1033333 + flags = 0 + data = length 234, hash A48D3D90 +tracksEnded = true diff --git a/libraries/test_data/src/test/assets/media/ts/sample_h264.ts b/libraries/test_data/src/test/assets/media/ts/sample_h264.ts new file mode 100644 index 00000000000..3e4becd3a9a Binary files /dev/null and b/libraries/test_data/src/test/assets/media/ts/sample_h264.ts differ