Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TS H264 key frame detection (2) #864

Merged
merged 4 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
`ColorInfo.colorSpace`, `ColorInfo.colorTransfer`, and
`ColorInfo.colorRange` values
([#692](https://github.com/androidx/media/pull/692)).
* Fix incorrect keyframe detection for TS H264 streams
([#864](https://github.com/androidx/media/pull/864)).
* Audio:
* Video:
* Add workaround for a device issue on Galaxy Tab S7 FE, Chromecast with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ private void startNalUnit(long position, int nalUnitType, long pesTimeUs) {
pps.startNalUnit(nalUnitType);
}
sei.startNalUnit(nalUnitType);
sampleReader.startNalUnit(position, nalUnitType, pesTimeUs);
sampleReader.startNalUnit(position, nalUnitType, pesTimeUs, randomAccessIndicator);
}

@RequiresNonNull("sampleReader")
Expand Down Expand Up @@ -253,8 +253,7 @@ private void endNalUnit(long position, int offset, int discardPadding, long pesT
seiWrapper.setPosition(4); // NAL prefix and nal_unit() header.
seiReader.consume(pesTimeUs, seiWrapper);
}
boolean sampleIsKeyFrame =
sampleReader.endNalUnit(position, offset, hasOutputFormat, randomAccessIndicator);
boolean sampleIsKeyFrame = sampleReader.endNalUnit(position, offset, hasOutputFormat);
if (sampleIsKeyFrame) {
// This is either an IDR frame or the first I-frame since the random access indicator, so mark
// it as a keyframe. Clear the flag so that subsequent non-IDR I-frames are not marked as
Expand Down Expand Up @@ -297,6 +296,7 @@ private static final class SampleReader {
private long samplePosition;
private long sampleTimeUs;
private boolean sampleIsKeyframe;
private boolean randomAccessIndicator;

public SampleReader(
TrackOutput output, boolean allowNonIdrKeyframes, boolean detectAccessUnits) {
Expand Down Expand Up @@ -330,10 +330,12 @@ public void reset() {
sliceHeader.clear();
}

public void startNalUnit(long position, int type, long pesTimeUs) {
public void startNalUnit(
long position, int type, long pesTimeUs, boolean randomAccessIndicator) {
nalUnitType = type;
nalUnitTimeUs = pesTimeUs;
nalUnitStartPosition = position;
this.randomAccessIndicator = randomAccessIndicator;
if ((allowNonIdrKeyframes && nalUnitType == NalUnitUtil.NAL_UNIT_TYPE_NON_IDR)
|| (detectAccessUnits
&& (nalUnitType == NalUnitUtil.NAL_UNIT_TYPE_IDR
Expand Down Expand Up @@ -481,8 +483,7 @@ public void appendToNalUnit(byte[] data, int offset, int limit) {
isFilling = false;
}

public boolean endNalUnit(
long position, int offset, boolean hasOutputFormat, boolean randomAccessIndicator) {
public boolean endNalUnit(long position, int offset, boolean hasOutputFormat) {
if (nalUnitType == NalUnitUtil.NAL_UNIT_TYPE_AUD
|| (detectAccessUnits && sliceHeader.isFirstVclNalUnitOfPicture(previousSliceHeader))) {
// If the NAL unit ending is the start of a new sample, output the previous one.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ public void sampleWithH263() throws Exception {
ExtractorAsserts.assertBehavior(TsExtractor::new, "media/ts/sample_h263.ts", simulationConfig);
}

@Test
public void sampleWithH264() throws Exception {
  // Runs the extractor behavior assertions against the video-only H264 TS sample.
  String h264SampleFile = "media/ts/sample_h264.ts";
  ExtractorAsserts.assertBehavior(TsExtractor::new, h264SampleFile, simulationConfig);
}

@Test
public void sampleWithH264AndMpegAudio() throws Exception {
ExtractorAsserts.assertBehavior(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
seekMap:
isSeekable = true
duration = 900000
getPosition(0) = [[timeUs=0, position=0]]
getPosition(1) = [[timeUs=1, position=0]]
getPosition(450000) = [[timeUs=450000, position=23030]]
getPosition(900000) = [[timeUs=900000, position=46248]]
numberOfTracks = 1
track 256:
total output bytes = 41614
sample count = 30
format 0:
id = 1/256
sampleMimeType = video/avc
codecs = avc1.64001F
width = 854
height = 480
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8
initializationData:
data = length 29, hash A220FDE0
data = length 9, hash D971CD89
sample 0:
time = 66666
flags = 1
data = length 856, hash E3FC14FA
sample 1:
time = 100000
flags = 0
data = length 3330, hash ACE433CC
sample 2:
time = 166666
flags = 0
data = length 6849, hash 2762E298
sample 3:
time = 133333
flags = 0
data = length 794, hash 16489833
sample 4:
time = 233333
flags = 0
data = length 1874, hash 24DF8A2D
sample 5:
time = 200000
flags = 0
data = length 1209, hash B137200F
sample 6:
time = 366666
flags = 0
data = length 476, hash 1D70B27E
sample 7:
time = 300000
flags = 0
data = length 325, hash D3FB2CC2
sample 8:
time = 266666
flags = 0
data = length 89, hash F66C5D60
sample 9:
time = 333333
flags = 0
data = length 177, hash 7E6DB5B7
sample 10:
time = 400000
flags = 1
data = length 11054, hash 1FC7442D
sample 11:
time = 533333
flags = 0
data = length 302, hash 5432E85C
sample 12:
time = 466666
flags = 0
data = length 302, hash 95A21C90
sample 13:
time = 433333
flags = 0
data = length 96, hash 8C98DBB8
sample 14:
time = 500000
flags = 0
data = length 198, hash 114E22D9
sample 15:
time = 666666
flags = 0
data = length 303, hash C2C45716
sample 16:
time = 600000
flags = 0
data = length 168, hash 1B048DCA
sample 17:
time = 566666
flags = 0
data = length 98, hash 3AA3B8B4
sample 18:
time = 633333
flags = 0
data = length 108, hash 4F67E6F8
sample 19:
time = 700000
flags = 0
data = length 110, hash 625A9B91
sample 20:
time = 733333
flags = 1
data = length 11235, hash 8EDDEB9
sample 21:
time = 866666
flags = 0
data = length 358, hash 797276E4
sample 22:
time = 800000
flags = 0
data = length 336, hash B92DBED9
sample 23:
time = 766666
flags = 0
data = length 105, hash 223F3D12
sample 24:
time = 833333
flags = 0
data = length 106, hash DF8D1552
sample 25:
time = 1000000
flags = 0
data = length 235, hash 6127527D
sample 26:
time = 933333
flags = 0
data = length 109, hash E8E69F9F
sample 27:
time = 900000
flags = 0
data = length 84, hash 54E55CB4
sample 28:
time = 966666
flags = 0
data = length 93, hash B6B6263C
sample 29:
time = 1033333
flags = 0
data = length 234, hash A48D3D90
tracksEnded = true
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
seekMap:
isSeekable = true
duration = 900000
getPosition(0) = [[timeUs=0, position=0]]
getPosition(1) = [[timeUs=1, position=0]]
getPosition(450000) = [[timeUs=450000, position=23030]]
getPosition(900000) = [[timeUs=900000, position=46248]]
numberOfTracks = 1
track 256:
total output bytes = 25812
sample count = 21
format 0:
id = 1/256
sampleMimeType = video/avc
codecs = avc1.64001F
width = 854
height = 480
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8
initializationData:
data = length 29, hash A220FDE0
data = length 9, hash D971CD89
sample 0:
time = 333333
flags = 0
data = length 177, hash 7E6DB5B7
sample 1:
time = 400000
flags = 1
data = length 11054, hash 1FC7442D
sample 2:
time = 533333
flags = 0
data = length 302, hash 5432E85C
sample 3:
time = 466666
flags = 0
data = length 302, hash 95A21C90
sample 4:
time = 433333
flags = 0
data = length 96, hash 8C98DBB8
sample 5:
time = 500000
flags = 0
data = length 198, hash 114E22D9
sample 6:
time = 666666
flags = 0
data = length 303, hash C2C45716
sample 7:
time = 600000
flags = 0
data = length 168, hash 1B048DCA
sample 8:
time = 566666
flags = 0
data = length 98, hash 3AA3B8B4
sample 9:
time = 633333
flags = 0
data = length 108, hash 4F67E6F8
sample 10:
time = 700000
flags = 0
data = length 110, hash 625A9B91
sample 11:
time = 733333
flags = 1
data = length 11235, hash 8EDDEB9
sample 12:
time = 866666
flags = 0
data = length 358, hash 797276E4
sample 13:
time = 800000
flags = 0
data = length 336, hash B92DBED9
sample 14:
time = 766666
flags = 0
data = length 105, hash 223F3D12
sample 15:
time = 833333
flags = 0
data = length 106, hash DF8D1552
sample 16:
time = 1000000
flags = 0
data = length 235, hash 6127527D
sample 17:
time = 933333
flags = 0
data = length 109, hash E8E69F9F
sample 18:
time = 900000
flags = 0
data = length 84, hash 54E55CB4
sample 19:
time = 966666
flags = 0
data = length 93, hash B6B6263C
sample 20:
time = 1033333
flags = 0
data = length 234, hash A48D3D90
tracksEnded = true
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
seekMap:
isSeekable = true
duration = 900000
getPosition(0) = [[timeUs=0, position=0]]
getPosition(1) = [[timeUs=1, position=0]]
getPosition(450000) = [[timeUs=450000, position=23030]]
getPosition(900000) = [[timeUs=900000, position=46248]]
numberOfTracks = 1
track 256:
total output bytes = 13114
sample count = 12
format 0:
id = 1/256
sampleMimeType = video/avc
codecs = avc1.64001F
width = 854
height = 480
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8
initializationData:
data = length 29, hash A220FDE0
data = length 9, hash D971CD89
sample 0:
time = 633333
flags = 0
data = length 108, hash 4F67E6F8
sample 1:
time = 700000
flags = 0
data = length 110, hash 625A9B91
sample 2:
time = 733333
flags = 1
data = length 11235, hash 8EDDEB9
sample 3:
time = 866666
flags = 0
data = length 358, hash 797276E4
sample 4:
time = 800000
flags = 0
data = length 336, hash B92DBED9
sample 5:
time = 766666
flags = 0
data = length 105, hash 223F3D12
sample 6:
time = 833333
flags = 0
data = length 106, hash DF8D1552
sample 7:
time = 1000000
flags = 0
data = length 235, hash 6127527D
sample 8:
time = 933333
flags = 0
data = length 109, hash E8E69F9F
sample 9:
time = 900000
flags = 0
data = length 84, hash 54E55CB4
sample 10:
time = 966666
flags = 0
data = length 93, hash B6B6263C
sample 11:
time = 1033333
flags = 0
data = length 234, hash A48D3D90
tracksEnded = true
Loading