Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SSA/ASS subtitles - Overlapping start/end times and position tag is not handled #6595

Merged
merged 10 commits into from
Dec 5, 2019
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@
package com.google.android.exoplayer2.text.ssa;

import android.text.TextUtils;
import android.util.Pair;
import androidx.annotation.Nullable;
import com.google.android.exoplayer2.C;
import com.google.android.exoplayer2.text.Cue;
import com.google.android.exoplayer2.text.SimpleSubtitleDecoder;
import com.google.android.exoplayer2.text.Subtitle;
import com.google.android.exoplayer2.util.Assertions;
import com.google.android.exoplayer2.util.Log;
import com.google.android.exoplayer2.util.LongArray;
import com.google.android.exoplayer2.util.ParsableByteArray;
import com.google.android.exoplayer2.util.Util;
import java.util.ArrayList;
Expand All @@ -40,6 +40,9 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {

private static final Pattern SSA_TIMECODE_PATTERN = Pattern.compile(
"(?:(\\d+):)?(\\d+):(\\d+)(?::|\\.)(\\d+)");
private static final Pattern SSA_POSITION_PATTERN = Pattern.compile(
"\\\\pos\\((\\d+(\\.\\d+)?),\\s*(\\d+(\\.\\d+)?)");

private static final String FORMAT_LINE_PREFIX = "Format: ";
private static final String DIALOGUE_LINE_PREFIX = "Dialogue: ";

Expand All @@ -50,6 +53,9 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {
private int formatEndIndex;
private int formatTextIndex;

private int playResX;
private int playResY;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably safer to explicitly initialise these to an invalid value to clearly indicate 'unset', because 0 seems like a potentially genuine value we could see in a subtitle file? And then also update the comparison below on L216.

We have C.LENGTH_UNSET which I think would work well.
https://github.com/google/ExoPlayer/blob/release-v2/library/core/src/main/java/com/google/android/exoplayer2/C.java


public SsaDecoder() {
this(/* initializationData= */ null);
}
Expand All @@ -75,19 +81,15 @@ public SsaDecoder(@Nullable List<byte[]> initializationData) {

@Override
protected Subtitle decode(byte[] bytes, int length, boolean reset) {
ArrayList<Cue> cues = new ArrayList<>();
LongArray cueTimesUs = new LongArray();
ArrayList<List<Cue>> cues = new ArrayList<>();
List<Long> cueTimesUs = new ArrayList<>();

ParsableByteArray data = new ParsableByteArray(bytes, length);
if (!haveInitializationData) {
parseHeader(data);
}
parseEventBody(data, cues, cueTimesUs);

Cue[] cuesArray = new Cue[cues.size()];
cues.toArray(cuesArray);
long[] cueTimesUsArray = cueTimesUs.toArray();
return new SsaSubtitle(cuesArray, cueTimesUsArray);
return new SsaSubtitle(cues, cueTimesUs);
}

/**
Expand All @@ -98,6 +100,12 @@ protected Subtitle decode(byte[] bytes, int length, boolean reset) {
private void parseHeader(ParsableByteArray data) {
String currentLine;
while ((currentLine = data.readLine()) != null) {
if (currentLine.startsWith("PlayResX:")) {
playResX = Integer.valueOf(currentLine.substring(9).trim());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's a tiny bit clearer to use the string literal again instead of a 'magic' length (saves a future reader manually counting the number of characters in "PlayResX:" :))

playResX = Integer.valueOf(currentLine.substring("PlayResX:".length()).trim());

}
if (currentLine.startsWith("PlayResY:")) {
playResY = Integer.valueOf(currentLine.substring(9).trim());
}
// TODO: Parse useful data from the header.
if (currentLine.startsWith("[Events]")) {
// We've reached the event body.
Expand All @@ -113,7 +121,7 @@ private void parseHeader(ParsableByteArray data) {
* @param cues A list to which parsed cues will be added.
* @param cueTimesUs An array to which parsed cue timestamps will be added.
*/
private void parseEventBody(ParsableByteArray data, List<Cue> cues, LongArray cueTimesUs) {
private void parseEventBody(ParsableByteArray data, List<List<Cue>> cues, List<Long> cueTimesUs) {
String currentLine;
while ((currentLine = data.readLine()) != null) {
if (!haveInitializationData && currentLine.startsWith(FORMAT_LINE_PREFIX)) {
Expand Down Expand Up @@ -167,7 +175,7 @@ private void parseFormatLine(String formatLine) {
* @param cues A list to which parsed cues will be added.
* @param cueTimesUs An array to which parsed cue timestamps will be added.
*/
private void parseDialogueLine(String dialogueLine, List<Cue> cues, LongArray cueTimesUs) {
private void parseDialogueLine(String dialogueLine, List<List<Cue>> cues, List<Long> cueTimesUs) {
if (formatKeyCount == 0) {
Log.w(TAG, "Skipping dialogue line before complete format: " + dialogueLine);
return;
Expand Down Expand Up @@ -196,16 +204,67 @@ private void parseDialogueLine(String dialogueLine, List<Cue> cues, LongArray cu
}
}

// Parse \pos{x,y} attribute
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I'd move this comment to the javadoc of parsePosition

Pair<Float, Float> position = parsePosition(lineValues[formatTextIndex]);

String text = lineValues[formatTextIndex]
.replaceAll("\\{.*?\\}", "")
.replaceAll("\\\\N", "\n")
.replaceAll("\\\\n", "\n");
cues.add(new Cue(text));
cueTimesUs.add(startTimeUs);

Cue cue;
if (position != null && playResX != 0 && playResY != 0) {
cue = new Cue(
text,
/* textAlignment */ null,
position.second / playResY,
Cue.LINE_TYPE_FRACTION,
Cue.ANCHOR_TYPE_START,
position.first / playResX,
Cue.ANCHOR_TYPE_MIDDLE,
Cue.DIMEN_UNSET);
} else {
cue = new Cue(text);
}

int startTimeIndex = insertToCueTimes(cueTimesUs, startTimeUs);

List<Cue> startCueList = new ArrayList<>();
if (startTimeIndex != 0) {
startCueList.addAll(cues.get(startTimeIndex - 1));
}
cues.add(startTimeIndex, startCueList);

if (endTimeUs != C.TIME_UNSET) {
cues.add(Cue.EMPTY);
cueTimesUs.add(endTimeUs);
int endTimeIndex = insertToCueTimes(cueTimesUs, endTimeUs);
List<Cue> endList = new ArrayList<>(cues.get(endTimeIndex - 1));
cues.add(endTimeIndex, endList);

int i = startTimeIndex;
do {
cues.get(i).add(cue);
i++;
} while (i != endTimeIndex);
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could be wrong, but I'm not convinced this correctly handles multiple cues that have the same start or end time.
e.g.

[3, 5] -> "A"
[4, 5] -> "B"

We'll insert the first one, after which we have: cueTimesUs=[3, 5], startTimeIndex=0 & endTimeIndex=1 and cues=[["A"], []].

Then insert the second one: cueTimesUs=[3, 4, 5, 5], startTimeIndex=1 & endTimeIndex=3 and cues=[["A"], ["A", "B"], ["B"], []]

Whereas I think with this input we'd want these lists: cueTimesUs=[3, 4, 5] and cues=[["A"], ["A", "B"], []]

It also took quite a lot of thought for me to follow that through, especially with the multiple mutations to the cue list on each iteration of the outer loop.

Note that we have this same overlapping challenge in the webvtt package and we actually solve it in a slightly different way, by more lazily evaluating Subtitle#getCues (every call to that iterates over all the subtitles we have) [1]. I chatted a bit to the team, and that seems unfortunately inefficient, so I think it makes sense to keep the logic here and not copy webvtt, but maybe we can correct it and make it a bit easier to follow.

My suggestion is to get rid of insertToCueTimes() and do something more like (I haven't tested this, it might have other problems...):

  • (binary?) search for startTimeUs in cueTimesUs
    • if startTimeUs is already there, then get the matching list (by index) from cues and add cue to it.
    • else insert startTimeUs to cueTimesUs and insert a new matching list to cues (containing all the Cues from index - 1 plus cue).
  • Walk through cueTimesUs, adding cue to every entry matching entry in cues until you find a time that's either equal to or greater than endTimeUs (mostly your existing do/while loop)
    • On each step, store a reference to the matching list of cues before you add cue. (This reference should also store the list from the else in the first bullet before cue is added)
    • If the time you stopped on is equal to endTimeUs, then do nothing (the cues list already has the correct 'end' value, right?)
    • If it's greater, then insert a new cues list equal to the most recent list you stored at the top of this sub-section.

[1]


/**
* Insert the given cue time into the given array keeping the array sorted.
*
* @param cueTimes The array with sorted cue times
* @param timeUs The cue time to be inserted
* @return The index where the cue time was inserted
*/
private static int insertToCueTimes(List<Long> cueTimes, long timeUs) {
for (int i = cueTimes.size() - 1; i >= 0; i--) {
if (cueTimes.get(i) <= timeUs) {
cueTimes.add(i + 1, timeUs);
return i + 1;
}
}

cueTimes.add(0, timeUs);
return 0;
}

/**
Expand All @@ -214,7 +273,7 @@ private void parseDialogueLine(String dialogueLine, List<Cue> cues, LongArray cu
* @param timeString The string to parse.
* @return The parsed timestamp in microseconds.
*/
public static long parseTimecodeUs(String timeString) {
private static long parseTimecodeUs(String timeString) {
Matcher matcher = SSA_TIMECODE_PATTERN.matcher(timeString);
if (!matcher.matches()) {
return C.TIME_UNSET;
Expand All @@ -226,4 +285,15 @@ public static long parseTimecodeUs(String timeString) {
return timestampUs;
}

@Nullable
public static Pair<Float, Float> parsePosition(String line){
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might make more sense to use android.graphics.PointF here because it's a little less general/ambiguous than Pair<Float, Float>

Also avoids auto-boxing

Matcher matcher = SSA_POSITION_PATTERN.matcher(line);
if(!matcher.find()){
return null;
}
float x = Float.parseFloat(matcher.group(1));
float y = Float.parseFloat(matcher.group(3));
return new Pair<>(x,y);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,45 +28,44 @@
*/
/* package */ final class SsaSubtitle implements Subtitle {

private final Cue[] cues;
private final long[] cueTimesUs;
private final List<List<Cue>> cues;
private final List<Long> cueTimesUs;

/**
* @param cues The cues in the subtitle.
* @param cueTimesUs The cue times, in microseconds.
*/
public SsaSubtitle(Cue[] cues, long[] cueTimesUs) {
public SsaSubtitle(List<List<Cue>> cues, List<Long> cueTimesUs) {
this.cues = cues;
this.cueTimesUs = cueTimesUs;
}

@Override
public int getNextEventTimeIndex(long timeUs) {
int index = Util.binarySearchCeil(cueTimesUs, timeUs, false, false);
return index < cueTimesUs.length ? index : C.INDEX_UNSET;
return index < cueTimesUs.size() ? index : C.INDEX_UNSET;
}

@Override
public int getEventTimeCount() {
return cueTimesUs.length;
return cueTimesUs.size();
}

@Override
public long getEventTime(int index) {
Assertions.checkArgument(index >= 0);
Assertions.checkArgument(index < cueTimesUs.length);
return cueTimesUs[index];
Assertions.checkArgument(index < cueTimesUs.size());
return cueTimesUs.get(index);
}

@Override
public List<Cue> getCues(long timeUs) {
int index = Util.binarySearchFloor(cueTimesUs, timeUs, true, false);
if (index == -1 || cues[index] == Cue.EMPTY) {
if (index == -1 || cues.get(index).isEmpty()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can get rid of the empty check, since we'll just return the empty list below anyway (right?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's right.

// timeUs is earlier than the start of the first cue, or we have an empty cue.
return Collections.emptyList();
} else {
return Collections.singletonList(cues[index]);
return cues.get(index);
}
}

}