Skip to content

Commit

Permalink
Merge pull request #306 from B0pol/metadata
Browse files Browse the repository at this point in the history
Extract metadata for youtube, soundcloud & mediaccc
  • Loading branch information
Stypox authored Mar 27, 2021
2 parents e61ceef + 152221c commit b4dee6d
Show file tree
Hide file tree
Showing 22 changed files with 313 additions and 83 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.schabi.newpipe.extractor.localization;

import edu.umd.cs.findbugs.annotations.NonNull;

import javax.annotation.Nonnull;
import java.io.Serializable;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
Expand All @@ -12,49 +12,49 @@
* A wrapper class that provides a field to describe if the date/time is precise or just an approximation.
*/
public class DateWrapper implements Serializable {
@NonNull private final OffsetDateTime offsetDateTime;
@Nonnull
private final OffsetDateTime offsetDateTime;
private final boolean isApproximation;

/**
* @deprecated Use {@link #DateWrapper(OffsetDateTime)} instead.
*/
@Deprecated
public DateWrapper(@NonNull Calendar calendar) {
public DateWrapper(@Nonnull Calendar calendar) {
this(calendar, false);
}

/**
* @deprecated Use {@link #DateWrapper(OffsetDateTime, boolean)} instead.
*/
@Deprecated
public DateWrapper(@NonNull Calendar calendar, boolean isApproximation) {
public DateWrapper(@Nonnull Calendar calendar, boolean isApproximation) {
this(OffsetDateTime.ofInstant(calendar.toInstant(), ZoneOffset.UTC), isApproximation);
}

public DateWrapper(@NonNull OffsetDateTime offsetDateTime) {
public DateWrapper(@Nonnull OffsetDateTime offsetDateTime) {
this(offsetDateTime, false);
}

public DateWrapper(@NonNull OffsetDateTime offsetDateTime, boolean isApproximation) {
public DateWrapper(@Nonnull OffsetDateTime offsetDateTime, boolean isApproximation) {
this.offsetDateTime = offsetDateTime.withOffsetSameInstant(ZoneOffset.UTC);
this.isApproximation = isApproximation;
}

/**
* @return the wrapped date/time as a {@link Calendar}.
*
* @deprecated use {@link #offsetDateTime()} instead.
*/
@Deprecated
@NonNull
@Nonnull
public Calendar date() {
return GregorianCalendar.from(offsetDateTime.toZonedDateTime());
}

/**
* @return the wrapped date/time.
*/
@NonNull
@Nonnull
public OffsetDateTime offsetDateTime() {
return offsetDateTime;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
package org.schabi.newpipe.extractor.localization;

import org.schabi.newpipe.extractor.exceptions.ParsingException;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.*;

public class Localization implements Serializable {
public static final Localization DEFAULT = new Localization("en", "GB");

@Nonnull private final String languageCode;
@Nullable private final String countryCode;
@Nonnull
private final String languageCode;
@Nullable
private final String countryCode;

/**
* @param localizationCodeList a list of localization code, formatted like {@link #getLocalizationCode()}
Expand Down Expand Up @@ -100,4 +100,25 @@ public int hashCode() {
result = 31 * result + Objects.hashCode(countryCode);
return result;
}

/**
 * Converts a three-letter language code (ISO 639-2/T) to a {@link Locale}.
 * <p>
 * This helper exists because the {@link Locale} class cannot be looked up directly by a
 * three-letter code: every two-letter ISO 639 language known to the JVM is turned into
 * a {@link Locale} and its {@link Locale#getISO3Language()} is compared with {@code code}.
 *
 * @param code a three-letter language code
 * @return the {@link Locale} whose ISO3 language equals {@code code}
 * @throws ParsingException if no known language has {@code code} as its three-letter code
 */
public static Locale getLocaleFromThreeLetterCode(@Nonnull String code) throws ParsingException {
    // A linear scan with early return avoids building a throw-away map on every call.
    // Deprecated/renamed two-letter aliases (e.g. "iw"/"he") are normalized by the Locale
    // constructor to the same Locale, so returning the first match is equivalent to the
    // previous "last one wins" map lookup.
    for (final String language : Locale.getISOLanguages()) {
        final Locale locale = new Locale(language);
        if (locale.getISO3Language().equals(code)) {
            return locale;
        }
    }
    // The separator keeps the offending code readable in the message.
    throw new ParsingException("Could not get Locale from this three letter language code: " + code);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,10 @@ public String getCategory() {
public List<String> getTags() {
return Collections.emptyList();
}

/**
 * Reports the privacy of the stream.
 *
 * @return always {@link Privacy#PUBLIC}; no other value is produced by this implementation
 */
@Nonnull
@Override
public Privacy getPrivacy() {
return Privacy.PUBLIC;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,8 @@ public String getHost() {

@Nonnull
@Override
public String getPrivacy() {
return "";
public Privacy getPrivacy() {
return Privacy.PUBLIC;
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,8 @@ public String getHost() throws ParsingException {

@Nonnull
@Override
public String getPrivacy() {
return "Public";
public Privacy getPrivacy() {
return Privacy.PUBLIC;
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,19 @@
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.services.media_ccc.linkHandler.MediaCCCConferenceLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.media_ccc.linkHandler.MediaCCCStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.JsonUtils;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

public class MediaCCCStreamExtractor extends StreamExtractor {
private JsonObject data;
Expand Down Expand Up @@ -256,8 +261,8 @@ public String getHost() {

@Nonnull
@Override
public String getPrivacy() {
return "";
public Privacy getPrivacy() {
return Privacy.PUBLIC;
}

@Nonnull
Expand All @@ -273,14 +278,14 @@ public String getLicence() {
}

@Override
public Locale getLanguageInfo() {
return null;
public Locale getLanguageInfo() throws ParsingException {
return Localization.getLocaleFromThreeLetterCode(data.getString("original_language"));
}

@Nonnull
@Override
public List<String> getTags() {
return Arrays.asList(data.getArray("tags").toArray(new String[0]));
return JsonUtils.getStringListFromJsonArray(data.getArray("tags"));
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,11 +286,7 @@ public StreamInfoItemsCollector getRelatedStreams() throws IOException, Extracti
@Nonnull
@Override
public List<String> getTags() {
try {
return (List) JsonUtils.getArray(json, "tags");
} catch (Exception e) {
return Collections.emptyList();
}
return JsonUtils.getStringListFromJsonArray(json.getArray("tags"));
}

@Nonnull
Expand Down Expand Up @@ -428,8 +424,19 @@ public String getHost() throws ParsingException {

@Nonnull
@Override
public String getPrivacy() throws ParsingException {
return JsonUtils.getString(json, "privacy.label");
public Privacy getPrivacy() {
switch (json.getObject("privacy").getInt("id")) {
case 1:
return Privacy.PUBLIC;
case 2:
return Privacy.UNLISTED;
case 3:
return Privacy.PRIVATE;
case 4:
return Privacy.INTERNAL;
default:
return Privacy.OTHER;
}
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,22 +374,21 @@ public String getHost() {
return "";
}

@Nonnull
@Override
public String getPrivacy() {
return "";
public Privacy getPrivacy() {
return track.getString("sharing").equals("public") ? Privacy.PUBLIC : Privacy.PRIVATE;
}

@Nonnull
@Override
public String getCategory() {
return "";
return track.getString("genre");
}

@Nonnull
@Override
public String getLicence() {
return "";
return track.getString("license");
}

@Override
Expand All @@ -400,7 +399,29 @@ public Locale getLanguageInfo() {
/**
 * Parses the track's {@code tag_list} string into individual tags.
 * <p>
 * Tags are separated by spaces; a tag made of several words is wrapped in double quotes,
 * e.g. {@code rock "hip hop" jazz} yields {@code [rock, hip hop, jazz]}.
 *
 * @return the tags in the order they appear, or an empty list if there are none
 */
@Nonnull
@Override
public List<String> getTags() {
    final List<String> tags = new ArrayList<>();
    final String tagList = track.getString("tag_list");
    // NOTE(review): guards against a missing "tag_list" key, assuming getString
    // yields null in that case — confirm against the JSON library in use.
    if (tagList == null || tagList.isEmpty()) {
        return tags;
    }

    final StringBuilder quotedTag = new StringBuilder();
    boolean insideQuotes = false;
    for (final String token : tagList.split(" ")) {
        if (insideQuotes) {
            if (token.endsWith("\"")) {
                // closing word of a multi-word tag
                quotedTag.append(' ').append(token.replace("\"", ""));
                tags.add(quotedTag.toString());
                // reset so the next quoted tag does not inherit this one's text
                quotedTag.setLength(0);
                insideQuotes = false;
            } else {
                quotedTag.append(' ').append(token);
            }
        } else if (token.startsWith("\"")) {
            final String unquoted = token.replace("\"", "");
            if (token.length() > 1 && token.endsWith("\"")) {
                // single word wrapped in quotes: opens and closes on the same token
                if (!unquoted.isEmpty()) {
                    tags.add(unquoted);
                }
            } else {
                // opening word of a multi-word tag (a lone quote also just opens)
                quotedTag.append(unquoted);
                insideQuotes = true;
            }
        } else if (!token.isEmpty()) {
            tags.add(token);
        }
    }

    // Unterminated quote: keep what was collected instead of silently dropping it.
    if (insideQuotes && quotedTag.length() > 0) {
        tags.add(quotedTag.toString());
    }
    return tags;
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;

Expand Down Expand Up @@ -214,7 +215,7 @@ public Description getDescription() throws ParsingException {
// description with more info on links
try {
String description = getTextFromObject(getVideoSecondaryInfoRenderer().getObject("description"), true);
if (description != null && !description.isEmpty()) return new Description(description, Description.HTML);
if (!isNullOrEmpty(description)) return new Description(description, Description.HTML);
} catch (final ParsingException ignored) {
// age-restricted videos cause a ParsingException here
}
Expand Down Expand Up @@ -1107,20 +1108,32 @@ public String getHost() {

@Nonnull
@Override
public String getPrivacy() {
return "";
public Privacy getPrivacy() {
final boolean isUnlisted = playerResponse
.getObject("microformat")
.getObject("playerMicroformatRenderer")
.getBoolean("isUnlisted");
return isUnlisted ? Privacy.UNLISTED : Privacy.PUBLIC;
}

@Nonnull
@Override
public String getCategory() {
return "";
return playerResponse.getObject("microformat")
.getObject("playerMicroformatRenderer")
.getString("category");
}

@Nonnull
@Override
public String getLicence() {
return "";
public String getLicence() throws ParsingException {
final JsonObject metadataRowRenderer = getVideoSecondaryInfoRenderer()
.getObject("metadataRowContainer").getObject("metadataRowContainerRenderer").getArray("rows")
.getObject(0).getObject("metadataRowRenderer");

final JsonArray contents = metadataRowRenderer.getArray("contents");
final String license = getTextFromObject(contents.getObject(0));
return license != null && "Licence".equals(getTextFromObject(metadataRowRenderer.getObject("title"))) ? license : "YouTube licence";
}

@Override
Expand All @@ -1131,7 +1144,7 @@ public Locale getLanguageInfo() {
@Nonnull
@Override
public List<String> getTags() {
return Collections.emptyList();
return JsonUtils.getStringListFromJsonArray(playerResponse.getObject("videoDetails").getArray("keywords"));
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,7 @@ protected long getTimestampSeconds(String regexPattern) throws ParsingException
* @return the privacy of the stream.
* @throws ParsingException
*/
@Nonnull
public abstract String getPrivacy() throws ParsingException;
public abstract Privacy getPrivacy() throws ParsingException;

/**
* The name of the category of the stream.
Expand Down Expand Up @@ -467,7 +466,7 @@ protected long getTimestampSeconds(String regexPattern) throws ParsingException
* The list of tags of the stream.
* If the tag list is not available you can simply return an empty list.
*
* @return the list of tags of the stream or an empty list.
* @return the list of tags of the stream or Collections.emptyList().
* @throws ParsingException
*/
@Nonnull
Expand Down Expand Up @@ -510,4 +509,11 @@ protected long getTimestampSeconds(String regexPattern) throws ParsingException
*/
@Nonnull
public abstract List<MetaInfo> getMetaInfo() throws ParsingException;
/**
 * Privacy levels that {@code getPrivacy()} can report for a stream.
 * The declaration order is kept stable: PUBLIC, UNLISTED, PRIVATE, INTERNAL, OTHER.
 */
public enum Privacy { PUBLIC, UNLISTED, PRIVATE, INTERNAL, OTHER }
}
Loading

0 comments on commit b4dee6d

Please sign in to comment.