Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/localized images #677

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d1c83c9
chg: feat: add localized image priorization
felipemarinho97 Nov 13, 2024
6b669bc
chg: docs: add missing parameter docs
felipemarinho97 Nov 13, 2024
0d72782
debug
felipemarinho97 Nov 13, 2024
0a8639f
chg: fix: always use iso639-1 when calling function
felipemarinho97 Nov 13, 2024
555cf76
chg: add debug log
felipemarinho97 Nov 13, 2024
de322de
chg: fix: modify tmdb request
felipemarinho97 Nov 13, 2024
0434347
chg: fix: do not prioritize original language
felipemarinho97 Nov 13, 2024
5dabe57
chg: fix: add all images in response
felipemarinho97 Nov 13, 2024
f372943
chg: fix: prioritize language
felipemarinho97 Nov 13, 2024
c3844e9
chg: fix: scanner errors
felipemarinho97 Nov 13, 2024
9fdd395
chg: fix: join bug
felipemarinho97 Nov 13, 2024
1b22671
chg: fix: only use 3 languages
felipemarinho97 Nov 14, 2024
a5c956e
chg: docs: add setting to the envfile
felipemarinho97 Nov 14, 2024
b87629d
chg: feat: better ignore regex
felipemarinho97 Nov 14, 2024
4a52639
chg: refactor: use langcodes instead of string for language represent…
felipemarinho97 Nov 14, 2024
f516d31
chg: fix: add fallback
felipemarinho97 Nov 14, 2024
6c9a61e
chg: refactor: remove repetitive code
felipemarinho97 Nov 14, 2024
ddd8255
chg: fix: add media type
felipemarinho97 Nov 14, 2024
2b83db6
chg: fix: make the extra parameter optional
felipemarinho97 Nov 14, 2024
d4f6aea
chg: fix: add null as default original language
felipemarinho97 Nov 14, 2024
3cbe3d6
chg: lint: format code
felipemarinho97 Nov 14, 2024
20281e7
chg: fix: apply MR suggestions
felipemarinho97 Nov 15, 2024
786a9d8
chg: fix: original_language param
felipemarinho97 Nov 15, 2024
d8a8aaa
chg: fix: lint issues
felipemarinho97 Nov 15, 2024
23e8e4d
chg: feat: add localization to collections as well
felipemarinho97 Nov 15, 2024
8ec3767
chg: fix: key_error when original_language is unavailable
felipemarinho97 Nov 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ LIBRARY_ROOT=./video
# It will automatically be cleaned up on kyoo's startup/shutdown/runtime.
CACHE_ROOT=/tmp/kyoo_cache
LIBRARY_LANGUAGES=en
# A pattern (regex) to ignore video files.
LIBRARY_IGNORE_PATTERN=".*/[dD]ownloads?/.*"
# If this is true, kyoo will prefer the images (posters, thumbnails, logos) in the original language of the item over the ones in your library languages.
MEDIA_PREFER_ORIGINAL_LANGUAGE=false
# A pattern (regex) to ignore files.
LIBRARY_IGNORE_PATTERN=".*/[dD]ownloads?/.*|.*[Tt][Rr][Aa][Ii][Ll][Ee][Rr].*"

# If this is true, new accounts won't have any permissions before you approve them in your admin dashboard.
REQUIRE_ACCOUNT_VERIFICATION=true
Expand Down
2 changes: 1 addition & 1 deletion scanner/matcher/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ async def _identify(self, path: str):
if "mimetype" not in raw or not raw["mimetype"].startswith("video"):
return

logger.info("Identied %s: %s", path, raw)
logger.info("Identified %s: %s", path, raw)

title = raw.get("title")
if not isinstance(title, str):
Expand Down
173 changes: 105 additions & 68 deletions scanner/providers/implementations/themoviedatabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from logging import getLogger
from typing import Awaitable, Callable, Dict, List, Optional, Any, TypeVar
from itertools import accumulate, zip_longest
from langcodes import Language

from providers.utils import ProviderError
from matcher.cache import cache
Expand Down Expand Up @@ -31,7 +32,7 @@ def __init__(
api_key: str,
) -> None:
super().__init__()
self._languages = languages
self._languages = [Language.get(l) for l in languages]
self._client = client
self.base = "https://api.themoviedb.org/3"
self.api_key = api_key
Expand Down Expand Up @@ -78,7 +79,7 @@ def flatten(x: Genre | list[Genre]) -> list[Genre]:
[self.genre_map[x["id"]] for x in genres if x["id"] in self.genre_map]
)

def get_languages(self, *args):
def get_languages(self, *args) -> list[Language]:
return self._languages + list(args)

async def get(
Expand All @@ -99,16 +100,17 @@ async def get(

T = TypeVar("T")

def merge_translations(self, host, translations, *, languages: list[str]):
def merge_translations(self, host, translations, *, languages: list[Language]):
host.translations = {
k: v.translations[k] for k, v in zip(languages, translations)
k.to_tag(): v.translations[k.to_tag()]
for k, v in zip(languages, translations)
}
return host

async def process_translations(
self,
for_language: Callable[[str], Awaitable[T]],
languages: list[str],
languages: list[Language],
post_merge: Callable[[T, list[T]], T] | None = None,
) -> T:
tasks = map(lambda lng: for_language(lng), languages)
Expand Down Expand Up @@ -138,24 +140,86 @@ def to_studio(self, company: dict[str, Any]) -> Studio:
},
)

def get_best_image(
    self, item: dict[str, Any], lng: Language, key: str
) -> list[dict]:
    """
    Select the most relevant images of one kind for an item, preferring localized ones.

    Candidates are ranked by vote average, then by width (both descending). The
    first non-empty group in the list below is passed to `self.get_image`:

    1. images whose `iso_639_1` equals `lng.language`;
    2. images whose `iso_639_1` equals the item's `original_language`;
    3. every image listed under `item["images"][key]`;
    4. the `*_path` fallback built by `self._get_image_fallback`.

    Args:
        item (dict): Raw item payload. `images` (lists of image dicts keyed by kind)
            and `original_language` are read when present; a missing `images` entry
            or a missing kind is treated as an empty list.
        lng (Language): The preferred language; only its `language` subtag is compared.
        key (str): The kind of image to read from `item["images"]`. (e.g. "posters", "backdrops", "logos")
    Returns:
        The result of `self.get_image` for the selected group.
        NOTE(review): the annotation says list[dict], but the value is whatever
        `get_image` produces -- confirm against its definition.
    """
    # Rank a copy instead of writing the sorted list back into `item`, so the
    # caller's payload is left untouched.
    candidates = sorted(
        (item.get("images") or {}).get(key) or [],
        key=lambda x: (x.get("vote_average", 0), x.get("width", 0)),
        reverse=True,
    )

    # When the item has no `original_language`, this is None and therefore
    # matches images that carry no language (`iso_639_1` null or absent).
    original_language = item.get("original_language")

    # Ordered from most to least specific; the first non-empty group wins.
    groups = (
        [img for img in candidates if img.get("iso_639_1") == lng.language],
        [img for img in candidates if img.get("iso_639_1") == original_language],
        candidates,
    )
    selected = next((group for group in groups if group), None)

    # No image listed at all: fall back to the item's `*_path` attributes.
    if selected is None:
        selected = self._get_image_fallback(item, key)

    return self.get_image(selected)

def _get_image_fallback(self, item: dict[str, Any], key: str) -> list[dict]:
"""
Fallback to _path attribute if there are no images available in the images list.
"""
if key == "posters":
return [{"file_path": item.get("poster_path")}]
elif key == "backdrops":
return [{"file_path": item.get("backdrop_path")}]

return []

async def search_movie(self, name: str, year: Optional[int]) -> Movie:
search_results = (
await self.get("search/movie", params={"query": name, "year": year})
)["results"]
if len(search_results) == 0:
raise ProviderError(f"No result for a movie named: {name}")
search = self.get_best_result(search_results, name, year)
return await self.identify_movie(search["id"])
original_language = Language.get(search["original_language"])
return await self.identify_movie(
search["id"], original_language=original_language
)

async def identify_movie(self, movie_id: str) -> Movie:
async def identify_movie(
self, movie_id: str, original_language: Optional[Language] = None
) -> Movie:
languages = self.get_languages()

async def for_language(lng: str) -> Movie:
async def for_language(lng: Language) -> Movie:
movie = await self.get(
f"movie/{movie_id}",
params={
"language": lng,
"language": lng.to_tag(),
"append_to_response": "alternative_titles,videos,credits,keywords,images",
"include_image_language": f"{lng.language},null,{original_language.language if original_language else ""}",
},
)
logger.debug("TMDb responded: %s", movie)
Expand Down Expand Up @@ -210,40 +274,27 @@ async def for_language(lng: str) -> Movie:
tagline=movie["tagline"] if movie["tagline"] else None,
tags=list(map(lambda x: x["name"], movie["keywords"]["keywords"])),
overview=movie["overview"],
posters=self.get_image(
movie["images"]["posters"]
+ (
[{"file_path": movie["poster_path"]}]
felipemarinho97 marked this conversation as resolved.
Show resolved Hide resolved
if lng == movie["original_language"]
else []
)
),
logos=self.get_image(movie["images"]["logos"]),
thumbnails=self.get_image(
movie["images"]["backdrops"]
+ (
[{"file_path": movie["backdrop_path"]}]
if lng == movie["original_language"]
else []
)
),
posters=self.get_best_image(movie, lng, "posters"),
logos=self.get_best_image(movie, lng, "logos"),
thumbnails=self.get_best_image(movie, lng, "backdrops"),
trailers=[
f"https://www.youtube.com/watch?v={x['key']}"
for x in movie["videos"]["results"]
if x["type"] == "Trailer" and x["site"] == "YouTube"
],
)
ret.translations = {lng: translation}
ret.translations = {lng.to_tag(): translation}
return ret

ret = await self.process_translations(for_language, languages)
if (
ret.original_language is not None
and ret.original_language not in ret.translations
):
ret.translations[ret.original_language] = (
await for_language(ret.original_language)
).translations[ret.original_language]
orig_language = Language.get(ret.original_language)
ret.translations[orig_language.to_tag()] = (
await for_language(orig_language)
).translations[orig_language.to_tag()]
return ret

@cache(ttl=timedelta(days=1))
Expand All @@ -253,12 +304,13 @@ async def identify_show(
) -> Show:
languages = self.get_languages()

async def for_language(lng: str) -> Show:
async def for_language(lng: Language) -> Show:
show = await self.get(
f"tv/{show_id}",
params={
"language": lng,
"language": lng.to_tag(),
"append_to_response": "alternative_titles,videos,credits,keywords,images,external_ids",
"include_image_language": f"{lng.language},null,en",
},
)
logger.debug("TMDb responded: %s", show)
Expand Down Expand Up @@ -311,30 +363,16 @@ async def for_language(lng: str) -> Show:
tagline=show["tagline"] if show["tagline"] else None,
tags=list(map(lambda x: x["name"], show["keywords"]["results"])),
overview=show["overview"],
posters=self.get_image(
show["images"]["posters"]
+ (
[{"file_path": show["poster_path"]}]
if lng == show["original_language"]
else []
)
),
logos=self.get_image(show["images"]["logos"]),
thumbnails=self.get_image(
show["images"]["backdrops"]
+ (
[{"file_path": show["backdrop_path"]}]
if lng == show["original_language"]
else []
)
),
posters=self.get_best_image(show, lng, "posters"),
logos=self.get_best_image(show, lng, "logos"),
thumbnails=self.get_best_image(show, lng, "backdrops"),
trailers=[
f"https://www.youtube.com/watch?v={x['key']}"
for x in show["videos"]["results"]
if x["type"] == "Trailer" and x["site"] == "YouTube"
],
)
ret.translations = {lng: translation}
ret.translations = {lng.to_tag(): translation}
return ret

def merge_seasons_translations(item: Show, items: list[Show]) -> Show:
Expand Down Expand Up @@ -362,13 +400,14 @@ def merge_seasons_translations(item: Show, items: list[Show]) -> Show:
ret.original_language is not None
and ret.original_language not in ret.translations
):
ret.translations[ret.original_language] = (
await for_language(ret.original_language)
).translations[ret.original_language]
orig_language = Language.get(ret.original_language)
ret.translations[orig_language.to_tag()] = (
await for_language(orig_language)
).translations[orig_language.to_tag()]
return ret

def to_season(
self, season: dict[str, Any], *, language: str, show_id: str
self, season: dict[str, Any], *, language: Language, show_id: str
) -> Season:
return Season(
season_number=season["season_number"],
Expand All @@ -384,7 +423,7 @@ def to_season(
)
},
translations={
language: SeasonTranslation(
language.to_tag(): SeasonTranslation(
name=season["name"],
overview=season["overview"],
posters=[
Expand Down Expand Up @@ -456,19 +495,19 @@ async def search_episode(
async def identify_episode(
self, show_id: str, season: Optional[int], episode_nbr: int, absolute: int
) -> Episode:
async def for_language(lng: str) -> Episode:
async def for_language(lng: Language) -> Episode:
try:
episode = await self.get(
f"tv/{show_id}/season/{season}/episode/{episode_nbr}",
params={
"language": lng,
"language": lng.to_tag(),
},
)
except:
episode = await self.get(
f"tv/{show_id}/season/{season}/episode/{absolute}",
params={
"language": lng,
"language": lng.to_tag(),
},
not_found_fail=f"Could not find episode {episode_nbr} of season {season} of serie {show_id} (absolute: {absolute})",
)
Expand Down Expand Up @@ -509,7 +548,7 @@ async def for_language(lng: str) -> Episode:
name=episode["name"],
overview=episode["overview"],
)
ret.translations = {lng: translation}
ret.translations = {lng.to_tag(): translation}
return ret

return await self.process_translations(for_language, self.get_languages())
Expand Down Expand Up @@ -698,11 +737,13 @@ async def get_absolute_number(
async def identify_collection(self, provider_id: str) -> Collection:
languages = self.get_languages()

async def for_language(lng: str) -> Collection:
async def for_language(lng: Language) -> Collection:
collection = await self.get(
f"collection/{provider_id}",
params={
"language": lng,
"language": lng.to_tag(),
"append_to_response": "images",
"include_image_language": f"{lng.language},null,en",
},
)
logger.debug("TMDb responded: %s", collection)
Expand All @@ -718,15 +759,11 @@ async def for_language(lng: str) -> Collection:
translation = CollectionTranslation(
name=collection["name"],
overview=collection["overview"],
posters=[
f"https://image.tmdb.org/t/p/original{collection['poster_path']}"
],
posters=self.get_best_image(collection, lng, "posters"),
logos=[],
thumbnails=[
f"https://image.tmdb.org/t/p/original{collection['backdrop_path']}"
],
thumbnails=self.get_best_image(collection, lng, "backdrops"),
)
ret.translations = {lng: translation}
ret.translations = {lng.to_tag(): translation}
return ret

return await self.process_translations(for_language, languages)
7 changes: 6 additions & 1 deletion scanner/providers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def normalize_lang(lang: str) -> str:

# For now, the API of kyoo only supports one language so we remove the others.
default_languages = os.environ.get("LIBRARY_LANGUAGES", "").split(",")
media_prefer_original_language = (
os.environ.get("MEDIA_PREFER_ORIGINAL_LANGUAGE", "false").lower() == "true"
)


def sort_translations(
Expand Down Expand Up @@ -64,7 +67,9 @@ def select_image(
chain(
*(
getattr(trans, kind)
for trans in sort_translations(value, prefer_orginal=True)
for trans in sort_translations(
value, prefer_orginal=media_prefer_original_language
)
)
),
None,
Expand Down