Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
AlajeBash authored Oct 23, 2023
2 parents a2d509e + f8a69f0 commit 4a819cc
Show file tree
Hide file tree
Showing 34 changed files with 4,555 additions and 2,840 deletions.
2 changes: 1 addition & 1 deletion .github/actions/load-img/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ runs:
# We just need to be able to use `pnpm`. `npm` will fail on
# any `workspace:*` dependency which we have in the root `package.json`
setup_python: false
install_deps: false
install_recipe: ""

- name: Install `@actions/artifact`
shell: bash
Expand Down
6 changes: 1 addition & 5 deletions .github/actions/setup-env/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ inputs:
default: "true"
description: Whether to set up Node.js and dependencies

install_deps:
default: "true"
description: Whether to install dependencies using Just

install_recipe:
default: "install"
description: The recipe to use to install dependencies
Expand Down Expand Up @@ -65,7 +61,7 @@ runs:
# Install dependencies
- name: Install dependencies
if: inputs.install_deps == 'true'
if: inputs.install_recipe != ''
shell: bash
run: |
just ${{ inputs.install_recipe }}
18 changes: 8 additions & 10 deletions api/api/controllers/search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,12 +350,14 @@ def search(
("extension", None),
("category", None),
("categories", "category"),
("source", None),
("license", None),
("license_type", "license"),
# Audio-specific filters
("length", None),
# Image-specific filters
("aspect_ratio", None),
("size", None),
("source", None),
("license", "license__keyword"),
("license_type", "license__keyword"),
]
for serializer_field, es_field in filters:
if serializer_field in search_params.data:
Expand Down Expand Up @@ -512,9 +514,7 @@ def related_media(uuid: str, index: str, filter_dead: bool) -> list[Hit]:

# Search the default index for the item itself as it might be sensitive.
item_search = Search(index=index)
# TODO: remove `__keyword` after
# https://github.com/WordPress/openverse/pull/3143 is merged.
item_hit = item_search.query(Term(identifier__keyword=uuid)).execute().hits[0]
item_hit = item_search.query(Term(identifier=uuid)).execute().hits[0]

# Match related using title.
title = getattr(item_hit, "title", None)
Expand All @@ -539,9 +539,7 @@ def related_media(uuid: str, index: str, filter_dead: bool) -> list[Hit]:
s = Search(index=f"{index}-filtered")

# Exclude the current item and mature content.
# TODO: remove `__keyword` after
# https://github.com/WordPress/openverse/pull/3143 is merged.
s = s.query(related_query & ~Term(identifier__keyword=uuid) & ~Term(mature=True))
s = s.query(related_query & ~Term(identifier=uuid) & ~Term(mature=True))
# Exclude the dynamically disabled sources.
s = _exclude_filtered(s)

Expand Down Expand Up @@ -579,7 +577,7 @@ def get_sources(index):
aggs = {
"unique_sources": {
"terms": {
"field": "source.keyword",
"field": "source",
"size": size,
"order": {"_key": "desc"},
}
Expand Down
8 changes: 1 addition & 7 deletions api/api/utils/search_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,7 @@ def build(
# Use `identifier` rather than the document `id` due to
# `id` instability between refreshes:
# https://github.com/WordPress/openverse/issues/2306
# `identifier` is mapped as `text` which will match fuzzily.
# Use `identifier.keyword` to match _exactly_
# cf: https://github.com/WordPress/openverse/issues/2154
Q(
"terms",
**{"identifier.keyword": all_result_identifiers},
)
Q("terms", identifier=all_result_identifiers)
)

# The default query size is 10, so we need to slice the query
Expand Down
6 changes: 3 additions & 3 deletions catalog/dags/data_refresh/data_refresh_task_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
from airflow.utils.trigger_rule import TriggerRule

from common import ingestion_server
from common.constants import REFRESH_POKE_INTERVAL, XCOM_PULL_TEMPLATE
from common.constants import XCOM_PULL_TEMPLATE
from common.sensors.single_run_external_dags_sensor import SingleRunExternalDAGsSensor
from common.sensors.utils import get_most_recent_dag_run
from data_refresh.data_refresh_types import DataRefresh
Expand Down Expand Up @@ -108,7 +108,7 @@ def create_data_refresh_task_group(
task_id="wait_for_data_refresh",
external_dag_ids=external_dag_ids,
check_existence=True,
poke_interval=REFRESH_POKE_INTERVAL,
poke_interval=data_refresh.data_refresh_poke_interval,
mode="reschedule",
pool=DATA_REFRESH_POOL,
)
Expand All @@ -131,7 +131,7 @@ def create_data_refresh_task_group(
# Wait for the whole DAG, not just a part of it
external_task_id=None,
check_existence=False,
poke_interval=REFRESH_POKE_INTERVAL,
poke_interval=data_refresh.filtered_index_poke_interval,
execution_date_fn=lambda _: get_most_recent_dag_run(
create_filtered_index_dag_id
),
Expand Down
23 changes: 22 additions & 1 deletion catalog/dags/data_refresh/data_refresh_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
for each of our media types. This configuration information is used to generate
the dynamic Data Refresh dags.
"""
import os
from dataclasses import dataclass, field
from datetime import datetime, timedelta

from common.constants import REFRESH_POKE_INTERVAL


@dataclass
class DataRefresh:
Expand Down Expand Up @@ -41,6 +44,12 @@ class DataRefresh:
index_readiness_timeout: timedelta expressing amount of time it may take
to await a healthy ES index after reindexing
doc_md: str used for the DAG's documentation markdown
data_refresh_poke_interval: int number of seconds to wait between
checks to see if the batched updates have
completed.
filtered_index_poke_interval: int number of seconds to wait between
checks to see if the filtered batched updates
have completed.
"""

dag_id: str = field(init=False)
Expand All @@ -55,6 +64,8 @@ class DataRefresh:
create_materialized_view_timeout: timedelta = timedelta(hours=1)
create_filtered_index_timeout: timedelta = timedelta(days=1)
index_readiness_timeout: timedelta = timedelta(days=1)
data_refresh_poke_interval: int = REFRESH_POKE_INTERVAL
filtered_index_poke_interval: int = REFRESH_POKE_INTERVAL

def __post_init__(self):
self.dag_id = f"{self.media_type}_data_refresh"
Expand All @@ -68,6 +79,16 @@ def __post_init__(self):
refresh_matview_timeout=timedelta(hours=72),
create_pop_constants_view_timeout=timedelta(hours=24),
create_materialized_view_timeout=timedelta(hours=72),
data_refresh_poke_interval=int(os.getenv("DATA_REFRESH_POKE_INTERVAL", 60)),
filtered_index_poke_interval=int(
os.getenv("DATA_REFRESH_POKE_INTERVAL", 60 * 30)
),
),
DataRefresh(
media_type="audio",
data_refresh_poke_interval=int(
os.getenv("DATA_REFRESH_POKE_INTERVAL", 60 * 30)
),
filtered_index_poke_interval=int(os.getenv("DATA_REFRESH_POKE_INTERVAL", 60)),
),
DataRefresh(media_type="audio"),
]
11 changes: 11 additions & 0 deletions documentation/changelogs/api/2023.10.23.11.40.59.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# 2023.10.23.11.40.59

## Improvements

- Use top-level `keyword` fields instead of subfields
([#3161](https://github.com/WordPress/openverse/pull/3161)) by @dhruvkb

## Bug Fixes

- Make API nginx log once per request
([#3163](https://github.com/WordPress/openverse/pull/3163)) by @obulat
1 change: 0 additions & 1 deletion documentation/meta/ci_cd/actions.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ host dependencies are needed).
{
setup_python: "true" | "false" // default: "true"
setup_nodejs: "true" | "false" // default: "true"
install_deps: "true" | "false" // default: "true"
install_recipe: string // default: "install"
}
```
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/composables/use-external-sources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export const useExternalSources = () => {
return IMAGE
})
const externalSources = computed(() => {
const query = searchStore.searchQueryParams
const query = searchStore.apiSearchQueryParams
const type = externalSourcesType.value
return getAdditionalSources(type, query)
})
Expand Down
6 changes: 1 addition & 5 deletions frontend/src/constants/filters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ export interface Filters {
sizes: FilterItem[]
audioProviders: FilterItem[]
imageProviders: FilterItem[]
searchBy: FilterItem[]
}
export type FilterCategory = keyof Filters

Expand All @@ -40,7 +39,6 @@ export const mediaFilterKeys = deepFreeze<Record<SearchType, FilterCategory[]>>(
"aspectRatios",
"sizes",
"imageProviders",
"searchBy",
],
[AUDIO]: [
"licenseTypes",
Expand All @@ -49,11 +47,10 @@ export const mediaFilterKeys = deepFreeze<Record<SearchType, FilterCategory[]>>(
"audioExtensions",
"lengths",
"audioProviders",
"searchBy",
],
[VIDEO]: [],
[MODEL_3D]: [],
[ALL_MEDIA]: ["licenseTypes", "licenses", "searchBy"],
[ALL_MEDIA]: ["licenseTypes", "licenses"],
}
)

Expand Down Expand Up @@ -96,7 +93,6 @@ const filterCodesPerCategory = deepFreeze<Record<FilterCategory, string[]>>({
sizes: ["small", "medium", "large"],
audioProviders: [],
imageProviders: [],
searchBy: ["creator"],
})
/**
* Converts the filterCodesPerCategory object into the format that's used by the filter store.
Expand Down
11 changes: 4 additions & 7 deletions frontend/src/data/media-service.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { decodeMediaData } from "~/utils/decode-media-data"
import type { ApiQueryParams } from "~/utils/search-query-transform"
import type { PaginatedSearchQuery } from "~/types/search"
import type { ApiService } from "~/data/api-service"
import type { DetailFromMediaType, Media } from "~/types/media"
import { AUDIO, type SupportedMediaType } from "~/constants/media"
Expand Down Expand Up @@ -47,7 +47,7 @@ class MediaService<T extends Media> {
* @param params - API search query parameters
*/
async search(
params: ApiQueryParams
params: PaginatedSearchQuery
): Promise<MediaResult<Record<string, Media>>> {
// Add the `peaks` param to all audio searches automatically
if (this.mediaType === AUDIO) {
Expand Down Expand Up @@ -88,14 +88,11 @@ class MediaService<T extends Media> {
`MediaService.getRelatedMedia() id parameter required to retrieve related media.`
)
}
const params: ApiQueryParams = {}
if (this.mediaType === AUDIO) {
params.peaks = "true"
}
const params = this.mediaType === AUDIO ? { peaks: "true" } : undefined
const res = (await this.apiService.get(
this.mediaType,
`${id}/related`,
params as unknown as Record<string, string>
params
)) as AxiosResponse<MediaResult<DetailFromMediaType<T>[]>>
return {
...res.data,
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/pages/search.vue
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ export default defineComponent({
const {
searchTerm,
searchType,
searchQueryParams: query,
apiSearchQueryParams: query,
searchTypeIsSupported: supported,
} = storeToRefs(searchStore)
Expand Down
23 changes: 9 additions & 14 deletions frontend/src/stores/media/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import { defineStore } from "pinia"
import { warn } from "~/utils/console"
import { hash, rand as prng } from "~/utils/prng"
import { isRetriable, parseFetchingError } from "~/utils/errors"
import prepareSearchQueryParams from "~/utils/prepare-search-query-params"
import type {
AudioDetail,
DetailFromMediaType,
Expand All @@ -25,6 +24,8 @@ import { isSearchTypeSupported, useSearchStore } from "~/stores/search"
import { useRelatedMediaStore } from "~/stores/media/related-media"
import { deepFreeze } from "~/utils/deep-freeze"

import { PaginatedSearchQuery } from "~/types/search"

export type MediaStoreResult = {
count: number
pageCount: number
Expand Down Expand Up @@ -442,19 +443,13 @@ export const useMediaStore = defineStore("media", {
mediaType: SupportedMediaType
shouldPersistMedia: boolean
}) {
const queryParams = prepareSearchQueryParams({
...useSearchStore().searchQueryParams,
})
let page = 1
if (shouldPersistMedia) {
/**
* If `shouldPersistMedia` is true, then we increment the page that was set by a previous
* fetch. Normally, if `shouldPersistMedia` is true, `page` should have been set to 1 by the
* previous fetch.
*/
page = this.results[mediaType].page + 1
queryParams.page = `${page}`
let page = this.results[mediaType].page + 1
const queryParams: PaginatedSearchQuery = {
...useSearchStore().apiSearchQueryParams,
// Don't need to set `page` parameter for the first page.
page: shouldPersistMedia ? `${page}` : undefined,
}

this._updateFetchState(mediaType, "start")
try {
const accessToken = this.$nuxt.$openverseApiToken
Expand All @@ -467,7 +462,7 @@ export const useMediaStore = defineStore("media", {
* In such cases, we show the "No results" client error page.
*/
if (!mediaCount) {
page = 0
page = 1
errorData = {
message: `No results found for ${queryParams.q}`,
code: NO_RESULT,
Expand Down
Loading

0 comments on commit 4a819cc

Please sign in to comment.