From eb00e9b68f25a0205197c23a5548ed43d005229d Mon Sep 17 00:00:00 2001 From: Leire Aguirre Date: Tue, 29 Oct 2024 11:08:22 +0100 Subject: [PATCH 01/50] fix and update test snaps --- .../base/base-date/base-date.test.ts | 53 +++++++++++++++++++ .../__snapshots__/BaseTableInfo.spec.js.snap | 20 +++---- .../TableFiltrableColumn.spec.js.snap | 2 +- .../SelectOptionsSearch.spec.js.snap | 2 +- 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/argilla-frontend/components/base/base-date/base-date.test.ts b/argilla-frontend/components/base/base-date/base-date.test.ts index 4c71569404..9a64dddb71 100644 --- a/argilla-frontend/components/base/base-date/base-date.test.ts +++ b/argilla-frontend/components/base/base-date/base-date.test.ts @@ -1,8 +1,16 @@ import { mount } from "@vue/test-utils"; import BaseDateVue from "./BaseDate.vue"; +import "vue-i18n"; + +const mocks = { + $i18n: { + locale: "en", + }, +}; const dateMocked = new Date("2023-07-19 00:00:00"); jest.useFakeTimers("modern").setSystemTime(dateMocked); +jest.mock("vue-i18n"); describe("Base Date should", () => { test("should format date correctly", () => { @@ -17,6 +25,11 @@ describe("Base Date should", () => { test("should format day 1 second ago", () => { const baseDate = mount(BaseDateVue, { + mocks: { + $i18n: { + locale: "en", + }, + }, propsData: { date: new Date("2023-07-18 23:59:59").toString(), format: "date-relative-now", @@ -28,6 +41,11 @@ describe("Base Date should", () => { test("should format day 2 seconds ago", () => { const baseDate = mount(BaseDateVue, { + mocks: { + $i18n: { + locale: "en", + }, + }, propsData: { date: new Date("2023-07-18 23:59:58").toString(), format: "date-relative-now", @@ -39,6 +57,11 @@ describe("Base Date should", () => { test("should format day 2 hours ago", () => { const baseDate = mount(BaseDateVue, { + mocks: { + $i18n: { + locale: "en", + }, + }, propsData: { date: new Date("2023-07-18 22:00").toString(), format: "date-relative-now", @@ -50,6 +73,11 @@ 
describe("Base Date should", () => { test("should format day as yesterday", () => { const baseDate = mount(BaseDateVue, { + mocks: { + $i18n: { + locale: "en", + }, + }, propsData: { date: new Date("2023-07-17 22:00").toString(), format: "date-relative-now", @@ -61,6 +89,11 @@ describe("Base Date should", () => { test("should format day 2 days ago", () => { const baseDate = mount(BaseDateVue, { + mocks: { + $i18n: { + locale: "en", + }, + }, propsData: { date: new Date("2023-07-16 22:00").toString(), format: "date-relative-now", @@ -72,6 +105,11 @@ describe("Base Date should", () => { test("should format day last week", () => { const baseDate = mount(BaseDateVue, { + mocks: { + $i18n: { + locale: "en", + }, + }, propsData: { date: new Date("2023-07-11 22:00").toString(), format: "date-relative-now", @@ -83,6 +121,11 @@ describe("Base Date should", () => { test("should format day 2 weeks ago", () => { const baseDate = mount(BaseDateVue, { + mocks: { + $i18n: { + locale: "en", + }, + }, propsData: { date: new Date("2023-07-01 22:00").toString(), format: "date-relative-now", @@ -94,6 +137,11 @@ describe("Base Date should", () => { test("should format day last month", () => { const baseDate = mount(BaseDateVue, { + mocks: { + $i18n: { + locale: "en", + }, + }, propsData: { date: new Date("2023-06-18 22:00").toString(), format: "date-relative-now", @@ -105,6 +153,11 @@ describe("Base Date should", () => { test("should format day 2 months ago", () => { const baseDate = mount(BaseDateVue, { + mocks: { + $i18n: { + locale: "en", + }, + }, propsData: { date: new Date("2023-05-18 22:00").toString(), format: "date-relative-now", diff --git a/argilla-frontend/components/base/base-table/__snapshots__/BaseTableInfo.spec.js.snap b/argilla-frontend/components/base/base-table/__snapshots__/BaseTableInfo.spec.js.snap index 408c589fa6..4f97888edd 100644 --- a/argilla-frontend/components/base/base-table/__snapshots__/BaseTableInfo.spec.js.snap +++ 
b/argilla-frontend/components/base/base-table/__snapshots__/BaseTableInfo.spec.js.snap @@ -2,22 +2,22 @@ exports[`BaseTableInfo renders properly 1`] = ` -
+
-
-
- +
+
+
-
-
    -
  • - recognai +
    +
      +
    • + recognai
    • -
    • - recognai +
    • + recognai
    diff --git a/argilla-frontend/components/base/base-table/__snapshots__/TableFiltrableColumn.spec.js.snap b/argilla-frontend/components/base/base-table/__snapshots__/TableFiltrableColumn.spec.js.snap index b46249229a..5bb68a4766 100644 --- a/argilla-frontend/components/base/base-table/__snapshots__/TableFiltrableColumn.spec.js.snap +++ b/argilla-frontend/components/base/base-table/__snapshots__/TableFiltrableColumn.spec.js.snap @@ -1,7 +1,7 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP exports[`TableFiltrableColumn renders properly 1`] = ` -
    @@ -15,6 +16,7 @@ :class="{ active: recordCriteria.page.isBulkMode }" @click="switchBulkMode()" :data-title="$t('bulk_mode')" + :aria-selected="recordCriteria.page.isBulkMode" >
    @@ -25,8 +25,8 @@ import "assets/icons/light-theme"; import "assets/icons/dark-theme"; import "assets/icons/system-theme"; import "assets/icons/high-contrast-theme"; + export default { - name: "ThemeSwitch", data() { return { themes: ["system", "dark", "light", "high-contrast"], diff --git a/argilla-frontend/components/features/user-settings/useUserSettingsLanguageViewModel.ts b/argilla-frontend/components/features/user-settings/useUserSettingsLanguageViewModel.ts new file mode 100644 index 0000000000..30e71e8f32 --- /dev/null +++ b/argilla-frontend/components/features/user-settings/useUserSettingsLanguageViewModel.ts @@ -0,0 +1,12 @@ +import { useContext } from "@nuxtjs/composition-api"; +import { useLanguageChanger } from "~/v1/infrastructure/services"; + +export const useUserSettingsLanguageViewModel = () => { + const context = useContext(); + const { change, languages } = useLanguageChanger(context); + + return { + change, + languages, + }; +}; diff --git a/argilla-frontend/nuxt.config.ts b/argilla-frontend/nuxt.config.ts index 8375d73a82..96a0cfa20c 100644 --- a/argilla-frontend/nuxt.config.ts +++ b/argilla-frontend/nuxt.config.ts @@ -115,14 +115,17 @@ const config: NuxtConfig = { locales: [ { code: "en", + name: "English", file: "en.js", }, { code: "de", + name: "Deutsch", file: "de.js", }, { code: "es", + name: "Español", file: "es.js", }, ], diff --git a/argilla-frontend/translation/de.js b/argilla-frontend/translation/de.js index 6e0810d89a..8226fa127d 100644 --- a/argilla-frontend/translation/de.js +++ b/argilla-frontend/translation/de.js @@ -88,6 +88,7 @@ export default { apiKeyDescription: "API-Keys erlauben es die Datensätze über das Python SDK zu verwalten.", theme: "Theme", + language: "Sprache", copyKey: "API-Key kopieren", }, userAvatarTooltip: { @@ -341,6 +342,12 @@ export default { annotator: "Der persistente Speicher ist nicht aktiviert. 
Alle Daten gehen verloren, wenn dieser Space neu gestartet wird.", }, + colorSchema: { + system: "System", + light: "Licht", + dark: "Dunkel", + "high-contrast": "Hoher Kontrast", + }, validations: { businessLogic: { missing_vector: { diff --git a/argilla-frontend/translation/en.js b/argilla-frontend/translation/en.js index 009525c13c..448064600b 100644 --- a/argilla-frontend/translation/en.js +++ b/argilla-frontend/translation/en.js @@ -58,7 +58,7 @@ export default { breadcrumbs: { home: "Home", datasetSettings: "settings", - userSettings: "my settings", + userSettings: "My settings", }, datasets: { left: "left", @@ -86,6 +86,7 @@ export default { apiKeyDescription: "API key tokens allow you to manage datasets using the Python SDK.", theme: "Theme", + language: "Language", copyKey: "Copy key", }, userAvatarTooltip: { @@ -345,6 +346,12 @@ export default { annotator: "Persistent storage is not enabled. All data will be lost if this space restarts.", }, + colorSchema: { + system: "System", + light: "Light", + dark: "Dark", + "high-contrast": "High contrast", + }, validations: { businessLogic: { missing_vector: { diff --git a/argilla-frontend/translation/es.js b/argilla-frontend/translation/es.js index ed40c9396e..3bc4d8c922 100644 --- a/argilla-frontend/translation/es.js +++ b/argilla-frontend/translation/es.js @@ -84,6 +84,8 @@ export default { apiKey: "Clave de API", apiKeyDescription: "Los tokens de clave API permiten administrar datasets utilizando el SDK de Python", + theme: "Tema", + language: "Idioma", copyKey: "Copiar clave", }, userAvatarTooltip: { @@ -335,6 +337,12 @@ export default { annotator: "El almacenamiento persistente no está habilitado. 
Todos los datos se perderán si este espacio se reinicia", }, + colorSchema: { + system: "Sistema", + light: "Claro", + dark: "Oscuro", + "high-contrast": "Alto contraste", + }, validations: { businessLogic: { missing_vector: { diff --git a/argilla-frontend/v1/infrastructure/services/useColorSchema.ts b/argilla-frontend/v1/infrastructure/services/useColorSchema.ts index 0357ca861a..2b5d57cb72 100644 --- a/argilla-frontend/v1/infrastructure/services/useColorSchema.ts +++ b/argilla-frontend/v1/infrastructure/services/useColorSchema.ts @@ -1,14 +1,17 @@ import { ref } from "vue"; +import { useLocalStorage } from "./useLocalStorage"; export const useColorSchema = () => { + const { get, set } = useLocalStorage(); const systemTheme = window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light"; - const currentTheme = ref(localStorage.getItem("theme") || "system"); + const currentTheme = ref(get("theme") || "system"); const setTheme = (theme: string) => { currentTheme.value = theme; - localStorage.setItem("theme", theme); + set("theme", theme); + if (theme !== "system") { document.documentElement.setAttribute("data-theme", theme); } else { diff --git a/argilla-frontend/v1/infrastructure/services/useLanguageDetector.test.ts b/argilla-frontend/v1/infrastructure/services/useLanguageDetector.test.ts index 3bfe47cf24..de6318a0ca 100644 --- a/argilla-frontend/v1/infrastructure/services/useLanguageDetector.test.ts +++ b/argilla-frontend/v1/infrastructure/services/useLanguageDetector.test.ts @@ -1,10 +1,14 @@ import { useLanguageDetector } from "./useLanguageDetector"; +import { useLocalStorage } from "./useLocalStorage"; + +jest.mock("./useLocalStorage"); +const useLocalStorageMock = jest.mocked(useLocalStorage); describe("useLanguageDetector", () => { const context = { app: { i18n: { - locales: [{ code: "en" }, { code: "es" }, { code: "fr-CA" }], + locales: [{ code: "en" }, { code: "es" }, { code: "fr" }], setLocale: jest.fn(), }, }, @@ -15,24 +19,34 @@ 
describe("useLanguageDetector", () => { }); describe("initialize should", () => { - test("change to the detected language when it exists", () => { + test("set the browser language if the user does not have the language saved and the browser language is supported", () => { Object.defineProperty(window.navigator, "language", { - value: "fr-CA", + value: "es", configurable: true, }); + useLocalStorageMock.mockReturnValue({ + get: jest.fn().mockReturnValue(null), + pop: jest.fn(), + set: jest.fn(), + }); const { initialize } = useLanguageDetector(context); initialize(); - expect(context.app.i18n.setLocale).toHaveBeenCalledWith("fr-CA"); + expect(context.app.i18n.setLocale).toHaveBeenCalledWith("es"); }); - test("change to base language code if not exist the complete code into locales", () => { + test("set the browser language if the user does not have the language saved and the browser language is supported", () => { Object.defineProperty(window.navigator, "language", { value: "es-AR", configurable: true, }); + useLocalStorageMock.mockReturnValue({ + get: jest.fn().mockReturnValue(null), + pop: jest.fn(), + set: jest.fn(), + }); const { initialize } = useLanguageDetector(context); @@ -41,17 +55,36 @@ describe("useLanguageDetector", () => { expect(context.app.i18n.setLocale).toHaveBeenCalledWith("es"); }); - test("not change to the language code when the detected language does not exist", () => { + test("set English if the user does not have the language saved and the browser language is not supported", () => { Object.defineProperty(window.navigator, "language", { value: "de", configurable: true, }); + useLocalStorageMock.mockReturnValue({ + get: jest.fn().mockReturnValue(null), + pop: jest.fn(), + set: jest.fn(), + }); + + const { initialize } = useLanguageDetector(context); + + initialize(); + + expect(context.app.i18n.setLocale).toHaveBeenCalledWith("en"); + }); + + test("set the language saved by the user", () => { + useLocalStorageMock.mockReturnValue({ + get: () 
=> "fr", + pop: jest.fn(), + set: jest.fn(), + }); const { initialize } = useLanguageDetector(context); initialize(); - expect(context.app.i18n.setLocale).toHaveBeenCalledTimes(0); + expect(context.app.i18n.setLocale).toHaveBeenCalledWith("fr"); }); }); }); diff --git a/argilla-frontend/v1/infrastructure/services/useLanguageDetector.ts b/argilla-frontend/v1/infrastructure/services/useLanguageDetector.ts index c2da119991..8482ed6da9 100644 --- a/argilla-frontend/v1/infrastructure/services/useLanguageDetector.ts +++ b/argilla-frontend/v1/infrastructure/services/useLanguageDetector.ts @@ -1,4 +1,5 @@ import { NuxtI18nInstance } from "@nuxtjs/i18n"; +import { useLocalStorage } from "./useLocalStorage"; type Context = { app: { @@ -10,14 +11,13 @@ type Context = { }; export const useLanguageDetector = (context: Context) => { - const { i18n } = context.app; + const { change } = useLanguageChanger(context); + const { get } = useLocalStorage(); - const change = (language: string) => { - i18n.setLocale(language); - }; + const { i18n } = context.app; const detect = () => { - return navigator.language; + return get("language") || navigator.language; }; const exists = (language: string) => { @@ -36,9 +36,28 @@ export const useLanguageDetector = (context: Context) => { if (exists(languageCode)) { return change(languageCode); } + + change("en"); }; return { initialize, }; }; + +export const useLanguageChanger = (context: Context) => { + const { i18n } = context.app; + + const { set } = useLocalStorage(); + + const change = (language: string) => { + i18n.setLocale(language); + + set("language", language); + }; + + return { + change, + languages: i18n.locales.sort((a, b) => a.code.localeCompare(b.code)), + }; +}; diff --git a/argilla-frontend/v1/infrastructure/services/useLocalStorage.ts b/argilla-frontend/v1/infrastructure/services/useLocalStorage.ts index 7d6c8a7483..967bc4f356 100644 --- a/argilla-frontend/v1/infrastructure/services/useLocalStorage.ts +++ 
b/argilla-frontend/v1/infrastructure/services/useLocalStorage.ts @@ -1,4 +1,9 @@ -type Options = "showShortcutsHelper" | "layout" | "redirectTo"; +type Options = + | "showShortcutsHelper" + | "layout" + | "redirectTo" + | "language" + | "theme"; const STORAGE_KEY = "argilla"; From 53c734c86d66c3bec6eb9473fb79a542f7f04d0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dami=C3=A1n=20Pumar?= Date: Tue, 19 Nov 2024 11:53:09 +0100 Subject: [PATCH 25/50] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Fix=20highlight=20te?= =?UTF-8?q?xt=20(#5693)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix https://github.com/argilla-io/argilla/issues/5683 --- argilla-frontend/CHANGELOG.md | 4 ++++ .../useSpanAnnotationTextFieldViewModel.ts | 12 ++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/argilla-frontend/CHANGELOG.md b/argilla-frontend/CHANGELOG.md index e6f5aad8e0..e9b929b2cf 100644 --- a/argilla-frontend/CHANGELOG.md +++ b/argilla-frontend/CHANGELOG.md @@ -21,6 +21,10 @@ These are the section headers that we use: - Add a high-contrast theme & improvements for the forced-colors mode. ([#5661](https://github.com/argilla-io/argilla/pull/5661)) - Add English as the default language and add language selector in the user settings page. 
([#5690](https://github.com/argilla-io/argilla/pull/5690)) +### Fixed + +- Fixed highlighting on same record ([#5693](https://github.com/argilla-io/argilla/pull/5693)) + ## [2.4.1](https://github.com/argilla-io/argilla/compare/v2.4.0...v2.4.1) ### Added diff --git a/argilla-frontend/components/features/annotation/container/fields/span-annotation/useSpanAnnotationTextFieldViewModel.ts b/argilla-frontend/components/features/annotation/container/fields/span-annotation/useSpanAnnotationTextFieldViewModel.ts index 98f63a9308..1342df139d 100644 --- a/argilla-frontend/components/features/annotation/container/fields/span-annotation/useSpanAnnotationTextFieldViewModel.ts +++ b/argilla-frontend/components/features/annotation/container/fields/span-annotation/useSpanAnnotationTextFieldViewModel.ts @@ -8,17 +8,13 @@ import { Question } from "~/v1/domain/entities/question/Question"; import { SpanQuestionAnswer } from "~/v1/domain/entities/question/QuestionAnswer"; import { SpanAnswer } from "~/v1/domain/entities/IAnswer"; -export const useSpanAnnotationTextFieldViewModel = ({ - name, - spanQuestion, - id, - searchText, -}: { +export const useSpanAnnotationTextFieldViewModel = (props: { name: string; spanQuestion: Question; id: string; searchText: string; }) => { + const { name, spanQuestion, id } = props; const searchTextHighlight = useSearchTextHighlight(name); const spanAnnotationSupported = ref(true); const answer = spanQuestion.answer as SpanQuestionAnswer; @@ -146,7 +142,7 @@ export const useSpanAnnotationTextFieldViewModel = ({ ); watch( - () => searchText, + () => props.searchText, (newValue) => { searchTextHighlight.highlightText(newValue); } @@ -161,7 +157,7 @@ export const useSpanAnnotationTextFieldViewModel = ({ spanAnnotationSupported.value = false; } - searchTextHighlight.highlightText(searchText); + searchTextHighlight.highlightText(props.searchText); }); onUnmounted(() => { From 6e63b047ca87bbb2f64fe6397abe9c34a2c57d07 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Jos=C3=A9=20Francisco=20Calvo?= Date: Tue, 19 Nov 2024 15:20:26 +0100 Subject: [PATCH 26/50] [FEATURE] Add Webhooks (#5467) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This is the feature branch for Webhooks implementation. Closes #1836 **Type of change** - New feature (non-breaking change which adds functionality) **How Has This Been Tested** - [ ] Manually tested locally and in HF spaces. **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --------- Co-authored-by: Paco Aranda Co-authored-by: Damián Pumar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Francisco Aranda Co-authored-by: David Berenstein Co-authored-by: Sara Han <127759186+sdiazlor@users.noreply.github.com> --- argilla-server/.env.dev | 2 + argilla-server/.env.test | 1 + argilla-server/CHANGELOG.md | 8 + .../docker/argilla-hf-spaces/Dockerfile | 1 - .../docker/argilla-hf-spaces/Procfile | 3 +- argilla-server/pdm.lock | 111 +- argilla-server/pyproject.toml | 5 +- .../6ed1b8bf8e08_create_webhooks_table.py | 50 + .../api/handlers/v1/datasets/records_bulk.py | 14 +- .../api/handlers/v1/webhooks.py | 99 + .../api/policies/v1/__init__.py | 2 + .../api/policies/v1/webhook_policy.py | 38 + .../src/argilla_server/api/routes.py | 2 + .../argilla_server/api/schemas/v1/webhooks.py | 70 + .../src/argilla_server/bulk/records_bulk.py | 14 + .../src/argilla_server/cli/worker.py | 4 +- .../src/argilla_server/contexts/datasets.py | 76 +- .../argilla_server/contexts/distribution.py | 7 + .../src/argilla_server/contexts/webhooks.py | 49 + 
argilla-server/src/argilla_server/database.py | 1 + argilla-server/src/argilla_server/enums.py | 8 +- .../src/argilla_server/jobs/dataset_jobs.py | 3 +- .../src/argilla_server/jobs/hub_jobs.py | 6 +- .../src/argilla_server/jobs/queues.py | 4 +- .../src/argilla_server/jobs/webhook_jobs.py | 55 + .../src/argilla_server/models/database.py | 28 + .../src/argilla_server/utils/str_enum.py | 22 + .../src/argilla_server/validators/webhooks.py | 38 + .../src/argilla_server/webhooks/__init__.py | 14 + .../argilla_server/webhooks/v1/__init__.py | 14 + .../src/argilla_server/webhooks/v1/commons.py | 67 + .../argilla_server/webhooks/v1/datasets.py | 55 + .../src/argilla_server/webhooks/v1/enums.py | 54 + .../src/argilla_server/webhooks/v1/event.py | 36 + .../src/argilla_server/webhooks/v1/ping.py | 34 + .../src/argilla_server/webhooks/v1/records.py | 55 + .../argilla_server/webhooks/v1/responses.py | 60 + .../src/argilla_server/webhooks/v1/schemas.py | 159 ++ argilla-server/tests/conftest.py | 23 +- argilla-server/tests/factories.py | 19 +- .../test_create_dataset_records_bulk.py | 56 +- .../test_upsert_dataset_records_bulk.py | 100 +- .../records/test_delete_dataset_records.py | 60 + .../v1/datasets/test_create_dataset.py | 33 +- .../v1/datasets/test_delete_dataset.py | 52 + .../v1/datasets/test_publish_dataset.py | 55 + .../v1/datasets/test_update_dataset.py | 35 +- .../v1/records/test_create_record_response.py | 129 +- .../handlers/v1/records/test_delete_record.py | 52 + .../handlers/v1/records/test_update_record.py | 53 + ...test_create_current_user_responses_bulk.py | 202 +- .../v1/responses/test_delete_response.py | 64 +- .../v1/responses/test_update_response.py | 167 +- .../unit/api/handlers/v1/test_datasets.py | 6 +- .../unit/api/handlers/v1/webhooks/__init__.py | 14 + .../v1/webhooks/test_create_webhook.py | 265 +++ .../v1/webhooks/test_delete_webhook.py | 93 + .../v1/webhooks/test_list_webhooks.py | 90 + .../handlers/v1/webhooks/test_ping_webhook.py | 98 + 
.../v1/webhooks/test_update_webhook.py | 439 ++++ argilla-server/tests/unit/jobs/__init__.py | 14 + .../tests/unit/jobs/webhook_jobs/__init__.py | 14 + .../test_enqueue_notify_events.py | 58 + argilla-server/tests/unit/models/__init__.py | 14 + .../tests/unit/models/test_webhook.py | 30 + .../tests/unit/webhooks/__init__.py | 14 + .../tests/unit/webhooks/v1/__init__.py | 14 + .../webhooks/v1/test_notify_ping_event.py | 52 + argilla/CHANGELOG.md | 1 + argilla/docs/how_to_guides/index.md | 19 +- argilla/docs/how_to_guides/webhooks.md | 160 ++ .../docs/how_to_guides/webhooks_internals.md | 1863 +++++++++++++++++ argilla/docs/reference/argilla/SUMMARY.md | 1 + argilla/docs/reference/argilla/webhooks.md | 61 + argilla/mkdocs.yml | 1 + argilla/pdm.lock | 107 +- argilla/pyproject.toml | 15 +- argilla/src/argilla/__init__.py | 1 + argilla/src/argilla/_api/_client.py | 14 +- argilla/src/argilla/_api/_webhooks.py | 122 ++ .../src/argilla/_helpers/_resource_repr.py | 1 + argilla/src/argilla/_models/__init__.py | 1 + argilla/src/argilla/_models/_webhook.py | 72 + argilla/src/argilla/client.py | 71 +- argilla/src/argilla/responses.py | 10 + argilla/src/argilla/webhooks/__init__.py | 43 + argilla/src/argilla/webhooks/_event.py | 179 ++ argilla/src/argilla/webhooks/_handler.py | 78 + argilla/src/argilla/webhooks/_helpers.py | 202 ++ argilla/src/argilla/webhooks/_resource.py | 98 + examples/webhooks/basic-webhooks/README.md | 20 + examples/webhooks/basic-webhooks/main.py | 76 + .../webhooks/basic-webhooks/requirements.txt | 3 + 93 files changed, 6700 insertions(+), 103 deletions(-) create mode 100644 argilla-server/src/argilla_server/alembic/versions/6ed1b8bf8e08_create_webhooks_table.py create mode 100644 argilla-server/src/argilla_server/api/handlers/v1/webhooks.py create mode 100644 argilla-server/src/argilla_server/api/policies/v1/webhook_policy.py create mode 100644 argilla-server/src/argilla_server/api/schemas/v1/webhooks.py create mode 100644 
argilla-server/src/argilla_server/contexts/webhooks.py create mode 100644 argilla-server/src/argilla_server/jobs/webhook_jobs.py create mode 100644 argilla-server/src/argilla_server/utils/str_enum.py create mode 100644 argilla-server/src/argilla_server/validators/webhooks.py create mode 100644 argilla-server/src/argilla_server/webhooks/__init__.py create mode 100644 argilla-server/src/argilla_server/webhooks/v1/__init__.py create mode 100644 argilla-server/src/argilla_server/webhooks/v1/commons.py create mode 100644 argilla-server/src/argilla_server/webhooks/v1/datasets.py create mode 100644 argilla-server/src/argilla_server/webhooks/v1/enums.py create mode 100644 argilla-server/src/argilla_server/webhooks/v1/event.py create mode 100644 argilla-server/src/argilla_server/webhooks/v1/ping.py create mode 100644 argilla-server/src/argilla_server/webhooks/v1/records.py create mode 100644 argilla-server/src/argilla_server/webhooks/v1/responses.py create mode 100644 argilla-server/src/argilla_server/webhooks/v1/schemas.py rename argilla-server/tests/unit/api/handlers/v1/{records => datasets/records/records_bulk}/test_upsert_dataset_records_bulk.py (67%) create mode 100644 argilla-server/tests/unit/api/handlers/v1/datasets/records/test_delete_dataset_records.py create mode 100644 argilla-server/tests/unit/api/handlers/v1/datasets/test_delete_dataset.py create mode 100644 argilla-server/tests/unit/api/handlers/v1/datasets/test_publish_dataset.py create mode 100644 argilla-server/tests/unit/api/handlers/v1/records/test_delete_record.py create mode 100644 argilla-server/tests/unit/api/handlers/v1/records/test_update_record.py create mode 100644 argilla-server/tests/unit/api/handlers/v1/webhooks/__init__.py create mode 100644 argilla-server/tests/unit/api/handlers/v1/webhooks/test_create_webhook.py create mode 100644 argilla-server/tests/unit/api/handlers/v1/webhooks/test_delete_webhook.py create mode 100644 
argilla-server/tests/unit/api/handlers/v1/webhooks/test_list_webhooks.py create mode 100644 argilla-server/tests/unit/api/handlers/v1/webhooks/test_ping_webhook.py create mode 100644 argilla-server/tests/unit/api/handlers/v1/webhooks/test_update_webhook.py create mode 100644 argilla-server/tests/unit/jobs/__init__.py create mode 100644 argilla-server/tests/unit/jobs/webhook_jobs/__init__.py create mode 100644 argilla-server/tests/unit/jobs/webhook_jobs/test_enqueue_notify_events.py create mode 100644 argilla-server/tests/unit/models/__init__.py create mode 100644 argilla-server/tests/unit/models/test_webhook.py create mode 100644 argilla-server/tests/unit/webhooks/__init__.py create mode 100644 argilla-server/tests/unit/webhooks/v1/__init__.py create mode 100644 argilla-server/tests/unit/webhooks/v1/test_notify_ping_event.py create mode 100644 argilla/docs/how_to_guides/webhooks.md create mode 100644 argilla/docs/how_to_guides/webhooks_internals.md create mode 100644 argilla/docs/reference/argilla/webhooks.md create mode 100644 argilla/src/argilla/_api/_webhooks.py create mode 100644 argilla/src/argilla/_models/_webhook.py create mode 100644 argilla/src/argilla/webhooks/__init__.py create mode 100644 argilla/src/argilla/webhooks/_event.py create mode 100644 argilla/src/argilla/webhooks/_handler.py create mode 100644 argilla/src/argilla/webhooks/_helpers.py create mode 100644 argilla/src/argilla/webhooks/_resource.py create mode 100644 examples/webhooks/basic-webhooks/README.md create mode 100644 examples/webhooks/basic-webhooks/main.py create mode 100644 examples/webhooks/basic-webhooks/requirements.txt diff --git a/argilla-server/.env.dev b/argilla-server/.env.dev index a542666ee6..76c10523d0 100644 --- a/argilla-server/.env.dev +++ b/argilla-server/.env.dev @@ -1,2 +1,4 @@ +OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES # Needed by RQ to work with forked processes on MacOS ALEMBIC_CONFIG=src/argilla_server/alembic.ini 
+ARGILLA_AUTH_SECRET_KEY=8VO7na5N/jQx+yP/N+HlE8q51vPdrxqlh6OzoebIyko= # With this we avoid using a different key every time the server is reloaded ARGILLA_DATABASE_URL=sqlite+aiosqlite:///${HOME}/.argilla/argilla.db?check_same_thread=False diff --git a/argilla-server/.env.test b/argilla-server/.env.test index c5d975e485..55d04fe762 100644 --- a/argilla-server/.env.test +++ b/argilla-server/.env.test @@ -1 +1,2 @@ ARGILLA_DATABASE_URL=sqlite+aiosqlite:///${HOME}/.argilla/argilla-test.db?check_same_thread=False +ARGILLA_REDIS_URL=redis://localhost:6379/1 # Using a different Redis database for testing diff --git a/argilla-server/CHANGELOG.md b/argilla-server/CHANGELOG.md index e53661abf0..c1fee89500 100644 --- a/argilla-server/CHANGELOG.md +++ b/argilla-server/CHANGELOG.md @@ -16,6 +16,14 @@ These are the section headers that we use: ## [Unreleased]() +### Added + +- Added new endpoints to create, update, ping and delete webhooks. ([#5453](https://github.com/argilla-io/argilla/pull/5453)) +- Added new webhook events when responses are created, updated, deleted. ([#5468](https://github.com/argilla-io/argilla/pull/5468)) +- Added new webhook events when datasets are created, updated, deleted or published. ([#5468](https://github.com/argilla-io/argilla/pull/5468)) +- Added new webhook events when records are created, updated, deleted or completed. ([#5489](https://github.com/argilla-io/argilla/pull/5489)) +- Added new `high` RQ queue to process high priority background jobs. ([#5467](https://github.com/argilla-io/argilla/pull/5467)) + ### Changed - Changed default python version to 3.13. 
([#5649](https://github.com/argilla-io/argilla/pull/5649)) diff --git a/argilla-server/docker/argilla-hf-spaces/Dockerfile b/argilla-server/docker/argilla-hf-spaces/Dockerfile index 69effa0246..83624ce1fd 100644 --- a/argilla-server/docker/argilla-hf-spaces/Dockerfile +++ b/argilla-server/docker/argilla-hf-spaces/Dockerfile @@ -60,7 +60,6 @@ ENV ELASTIC_CONTAINER=true ENV ES_JAVA_OPTS="-Xms1g -Xmx1g" ENV ARGILLA_HOME_PATH=/data/argilla -ENV BACKGROUND_NUM_WORKERS=2 ENV REINDEX_DATASETS=1 CMD ["/bin/bash", "start.sh"] diff --git a/argilla-server/docker/argilla-hf-spaces/Procfile b/argilla-server/docker/argilla-hf-spaces/Procfile index 751d36e4b4..940bc02e6d 100644 --- a/argilla-server/docker/argilla-hf-spaces/Procfile +++ b/argilla-server/docker/argilla-hf-spaces/Procfile @@ -1,4 +1,5 @@ elastic: /usr/share/elasticsearch/bin/elasticsearch redis: /usr/bin/redis-server -worker: sleep 30; rq worker-pool --num-workers ${BACKGROUND_NUM_WORKERS} +worker_high: sleep 30; rq worker-pool --num-workers 2 high +worker_default: sleep 30; rq worker-pool --num-workers 1 default argilla: sleep 30; /bin/bash start_argilla_server.sh diff --git a/argilla-server/pdm.lock b/argilla-server/pdm.lock index afbb0afb5f..8f6cdd1005 100644 --- a/argilla-server/pdm.lock +++ b/argilla-server/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "postgresql", "test"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:bac5d1141affbb735ae70537eb0534eb8dabf287bd33af762ac7bd877073d282" +content_hash = "sha256:c25b92ead1fbe755395a5c38f8c182e43f78bbf735e8393aa844b7299ee55fdd" [[metadata.targets]] requires_python = ">=3.9" @@ -840,6 +840,20 @@ files = [ {file = "defusedxml-0.8.0rc2.tar.gz", hash = "sha256:138c7d540a78775182206c7c97fe65b246a2f40b29471e1a2f1b0da76e7a3942"}, ] +[[package]] +name = "deprecated" +version = "1.2.14" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +summary = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
+groups = ["default"] +dependencies = [ + "wrapt<2,>=1.10", +] +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] + [[package]] name = "dill" version = "0.3.8" @@ -2307,6 +2321,20 @@ files = [ {file = "requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36"}, ] +[[package]] +name = "respx" +version = "0.21.1" +requires_python = ">=3.7" +summary = "A utility for mocking out the Python HTTPX and HTTP Core libraries." +groups = ["test"] +dependencies = [ + "httpx>=0.21.0", +] +files = [ + {file = "respx-0.21.1-py2.py3-none-any.whl", hash = "sha256:05f45de23f0c785862a2c92a3e173916e8ca88e4caad715dd5f68584d6053c20"}, + {file = "respx-0.21.1.tar.gz", hash = "sha256:0bd7fe21bfaa52106caa1223ce61224cf30786985f17c63c5d71eff0307ee8af"}, +] + [[package]] name = "rich" version = "13.9.3" @@ -2450,6 +2478,24 @@ files = [ {file = "sqlalchemy-2.0.36.tar.gz", hash = "sha256:7f2767680b6d2398aea7082e45a774b2b0767b5c8d8ffb9c8b683088ea9b29c5"}, ] +[[package]] +name = "standardwebhooks" +version = "1.0.0" +requires_python = ">=3.6" +summary = "Standard Webhooks" +groups = ["default"] +dependencies = [ + "Deprecated", + "attrs>=21.3.0", + "httpx>=0.23.0", + "python-dateutil", + "types-Deprecated", + "types-python-dateutil", +] +files = [ + {file = "standardwebhooks-1.0.0.tar.gz", hash = "sha256:d94b99c0dcea84156e03adad94f8dba32d5454cc68e12ec2c824051b55bb67ff"}, +] + [[package]] name = "starlette" version = "0.41.2" @@ -2506,6 +2552,28 @@ files = [ {file = "typer-0.9.4.tar.gz", hash = "sha256:f714c2d90afae3a7929fcd72a3abb08df305e1ff61719381384211c4070af57f"}, ] +[[package]] +name = "types-deprecated" +version = "1.2.9.20240311" +requires_python = ">=3.8" +summary = "Typing stubs for Deprecated" +groups = 
["default"] +files = [ + {file = "types-Deprecated-1.2.9.20240311.tar.gz", hash = "sha256:0680e89989a8142707de8103f15d182445a533c1047fd9b7e8c5459101e9b90a"}, + {file = "types_Deprecated-1.2.9.20240311-py3-none-any.whl", hash = "sha256:d7793aaf32ff8f7e49a8ac781de4872248e0694c4b75a7a8a186c51167463f9d"}, +] + +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20241003" +requires_python = ">=3.8" +summary = "Typing stubs for python-dateutil" +groups = ["default"] +files = [ + {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"}, + {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -2779,6 +2847,47 @@ files = [ {file = "websockets-13.1.tar.gz", hash = "sha256:a3b3366087c1bc0a2795111edcadddb8b3b59509d5db5d7ea3fdd69f954a8878"}, ] +[[package]] +name = "wrapt" +version = "1.16.0" +requires_python = ">=3.6" +summary = "Module for decorators, wrappers and monkey patching." 
+groups = ["default"] +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +] + [[package]] name = "xxhash" version = "3.5.0" diff --git a/argilla-server/pyproject.toml b/argilla-server/pyproject.toml index 4a43fdc0c6..4e0e0adaab 100644 --- a/argilla-server/pyproject.toml +++ b/argilla-server/pyproject.toml @@ -1,5 +1,4 @@ [project] -# Remove me name = "argilla-server" dynamic = ["version"] description = "Open-source tool for exploring, labeling, and monitoring data for NLP projects." 
@@ -55,6 +54,8 @@ dependencies = [ # For CLI "typer >= 0.6.0, < 0.10.0", # spaCy only supports typer<0.10.0 "packaging>=23.2", + # For Webhooks + "standardwebhooks>=1.0.0", # For HF dataset import "datasets >= 3.0.1", "pillow >= 10.4.0", @@ -99,6 +100,8 @@ test = [ "factory-boy~=3.2.1", "httpx>=0.26.0", "pytest-randomly>=3.15.0", + # For mocking httpx requests and responses + "respx>=0.21.1", # pytest-randomly requires numpy < 2.0.0 "numpy<2.0.0", ] diff --git a/argilla-server/src/argilla_server/alembic/versions/6ed1b8bf8e08_create_webhooks_table.py b/argilla-server/src/argilla_server/alembic/versions/6ed1b8bf8e08_create_webhooks_table.py new file mode 100644 index 0000000000..62629ca376 --- /dev/null +++ b/argilla-server/src/argilla_server/alembic/versions/6ed1b8bf8e08_create_webhooks_table.py @@ -0,0 +1,50 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""create webhooks table + +Revision ID: 6ed1b8bf8e08 +Revises: 660d6c6b3360 +Create Date: 2024-09-02 11:41:57.561655 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "6ed1b8bf8e08" +down_revision = "660d6c6b3360" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.create_table( + "webhooks", + sa.Column("id", sa.Uuid(), nullable=False), + sa.Column("url", sa.Text(), nullable=False), + sa.Column("secret", sa.Text(), nullable=False), + sa.Column("events", sa.JSON(), nullable=False), + sa.Column("enabled", sa.Boolean(), nullable=False, server_default=sa.text("true")), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("inserted_at", sa.DateTime(), nullable=False), + sa.Column("updated_at", sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + + +def downgrade() -> None: + op.drop_table("webhooks") diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py index 4244c3a735..46818826e1 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records_bulk.py @@ -26,7 +26,6 @@ from argilla_server.models import Dataset, User from argilla_server.search_engine import SearchEngine, get_search_engine from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter() @@ -43,7 +42,6 @@ async def create_dataset_records_bulk( db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): dataset = await Dataset.get_or_raise( db, @@ -58,9 +56,7 @@ async def create_dataset_records_bulk( await authorize(current_user, DatasetPolicy.create_records(dataset)) - records_bulk = await CreateRecordsBulk(db, search_engine).create_records_bulk(dataset, records_bulk_create) - - return records_bulk + return await CreateRecordsBulk(db, 
search_engine).create_records_bulk(dataset, records_bulk_create) @router.put("/datasets/{dataset_id}/records/bulk", response_model=RecordsBulk) @@ -71,7 +67,6 @@ async def upsert_dataset_records_bulk( db: AsyncSession = Depends(get_async_db), search_engine: SearchEngine = Depends(get_search_engine), current_user: User = Security(auth.get_current_user), - telemetry_client: TelemetryClient = Depends(get_telemetry_client), ): dataset = await Dataset.get_or_raise( db, @@ -86,9 +81,4 @@ async def upsert_dataset_records_bulk( await authorize(current_user, DatasetPolicy.upsert_records(dataset)) - records_bulk = await UpsertRecordsBulk(db, search_engine).upsert_records_bulk(dataset, records_bulk_upsert) - - updated = len(records_bulk.updated_item_ids) - created = len(records_bulk.items) - updated - - return records_bulk + return await UpsertRecordsBulk(db, search_engine).upsert_records_bulk(dataset, records_bulk_upsert) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/webhooks.py b/argilla-server/src/argilla_server/api/handlers/v1/webhooks.py new file mode 100644 index 0000000000..54513dbc04 --- /dev/null +++ b/argilla-server/src/argilla_server/api/handlers/v1/webhooks.py @@ -0,0 +1,99 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from uuid import UUID +from sqlalchemy.ext.asyncio import AsyncSession +from fastapi import APIRouter, Depends, Security, status + +from argilla_server.database import get_async_db +from argilla_server.api.policies.v1 import WebhookPolicy, authorize +from argilla_server.webhooks.v1.ping import notify_ping_event +from argilla_server.security import auth +from argilla_server.models import User +from argilla_server.api.schemas.v1.webhooks import ( + WebhookUpdate as WebhookUpdateSchema, + WebhookCreate as WebhookCreateSchema, + Webhooks as WebhooksSchema, + Webhook as WebhookSchema, +) +from argilla_server.contexts import webhooks +from argilla_server.models import Webhook + +router = APIRouter(tags=["webhooks"]) + + +@router.get("/webhooks", response_model=WebhooksSchema) +async def list_webhooks( + *, + db: AsyncSession = Depends(get_async_db), + current_user: User = Security(auth.get_current_user), +): + await authorize(current_user, WebhookPolicy.list) + + return WebhooksSchema(items=await webhooks.list_webhooks(db)) + + +@router.post("/webhooks", status_code=status.HTTP_201_CREATED, response_model=WebhookSchema) +async def create_webhook( + *, + db: AsyncSession = Depends(get_async_db), + current_user: User = Security(auth.get_current_user), + webhook_create: WebhookCreateSchema, +): + await authorize(current_user, WebhookPolicy.create) + + return await webhooks.create_webhook(db, webhook_create.dict()) + + +@router.patch("/webhooks/{webhook_id}", response_model=WebhookSchema) +async def update_webhook( + *, + db: AsyncSession = Depends(get_async_db), + current_user: User = Security(auth.get_current_user), + webhook_id: UUID, + webhook_update: WebhookUpdateSchema, +): + webhook = await Webhook.get_or_raise(db, webhook_id) + + await authorize(current_user, WebhookPolicy.update) + + return await webhooks.update_webhook(db, webhook, webhook_update.dict(exclude_unset=True)) + + +@router.delete("/webhooks/{webhook_id}", response_model=WebhookSchema) +async def 
delete_webhook( + *, + db: AsyncSession = Depends(get_async_db), + current_user: User = Security(auth.get_current_user), + webhook_id: UUID, +): + webhook = await Webhook.get_or_raise(db, webhook_id) + + await authorize(current_user, WebhookPolicy.delete) + + return await webhooks.delete_webhook(db, webhook) + + +@router.post("/webhooks/{webhook_id}/ping", status_code=status.HTTP_204_NO_CONTENT) +async def ping_webhook( + *, + db: AsyncSession = Depends(get_async_db), + current_user: User = Security(auth.get_current_user), + webhook_id: UUID, +): + webhook = await Webhook.get_or_raise(db, webhook_id) + + await authorize(current_user, WebhookPolicy.ping) + + notify_ping_event(webhook) diff --git a/argilla-server/src/argilla_server/api/policies/v1/__init__.py b/argilla-server/src/argilla_server/api/policies/v1/__init__.py index dccbad5b3d..8fb19696ba 100644 --- a/argilla-server/src/argilla_server/api/policies/v1/__init__.py +++ b/argilla-server/src/argilla_server/api/policies/v1/__init__.py @@ -24,6 +24,7 @@ from argilla_server.api.policies.v1.vector_settings_policy import VectorSettingsPolicy from argilla_server.api.policies.v1.workspace_policy import WorkspacePolicy from argilla_server.api.policies.v1.workspace_user_policy import WorkspaceUserPolicy +from argilla_server.api.policies.v1.webhook_policy import WebhookPolicy from argilla_server.api.policies.v1.job_policy import JobPolicy __all__ = [ @@ -38,6 +39,7 @@ "VectorSettingsPolicy", "WorkspacePolicy", "WorkspaceUserPolicy", + "WebhookPolicy", "JobPolicy", "authorize", "is_authorized", diff --git a/argilla-server/src/argilla_server/api/policies/v1/webhook_policy.py b/argilla-server/src/argilla_server/api/policies/v1/webhook_policy.py new file mode 100644 index 0000000000..ed7587a3e3 --- /dev/null +++ b/argilla-server/src/argilla_server/api/policies/v1/webhook_policy.py @@ -0,0 +1,38 @@ +# Copyright 2021-present, the Recognai S.L. team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from argilla_server.api.policies.v1.commons import PolicyAction +from argilla_server.models import User + + +class WebhookPolicy: + @classmethod + async def list(cls, actor: User) -> bool: + return actor.is_owner + + @classmethod + async def create(cls, actor: User) -> bool: + return actor.is_owner + + @classmethod + async def update(cls, actor: User) -> bool: + return actor.is_owner + + @classmethod + async def delete(cls, actor: User) -> bool: + return actor.is_owner + + @classmethod + async def ping(cls, actor: User) -> bool: + return actor.is_owner diff --git a/argilla-server/src/argilla_server/api/routes.py b/argilla-server/src/argilla_server/api/routes.py index 8cd7244777..069b412d13 100644 --- a/argilla-server/src/argilla_server/api/routes.py +++ b/argilla-server/src/argilla_server/api/routes.py @@ -62,6 +62,7 @@ from argilla_server.api.handlers.v1 import ( workspaces as workspaces_v1, ) +from argilla_server.api.handlers.v1 import webhooks as webhooks_v1 from argilla_server.api.handlers.v1 import jobs as jobs_v1 from argilla_server.errors.base_errors import __ALL__ from argilla_server.errors.error_handler import APIErrorHandler @@ -93,6 +94,7 @@ def create_api_v1(): users_v1.router, vectors_settings_v1.router, workspaces_v1.router, + webhooks_v1.router, jobs_v1.router, oauth2_v1.router, settings_v1.router, diff --git a/argilla-server/src/argilla_server/api/schemas/v1/webhooks.py 
b/argilla-server/src/argilla_server/api/schemas/v1/webhooks.py new file mode 100644 index 0000000000..a093b7f26a --- /dev/null +++ b/argilla-server/src/argilla_server/api/schemas/v1/webhooks.py @@ -0,0 +1,70 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from datetime import datetime +from typing import List, Optional +from uuid import UUID + +from argilla_server.webhooks.v1.enums import WebhookEvent +from argilla_server.api.schemas.v1.commons import UpdateSchema +from argilla_server.pydantic_v1 import BaseModel, Field, HttpUrl + +WEBHOOK_EVENTS_MIN_ITEMS = 1 +WEBHOOK_DESCRIPTION_MIN_LENGTH = 1 +WEBHOOK_DESCRIPTION_MAX_LENGTH = 1000 + + +class Webhook(BaseModel): + id: UUID + url: str + secret: str + events: List[WebhookEvent] + enabled: bool + description: Optional[str] + inserted_at: datetime + updated_at: datetime + + class Config: + orm_mode = True + + +class Webhooks(BaseModel): + items: List[Webhook] + + +class WebhookCreate(BaseModel): + url: HttpUrl + events: List[WebhookEvent] = Field( + min_items=WEBHOOK_EVENTS_MIN_ITEMS, + unique_items=True, + ) + description: Optional[str] = Field( + min_length=WEBHOOK_DESCRIPTION_MIN_LENGTH, + max_length=WEBHOOK_DESCRIPTION_MAX_LENGTH, + ) + + +class WebhookUpdate(UpdateSchema): + url: Optional[HttpUrl] + events: Optional[List[WebhookEvent]] = Field( + min_items=WEBHOOK_EVENTS_MIN_ITEMS, + unique_items=True, + ) + enabled: Optional[bool] + description: Optional[str] 
= Field( + min_length=WEBHOOK_DESCRIPTION_MIN_LENGTH, + max_length=WEBHOOK_DESCRIPTION_MAX_LENGTH, + ) + + __non_nullable_fields__ = {"url", "events", "enabled"} diff --git a/argilla-server/src/argilla_server/bulk/records_bulk.py b/argilla-server/src/argilla_server/bulk/records_bulk.py index f0069bc528..328f7c53c7 100644 --- a/argilla-server/src/argilla_server/bulk/records_bulk.py +++ b/argilla-server/src/argilla_server/bulk/records_bulk.py @@ -30,6 +30,8 @@ ) from argilla_server.api.schemas.v1.responses import UserResponseCreate from argilla_server.api.schemas.v1.suggestions import SuggestionCreate +from argilla_server.webhooks.v1.enums import RecordEvent +from argilla_server.webhooks.v1.records import notify_record_event as notify_record_event_v1 from argilla_server.contexts import distribution from argilla_server.contexts.records import ( fetch_records_by_external_ids_as_dict, @@ -69,6 +71,9 @@ async def create_records_bulk(self, dataset: Dataset, bulk_create: RecordsBulkCr await _preload_records_relationships_before_index(self._db, records) await self._search_engine.index_records(dataset, records) + for record in records: + await notify_record_event_v1(self._db, RecordEvent.created, record) + return RecordsBulk(items=records) async def _upsert_records_relationships(self, records: List[Record], records_create: List[RecordCreate]) -> None: @@ -186,6 +191,8 @@ async def upsert_records_bulk( await _preload_records_relationships_before_index(self._db, records) await self._search_engine.index_records(dataset, records) + await self._notify_upsert_record_events(records) + return RecordsBulkWithUpdateInfo( items=records, updated_item_ids=[record.id for record in found_records.values()], @@ -205,6 +212,13 @@ async def _fetch_existing_dataset_records( return {**records_by_external_id, **records_by_id} + async def _notify_upsert_record_events(self, records: List[Record]) -> None: + for record in records: + if record.inserted_at == record.updated_at: + await 
notify_record_event_v1(self._db, RecordEvent.created, record) + else: + await notify_record_event_v1(self._db, RecordEvent.updated, record) + async def _preload_records_relationships_before_index(db: "AsyncSession", records: Sequence[Record]) -> None: await db.execute( diff --git a/argilla-server/src/argilla_server/cli/worker.py b/argilla-server/src/argilla_server/cli/worker.py index 710f35422a..cce9330ed9 100644 --- a/argilla-server/src/argilla_server/cli/worker.py +++ b/argilla-server/src/argilla_server/cli/worker.py @@ -16,13 +16,13 @@ from typing import List -from argilla_server.jobs.queues import DEFAULT_QUEUE +from argilla_server.jobs.queues import DEFAULT_QUEUE, HIGH_QUEUE DEFAULT_NUM_WORKERS = 2 def worker( - queues: List[str] = typer.Option([DEFAULT_QUEUE.name], help="Name of queues to listen"), + queues: List[str] = typer.Option([DEFAULT_QUEUE.name, HIGH_QUEUE.name], help="Name of queues to listen"), num_workers: int = typer.Option(DEFAULT_NUM_WORKERS, help="Number of workers to start"), ) -> None: from rq.worker_pool import WorkerPool diff --git a/argilla-server/src/argilla_server/contexts/datasets.py b/argilla-server/src/argilla_server/contexts/datasets.py index b31aafe3b4..af59d0736a 100644 --- a/argilla-server/src/argilla_server/contexts/datasets.py +++ b/argilla-server/src/argilla_server/contexts/datasets.py @@ -58,6 +58,19 @@ VectorSettingsCreate, ) from argilla_server.api.schemas.v1.vectors import Vector as VectorSchema +from argilla_server.webhooks.v1.enums import DatasetEvent, ResponseEvent, RecordEvent +from argilla_server.webhooks.v1.records import ( + build_record_event as build_record_event_v1, + notify_record_event as notify_record_event_v1, +) +from argilla_server.webhooks.v1.responses import ( + build_response_event as build_response_event_v1, + notify_response_event as notify_response_event_v1, +) +from argilla_server.webhooks.v1.datasets import ( + build_dataset_event as build_dataset_event_v1, + notify_dataset_event as 
notify_dataset_event_v1, +) from argilla_server.contexts import accounts, distribution from argilla_server.database import get_async_db from argilla_server.enums import DatasetStatus, UserRole @@ -150,7 +163,11 @@ async def create_dataset(db: AsyncSession, dataset_attrs: dict) -> Dataset: await DatasetCreateValidator.validate(db, dataset) - return await dataset.save(db) + await dataset.save(db) + + await notify_dataset_event_v1(db, DatasetEvent.created, dataset) + + return dataset def _allowed_roles_for_metadata_property_create(metadata_property_create: MetadataPropertyCreate) -> List[UserRole]: @@ -163,10 +180,12 @@ def _allowed_roles_for_metadata_property_create(metadata_property_create: Metada async def publish_dataset(db: AsyncSession, search_engine: SearchEngine, dataset: Dataset) -> Dataset: await DatasetPublishValidator.validate(db, dataset) - dataset = await dataset.update(db, status=DatasetStatus.ready, autocommit=True) + dataset = await dataset.update(db, status=DatasetStatus.ready) await search_engine.create_index(dataset) + await notify_dataset_event_v1(db, DatasetEvent.published, dataset) + return dataset @@ -177,13 +196,18 @@ async def update_dataset(db: AsyncSession, dataset: Dataset, dataset_attrs: dict dataset_jobs.update_dataset_records_status_job.delay(dataset.id) + await notify_dataset_event_v1(db, DatasetEvent.updated, dataset) + return dataset async def delete_dataset(db: AsyncSession, search_engine: SearchEngine, dataset: Dataset) -> Dataset: - dataset = await dataset.delete(db, autocommit=True) + deleted_dataset_event_v1 = await build_dataset_event_v1(db, DatasetEvent.deleted, dataset) + + dataset = await dataset.delete(db) await search_engine.delete_index(dataset) + await deleted_dataset_event_v1.notify(db) return dataset @@ -245,7 +269,6 @@ async def create_metadata_property( settings=metadata_property_create.settings.dict(), allowed_roles=_allowed_roles_for_metadata_property_create(metadata_property_create), dataset_id=dataset.id, - 
autocommit=True, ) if dataset.is_ready: @@ -302,7 +325,6 @@ async def create_vector_settings( title=vector_settings_create.title, dimensions=vector_settings_create.dimensions, dataset_id=dataset.id, - autocommit=True, ) if dataset.is_ready: @@ -729,14 +751,23 @@ async def preload_records_relationships_before_validate(db: AsyncSession, record async def delete_records( db: AsyncSession, search_engine: "SearchEngine", dataset: Dataset, records_ids: List[UUID] ) -> None: - records = await Record.delete_many( - db=db, - conditions=[Record.id.in_(records_ids), Record.dataset_id == dataset.id], - autocommit=True, - ) + params = [Record.id.in_(records_ids), Record.dataset_id == dataset.id] + + records = (await db.execute(select(Record).filter(*params).order_by(Record.inserted_at.asc()))).scalars().all() + + deleted_record_events_v1 = [] + for record in records: + deleted_record_events_v1.append( + await build_record_event_v1(db, RecordEvent.deleted, record), + ) + + records = await Record.delete_many(db, conditions=params) await search_engine.delete_records(dataset=dataset, records=records) + for deleted_record_event_v1 in deleted_record_events_v1: + await deleted_record_event_v1.notify(db) + async def update_record( db: AsyncSession, search_engine: "SearchEngine", record: Record, record_update: "RecordUpdate" @@ -765,13 +796,18 @@ async def update_record( await _preload_record_relationships_before_index(db, record) await search_engine.index_records(record.dataset, [record]) + await notify_record_event_v1(db, RecordEvent.updated, record) + return record async def delete_record(db: AsyncSession, search_engine: "SearchEngine", record: Record) -> Record: - record = await record.delete(db=db, autocommit=True) + deleted_record_event_v1 = await build_record_event_v1(db, RecordEvent.deleted, record) + + record = await record.delete(db) await search_engine.delete_records(dataset=record.dataset, records=[record]) + await deleted_record_event_v1.notify(db) return record @@ -794,6 
+830,7 @@ async def create_response( user_id=user.id, autocommit=False, ) + await _touch_dataset_last_activity_at(db, record.dataset) await db.commit() @@ -803,6 +840,8 @@ async def create_response( await _load_users_from_responses([response]) await search_engine.update_record_response(response) + await notify_response_event_v1(db, ResponseEvent.created, response) + return response @@ -827,6 +866,8 @@ async def update_response( await _load_users_from_responses(response) await search_engine.update_record_response(response) + await notify_response_event_v1(db, ResponseEvent.updated, response) + return response @@ -855,10 +896,17 @@ async def upsert_response( await _load_users_from_responses(response) await search_engine.update_record_response(response) + if response.inserted_at == response.updated_at: + await notify_response_event_v1(db, ResponseEvent.created, response) + else: + await notify_response_event_v1(db, ResponseEvent.updated, response) + return response async def delete_response(db: AsyncSession, search_engine: SearchEngine, response: Response) -> Response: + deleted_response_event_v1 = await build_response_event_v1(db, ResponseEvent.deleted, response) + response = await response.delete(db, autocommit=False) await _touch_dataset_last_activity_at(db, response.record.dataset) @@ -869,6 +917,8 @@ async def delete_response(db: AsyncSession, search_engine: SearchEngine, respons await _load_users_from_responses(response) await search_engine.delete_record_response(response) + await deleted_response_event_v1.notify(db) + return response @@ -912,7 +962,6 @@ async def upsert_suggestion( db, schema=SuggestionCreateWithRecordId(record_id=record.id, **suggestion_create.dict()), constraints=[Suggestion.record_id, Suggestion.question_id], - autocommit=True, ) await _preload_suggestion_relationships_before_index(db, suggestion) @@ -929,7 +978,6 @@ async def delete_suggestions( await Suggestion.delete_many( db=db, conditions=[Suggestion.id.in_(suggestions_ids), 
Suggestion.record_id == record.id], - autocommit=True, ) for suggestion in suggestions: @@ -952,7 +1000,7 @@ async def list_suggestions_by_id_and_record_id( async def delete_suggestion(db: AsyncSession, search_engine: SearchEngine, suggestion: Suggestion) -> Suggestion: - suggestion = await suggestion.delete(db, autocommit=True) + suggestion = await suggestion.delete(db) await search_engine.delete_record_suggestion(suggestion) diff --git a/argilla-server/src/argilla_server/contexts/distribution.py b/argilla-server/src/argilla_server/contexts/distribution.py index 6849e85e5e..21298d0d59 100644 --- a/argilla-server/src/argilla_server/contexts/distribution.py +++ b/argilla-server/src/argilla_server/contexts/distribution.py @@ -22,6 +22,8 @@ from sqlalchemy.orm import selectinload from sqlalchemy.ext.asyncio import AsyncSession +from argilla_server.webhooks.v1.enums import RecordEvent +from argilla_server.webhooks.v1.records import notify_record_event as notify_record_event_v1 from argilla_server.enums import DatasetDistributionStrategy, RecordStatus from argilla_server.models import Record from argilla_server.search_engine.base import SearchEngine @@ -58,6 +60,11 @@ async def update_record_status(search_engine: SearchEngine, record_id: UUID) -> await search_engine.partial_record_update(record, status=record.status) + await notify_record_event_v1(db, RecordEvent.updated, record) + + if record.is_completed(): + await notify_record_event_v1(db, RecordEvent.completed, record) + return record diff --git a/argilla-server/src/argilla_server/contexts/webhooks.py b/argilla-server/src/argilla_server/contexts/webhooks.py new file mode 100644 index 0000000000..08b29e3109 --- /dev/null +++ b/argilla-server/src/argilla_server/contexts/webhooks.py @@ -0,0 +1,49 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Sequence + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.models import Webhook +from argilla_server.validators.webhooks import WebhookCreateValidator + + +async def list_webhooks(db: AsyncSession) -> Sequence[Webhook]: + result = await db.execute(select(Webhook).order_by(Webhook.inserted_at.asc())) + + return result.scalars().all() + + +async def list_enabled_webhooks(db: AsyncSession) -> Sequence[Webhook]: + result = await db.execute(select(Webhook).where(Webhook.enabled == True).order_by(Webhook.inserted_at.asc())) + + return result.scalars().all() + + +async def create_webhook(db: AsyncSession, webhook_attrs: dict) -> Webhook: + webhook = Webhook(**webhook_attrs) + + await WebhookCreateValidator.validate(db, webhook) + + return await webhook.save(db) + + +async def update_webhook(db: AsyncSession, webhook: Webhook, webhook_attrs: dict) -> Webhook: + return await webhook.update(db, **webhook_attrs) + + +async def delete_webhook(db: AsyncSession, webhook: Webhook) -> Webhook: + return await webhook.delete(db) diff --git a/argilla-server/src/argilla_server/database.py b/argilla-server/src/argilla_server/database.py index 6441cb3593..a2dceb166e 100644 --- a/argilla-server/src/argilla_server/database.py +++ b/argilla-server/src/argilla_server/database.py @@ -38,6 +38,7 @@ "1.28": "ca7293c38970", "2.0": "237f7c674d74", "2.4": "660d6c6b3360", + "2.5": "6ed1b8bf8e08", } ) diff --git a/argilla-server/src/argilla_server/enums.py b/argilla-server/src/argilla_server/enums.py index 
7d88323695..03acc8b1b1 100644 --- a/argilla-server/src/argilla_server/enums.py +++ b/argilla-server/src/argilla_server/enums.py @@ -16,13 +16,7 @@ try: from enum import StrEnum except ImportError: - from enum import Enum - - class StrEnum(str, Enum): - """Custom StrEnum class for Python <3.11 compatibility.""" - - def __str__(self): - return str(self.value) + from argilla_server.utils.str_enum import StrEnum class FieldType(StrEnum): diff --git a/argilla-server/src/argilla_server/jobs/dataset_jobs.py b/argilla-server/src/argilla_server/jobs/dataset_jobs.py index 7edb0f131d..a34c92e8ae 100644 --- a/argilla-server/src/argilla_server/jobs/dataset_jobs.py +++ b/argilla-server/src/argilla_server/jobs/dataset_jobs.py @@ -21,12 +21,11 @@ from argilla_server.models import Record, Response from argilla_server.database import AsyncSessionLocal -from argilla_server.jobs.queues import DEFAULT_QUEUE +from argilla_server.jobs.queues import DEFAULT_QUEUE, JOB_TIMEOUT_DISABLED from argilla_server.search_engine.base import SearchEngine from argilla_server.settings import settings from argilla_server.contexts import distribution -JOB_TIMEOUT_DISABLED = -1 JOB_RECORDS_YIELD_PER = 100 diff --git a/argilla-server/src/argilla_server/jobs/hub_jobs.py b/argilla-server/src/argilla_server/jobs/hub_jobs.py index 0315435b24..3c3611cfdd 100644 --- a/argilla-server/src/argilla_server/jobs/hub_jobs.py +++ b/argilla-server/src/argilla_server/jobs/hub_jobs.py @@ -24,15 +24,11 @@ from argilla_server.database import AsyncSessionLocal from argilla_server.search_engine.base import SearchEngine from argilla_server.api.schemas.v1.datasets import HubDatasetMapping -from argilla_server.jobs.queues import DEFAULT_QUEUE - -# TODO: Move this to be defined on jobs queues as a shared constant -JOB_TIMEOUT_DISABLED = -1 +from argilla_server.jobs.queues import DEFAULT_QUEUE, JOB_TIMEOUT_DISABLED HUB_DATASET_TAKE_ROWS = 10_000 -# TODO: Once we merge webhooks we should change the queue to use a different one 
(default queue is deleted there) @job(DEFAULT_QUEUE, timeout=JOB_TIMEOUT_DISABLED, retry=Retry(max=3)) async def import_dataset_from_hub_job(name: str, subset: str, split: str, dataset_id: UUID, mapping: dict) -> None: async with AsyncSessionLocal() as db: diff --git a/argilla-server/src/argilla_server/jobs/queues.py b/argilla-server/src/argilla_server/jobs/queues.py index 0f17a63bd6..8733f7f902 100644 --- a/argilla-server/src/argilla_server/jobs/queues.py +++ b/argilla-server/src/argilla_server/jobs/queues.py @@ -18,7 +18,9 @@ from argilla_server.settings import settings - REDIS_CONNECTION = redis.from_url(settings.redis_url) DEFAULT_QUEUE = Queue("default", connection=REDIS_CONNECTION) +HIGH_QUEUE = Queue("high", connection=REDIS_CONNECTION) + +JOB_TIMEOUT_DISABLED = -1 diff --git a/argilla-server/src/argilla_server/jobs/webhook_jobs.py b/argilla-server/src/argilla_server/jobs/webhook_jobs.py new file mode 100644 index 0000000000..6bc24a417d --- /dev/null +++ b/argilla-server/src/argilla_server/jobs/webhook_jobs.py @@ -0,0 +1,55 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import httpx + +from typing import List + +from uuid import UUID +from datetime import datetime + +from rq.job import Retry, Job +from rq.decorators import job +from sqlalchemy.ext.asyncio import AsyncSession +from fastapi.encoders import jsonable_encoder + +from argilla_server.webhooks.v1.commons import notify_event +from argilla_server.database import AsyncSessionLocal +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.contexts import webhooks +from argilla_server.models import Webhook + + +async def enqueue_notify_events(db: AsyncSession, event: str, timestamp: datetime, data: dict) -> List[Job]: + enabled_webhooks = await webhooks.list_enabled_webhooks(db) + if len(enabled_webhooks) == 0: + return [] + + enqueued_jobs = [] + jsonable_data = jsonable_encoder(data) + for enabled_webhook in enabled_webhooks: + if event in enabled_webhook.events: + enqueue_job = notify_event_job.delay(enabled_webhook.id, event, timestamp, jsonable_data) + enqueued_jobs.append(enqueue_job) + + return enqueued_jobs + + +@job(HIGH_QUEUE, retry=Retry(max=3, interval=[10, 60, 180])) +async def notify_event_job(webhook_id: UUID, event: str, timestamp: datetime, data: dict) -> None: + async with AsyncSessionLocal() as db: + webhook = await Webhook.get_or_raise(db, webhook_id) + + response = notify_event(webhook, event, timestamp, data) + response.raise_for_status() diff --git a/argilla-server/src/argilla_server/models/database.py b/argilla-server/src/argilla_server/models/database.py index dda4936e0d..5b298ae1e6 100644 --- a/argilla-server/src/argilla_server/models/database.py +++ b/argilla-server/src/argilla_server/models/database.py @@ -13,6 +13,7 @@ # limitations under the License. 
import secrets +import base64 from datetime import datetime from typing import Any, List, Optional, Union from uuid import UUID @@ -62,9 +63,11 @@ "MetadataProperty", "Vector", "VectorSettings", + "Webhook", ] _USER_API_KEY_BYTES_LENGTH = 80 +_WEBHOOK_SECRET_BYTES_LENGTH = 64 class Field(DatabaseModel): @@ -248,6 +251,9 @@ class Record(DatabaseModel): __table_args__ = (UniqueConstraint("external_id", "dataset_id", name="record_external_id_dataset_id_uq"),) + def is_completed(self) -> bool: + return self.status == RecordStatus.completed + def vector_value_by_vector_settings(self, vector_settings: "VectorSettings") -> Union[List[float], None]: for vector in self.vectors: if vector.vector_settings_id == vector_settings.id: @@ -535,3 +541,25 @@ def __repr__(self): f"username={self.username!r}, role={self.role.value!r}, " f"inserted_at={str(self.inserted_at)!r}, updated_at={str(self.updated_at)!r})" ) + + +def generate_webhook_secret() -> str: + # NOTE: https://www.standardwebhooks.com implementation requires a base64 encoded secret + return base64.b64encode(secrets.token_bytes(_WEBHOOK_SECRET_BYTES_LENGTH)).decode("utf-8") + + +class Webhook(DatabaseModel): + __tablename__ = "webhooks" + + url: Mapped[str] = mapped_column(Text) + secret: Mapped[str] = mapped_column(Text, default=generate_webhook_secret) + events: Mapped[List[str]] = mapped_column(JSON) + enabled: Mapped[bool] = mapped_column(default=True, server_default=sql.true()) + description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + + def __repr__(self): + return ( + f"Webhook(id={str(self.id)!r}, url={self.url!r}, events={self.events!r}, " + f"enabled={self.enabled!r}, description={self.description!r}, " + f"inserted_at={str(self.inserted_at)!r}, updated_at={str(self.updated_at)!r})" + ) diff --git a/argilla-server/src/argilla_server/utils/str_enum.py b/argilla-server/src/argilla_server/utils/str_enum.py new file mode 100644 index 0000000000..29108d4f4c --- /dev/null +++ 
b/argilla-server/src/argilla_server/utils/str_enum.py @@ -0,0 +1,22 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from enum import Enum + + +class StrEnum(str, Enum): + """Custom StrEnum class for Python <3.11 compatibility.""" + + def __str__(self): + return str(self.value) diff --git a/argilla-server/src/argilla_server/validators/webhooks.py b/argilla-server/src/argilla_server/validators/webhooks.py new file mode 100644 index 0000000000..064427fbb4 --- /dev/null +++ b/argilla-server/src/argilla_server/validators/webhooks.py @@ -0,0 +1,38 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from sqlalchemy import select, func +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.models import Webhook +from argilla_server.errors.future import UnprocessableEntityError + +MAXIMUM_NUMBER_OF_WEBHOOKS = 10 + + +class WebhookCreateValidator: + @classmethod + async def validate(cls, db: AsyncSession, webhook: Webhook) -> None: + await cls._validate_maximum_number_of_webhooks(db) + + @classmethod + async def _validate_maximum_number_of_webhooks(cls, db: AsyncSession) -> None: + if await cls._count_webhooks(db) >= MAXIMUM_NUMBER_OF_WEBHOOKS: + raise UnprocessableEntityError( + f"You can't create more than {MAXIMUM_NUMBER_OF_WEBHOOKS} webhooks. Please delete some of them first" + ) + + @classmethod + async def _count_webhooks(cls, db: AsyncSession) -> int: + return (await db.execute(select(func.count(Webhook.id)))).scalar_one() diff --git a/argilla-server/src/argilla_server/webhooks/__init__.py b/argilla-server/src/argilla_server/webhooks/__init__.py new file mode 100644 index 0000000000..4b6cecae7f --- /dev/null +++ b/argilla-server/src/argilla_server/webhooks/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/argilla-server/src/argilla_server/webhooks/v1/__init__.py b/argilla-server/src/argilla_server/webhooks/v1/__init__.py new file mode 100644 index 0000000000..4b6cecae7f --- /dev/null +++ b/argilla-server/src/argilla_server/webhooks/v1/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/argilla-server/src/argilla_server/webhooks/v1/commons.py b/argilla-server/src/argilla_server/webhooks/v1/commons.py new file mode 100644 index 0000000000..bf1a94af7c --- /dev/null +++ b/argilla-server/src/argilla_server/webhooks/v1/commons.py @@ -0,0 +1,67 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import secrets +import httpx + +from math import floor +from typing_extensions import Dict +from datetime import datetime, timezone +from standardwebhooks.webhooks import Webhook + +from argilla_server.models import Webhook as WebhookModel + +MSG_ID_BYTES_LENGTH = 16 + +NOTIFY_EVENT_DEFAULT_TIMEOUT = httpx.Timeout(timeout=20.0) + + +# NOTE: We are using standard webhooks implementation. +# For more information take a look to https://www.standardwebhooks.com +def notify_event(webhook: WebhookModel, event: str, timestamp: datetime, data: Dict) -> httpx.Response: + timestamp_attempt = datetime.utcnow() + + msg_id = _generate_msg_id() + payload = json.dumps(_build_payload(event, timestamp, data)) + signature = Webhook(webhook.secret).sign(msg_id, timestamp_attempt, payload) + + return httpx.post( + webhook.url, + headers=_build_headers(msg_id, timestamp_attempt, signature), + content=payload, + timeout=NOTIFY_EVENT_DEFAULT_TIMEOUT, + ) + + +def _generate_msg_id() -> str: + return f"msg_{secrets.token_urlsafe(MSG_ID_BYTES_LENGTH)}" + + +def _build_headers(msg_id: str, timestamp: datetime, signature: str) -> Dict: + return { + "webhook-id": msg_id, + "webhook-timestamp": str(floor(timestamp.replace(tzinfo=timezone.utc).timestamp())), + "webhook-signature": signature, + "content-type": "application/json", + } + + +def _build_payload(type: str, timestamp: datetime, data: Dict) -> Dict: + return { + "type": type, + "version": 1, + "timestamp": timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + "data": data, + } diff --git a/argilla-server/src/argilla_server/webhooks/v1/datasets.py b/argilla-server/src/argilla_server/webhooks/v1/datasets.py new file mode 100644 index 0000000000..079d352bb1 --- /dev/null +++ b/argilla-server/src/argilla_server/webhooks/v1/datasets.py @@ -0,0 +1,55 @@ +# Copyright 2021-present, the Recognai S.L. team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List +from datetime import datetime + +from rq.job import Job +from sqlalchemy import select +from sqlalchemy.orm import selectinload +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.models import Dataset +from argilla_server.webhooks.v1.event import Event +from argilla_server.webhooks.v1.schemas import DatasetEventSchema +from argilla_server.webhooks.v1.enums import DatasetEvent + + +async def notify_dataset_event(db: AsyncSession, dataset_event: DatasetEvent, dataset: Dataset) -> List[Job]: + event = await build_dataset_event(db, dataset_event, dataset) + + return await event.notify(db) + + +async def build_dataset_event(db: AsyncSession, dataset_event: DatasetEvent, dataset: Dataset) -> Event: + # NOTE: Force loading required association resources required by the event schema + ( + await db.execute( + select(Dataset) + .where(Dataset.id == dataset.id) + .options( + selectinload(Dataset.workspace), + selectinload(Dataset.questions), + selectinload(Dataset.fields), + selectinload(Dataset.metadata_properties), + selectinload(Dataset.vectors_settings), + ) + ) + ).scalar_one() + + return Event( + event=dataset_event, + timestamp=datetime.utcnow(), + data=DatasetEventSchema.from_orm(dataset).dict(), + ) diff --git a/argilla-server/src/argilla_server/webhooks/v1/enums.py b/argilla-server/src/argilla_server/webhooks/v1/enums.py new file mode 100644 index 0000000000..25d05688c2 --- 
/dev/null +++ b/argilla-server/src/argilla_server/webhooks/v1/enums.py @@ -0,0 +1,54 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + from enum import StrEnum +except ImportError: + from argilla_server.utils.str_enum import StrEnum + + +class WebhookEvent(StrEnum): + dataset_created = "dataset.created" + dataset_updated = "dataset.updated" + dataset_deleted = "dataset.deleted" + dataset_published = "dataset.published" + + record_created = "record.created" + record_updated = "record.updated" + record_deleted = "record.deleted" + record_completed = "record.completed" + + response_created = "response.created" + response_updated = "response.updated" + response_deleted = "response.deleted" + + +class DatasetEvent(StrEnum): + created = WebhookEvent.dataset_created.value + updated = WebhookEvent.dataset_updated.value + deleted = WebhookEvent.dataset_deleted.value + published = WebhookEvent.dataset_published.value + + +class RecordEvent(StrEnum): + created = WebhookEvent.record_created.value + updated = WebhookEvent.record_updated.value + deleted = WebhookEvent.record_deleted.value + completed = WebhookEvent.record_completed.value + + +class ResponseEvent(StrEnum): + created = WebhookEvent.response_created.value + updated = WebhookEvent.response_updated.value + deleted = WebhookEvent.response_deleted.value diff --git a/argilla-server/src/argilla_server/webhooks/v1/event.py 
b/argilla-server/src/argilla_server/webhooks/v1/event.py new file mode 100644 index 0000000000..f5f3d9670b --- /dev/null +++ b/argilla-server/src/argilla_server/webhooks/v1/event.py @@ -0,0 +1,36 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List +from datetime import datetime + +from rq.job import Job +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.jobs.webhook_jobs import enqueue_notify_events + + +class Event: + def __init__(self, event: str, timestamp: datetime, data: dict): + self.event = event + self.timestamp = timestamp + self.data = data + + async def notify(self, db: AsyncSession) -> List[Job]: + return await enqueue_notify_events( + db, + event=self.event, + timestamp=self.timestamp, + data=self.data, + ) diff --git a/argilla-server/src/argilla_server/webhooks/v1/ping.py b/argilla-server/src/argilla_server/webhooks/v1/ping.py new file mode 100644 index 0000000000..fcf592e6bc --- /dev/null +++ b/argilla-server/src/argilla_server/webhooks/v1/ping.py @@ -0,0 +1,34 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import httpx + +from datetime import datetime + +from argilla_server.contexts import info +from argilla_server.models import Webhook +from argilla_server.webhooks.v1.commons import notify_event +from argilla_server.webhooks.v1.enums import WebhookEvent + + +def notify_ping_event(webhook: Webhook) -> httpx.Response: + return notify_event( + webhook=webhook, + event="ping", + timestamp=datetime.utcnow(), + data={ + "agent": "argilla-server", + "version": info.argilla_version(), + }, + ) diff --git a/argilla-server/src/argilla_server/webhooks/v1/records.py b/argilla-server/src/argilla_server/webhooks/v1/records.py new file mode 100644 index 0000000000..f03172d473 --- /dev/null +++ b/argilla-server/src/argilla_server/webhooks/v1/records.py @@ -0,0 +1,55 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from datetime import datetime +from typing import List + +from rq.job import Job +from sqlalchemy import select +from sqlalchemy.orm import selectinload +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.models import Record, Dataset +from argilla_server.webhooks.v1.event import Event +from argilla_server.webhooks.v1.enums import RecordEvent +from argilla_server.webhooks.v1.schemas import RecordEventSchema + + +async def notify_record_event(db: AsyncSession, record_event: RecordEvent, record: Record) -> List[Job]: + event = await build_record_event(db, record_event, record) + + return await event.notify(db) + + +async def build_record_event(db: AsyncSession, record_event: RecordEvent, record: Record) -> Event: + # NOTE: Force loading required association resources required by the event schema + ( + await db.execute( + select(Dataset) + .where(Dataset.id == record.dataset_id) + .options( + selectinload(Dataset.workspace), + selectinload(Dataset.fields), + selectinload(Dataset.questions), + selectinload(Dataset.metadata_properties), + selectinload(Dataset.vectors_settings), + ) + ) + ).scalar_one() + + return Event( + event=record_event, + timestamp=datetime.utcnow(), + data=RecordEventSchema.from_orm(record).dict(), + ) diff --git a/argilla-server/src/argilla_server/webhooks/v1/responses.py b/argilla-server/src/argilla_server/webhooks/v1/responses.py new file mode 100644 index 0000000000..122fae0fd9 --- /dev/null +++ b/argilla-server/src/argilla_server/webhooks/v1/responses.py @@ -0,0 +1,60 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List +from datetime import datetime + +from rq.job import Job +from sqlalchemy import select +from sqlalchemy.orm import selectinload +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.models import Response, Record, Dataset +from argilla_server.webhooks.v1.event import Event +from argilla_server.webhooks.v1.enums import ResponseEvent +from argilla_server.webhooks.v1.schemas import ResponseEventSchema + + +async def notify_response_event(db: AsyncSession, response_event: ResponseEvent, response: Response) -> List[Job]: + event = await build_response_event(db, response_event, response) + + return await event.notify(db) + + +async def build_response_event(db: AsyncSession, response_event: ResponseEvent, response: Response) -> Event: + # NOTE: Force loading required association resources required by the event schema + ( + await db.execute( + select(Response) + .where(Response.id == response.id) + .options( + selectinload(Response.user), + selectinload(Response.record).options( + selectinload(Record.dataset).options( + selectinload(Dataset.workspace), + selectinload(Dataset.questions), + selectinload(Dataset.fields), + selectinload(Dataset.metadata_properties), + selectinload(Dataset.vectors_settings), + ), + ), + ), + ) + ).scalar_one() + + return Event( + event=response_event, + timestamp=datetime.utcnow(), + data=ResponseEventSchema.from_orm(response).dict(), + ) diff --git a/argilla-server/src/argilla_server/webhooks/v1/schemas.py b/argilla-server/src/argilla_server/webhooks/v1/schemas.py new file mode 100644 index 
0000000000..9db5aae9b1 --- /dev/null +++ b/argilla-server/src/argilla_server/webhooks/v1/schemas.py @@ -0,0 +1,159 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from uuid import UUID +from typing import Optional, List +from datetime import datetime + +from argilla_server.pydantic_v1 import BaseModel, Field + + +class UserEventSchema(BaseModel): + id: UUID + first_name: str + last_name: Optional[str] + username: str + role: str + inserted_at: datetime + updated_at: datetime + + class Config: + orm_mode = True + + +class WorkspaceEventSchema(BaseModel): + id: UUID + name: str + inserted_at: datetime + updated_at: datetime + + class Config: + orm_mode = True + + +class DatasetQuestionEventSchema(BaseModel): + id: UUID + name: str + title: str + description: Optional[str] + required: bool + settings: dict + inserted_at: datetime + updated_at: datetime + + class Config: + orm_mode = True + + +class DatasetFieldEventSchema(BaseModel): + id: UUID + name: str + title: str + required: bool + settings: dict + inserted_at: datetime + updated_at: datetime + + class Config: + orm_mode = True + + +class DatasetMetadataPropertyEventSchema(BaseModel): + id: UUID + name: str + title: str + settings: dict + visible_for_annotators: bool + inserted_at: datetime + updated_at: datetime + + class Config: + orm_mode = True + + +class DatasetVectorSettingsEventSchema(BaseModel): + id: UUID + name: str + title: str + dimensions: int + 
inserted_at: datetime + updated_at: datetime + + class Config: + orm_mode = True + + +class DatasetEventSchema(BaseModel): + id: UUID + name: str + guidelines: Optional[str] + allow_extra_metadata: bool + status: str + distribution: dict + workspace: WorkspaceEventSchema + questions: List[DatasetQuestionEventSchema] + fields: List[DatasetFieldEventSchema] + metadata_properties: List[DatasetMetadataPropertyEventSchema] + vectors_settings: List[DatasetVectorSettingsEventSchema] + last_activity_at: datetime + inserted_at: datetime + updated_at: datetime + + class Config: + orm_mode = True + + +class RecordEventSchema(BaseModel): + id: UUID + status: str + # TODO: Truncate fields so we don't respond with big field values. + # Or find another possible solution. + fields: dict + metadata: Optional[dict] = Field(None, alias="metadata_") + external_id: Optional[str] + # TODO: + # responses: + # - Create a new `GET /api/v1/records/{record_id}/responses` endpoint. + # - Or use `/api/v1/records/{record_id}` endpoint. + # - Other possible alternative is to expand the responses here but using + # a RecordResponseEventSchema not including the record inside. + # suggestions: + # - Can use `GET /api/v1/records/{record_id}/suggestions` endpoint. + # - Or use `/api/v1/records/{record_id}` endpoint. + # - Other possible alternative is to expand the suggestions here but using + # a RecordSuggestionEventSchema not including the record inside. + # vectors: + # - Create a new `GET /api/v1/records/{record_id}/vectors` endpoint. + # - Or use `/api/v1/records/{record_id}` endpoint. + # - Other possible alternative is to expand the vectors here but using + # a RecordVectorEventSchema not including the record inside. 
+ dataset: DatasetEventSchema + inserted_at: datetime + updated_at: datetime + + class Config: + orm_mode = True + + +class ResponseEventSchema(BaseModel): + id: UUID + values: Optional[dict] + status: str + record: RecordEventSchema + user: UserEventSchema + inserted_at: datetime + updated_at: datetime + + class Config: + orm_mode = True diff --git a/argilla-server/tests/conftest.py b/argilla-server/tests/conftest.py index 67d704bf2c..55b4a53af5 100644 --- a/argilla-server/tests/conftest.py +++ b/argilla-server/tests/conftest.py @@ -12,17 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import asyncio -from typing import TYPE_CHECKING, AsyncGenerator, Generator - import httpx +import asyncio import pytest import pytest_asyncio + +from rq import Queue +from typing import TYPE_CHECKING, AsyncGenerator, Generator +from sqlalchemy import NullPool, create_engine +from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine + from argilla_server.cli.database.migrate import migrate_db from argilla_server.database import database_url_sync +from argilla_server.jobs.queues import REDIS_CONNECTION from argilla_server.settings import settings -from sqlalchemy import NullPool, create_engine -from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine from tests.database import SyncTestSession, TestSession, set_task @@ -97,6 +100,16 @@ def sync_db(sync_connection: "Connection") -> Generator["Session", None, None]: sync_connection.rollback() +@pytest.fixture(autouse=True) +def empty_job_queues(): + queues = Queue.all(connection=REDIS_CONNECTION) + + for queue in queues: + queue.empty() + + yield + + @pytest.fixture def async_db_proxy(mocker: "MockerFixture", sync_db: "Session") -> "AsyncSession": """Create a mocked `AsyncSession` that proxies to the sync session. 
This will allow us to execute the async CLI commands diff --git a/argilla-server/tests/factories.py b/argilla-server/tests/factories.py index 984418a633..9fdea394fb 100644 --- a/argilla-server/tests/factories.py +++ b/argilla-server/tests/factories.py @@ -14,9 +14,14 @@ import inspect import random - import factory + +from factory.alchemy import SESSION_PERSISTENCE_COMMIT, SESSION_PERSISTENCE_FLUSH +from factory.builder import BuildStep, StepBuilder, parse_declarations +from sqlalchemy.ext.asyncio import async_object_session + from argilla_server.enums import DatasetDistributionStrategy, FieldType, MetadataPropertyType, OptionsOrder +from argilla_server.webhooks.v1.enums import WebhookEvent from argilla_server.models import ( Dataset, Field, @@ -32,11 +37,9 @@ VectorSettings, Workspace, WorkspaceUser, + Webhook, ) from argilla_server.models.base import DatabaseModel -from factory.alchemy import SESSION_PERSISTENCE_COMMIT, SESSION_PERSISTENCE_FLUSH -from factory.builder import BuildStep, StepBuilder, parse_declarations -from sqlalchemy.ext.asyncio import async_object_session from tests.database import SyncTestSession, TestSession @@ -416,3 +419,11 @@ class Meta: record = factory.SubFactory(RecordFactory) question = factory.SubFactory(QuestionFactory) value = "negative" + + +class WebhookFactory(BaseFactory): + class Meta: + model = Webhook + + url = factory.Sequence(lambda n: f"https://example-{n}.com") + events = [WebhookEvent.response_created] diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py b/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py index cd2abcdb2f..decdec847e 100644 --- a/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py @@ -12,12 +12,13 @@ # See the 
License for the specific language governing permissions and # limitations under the License. -from uuid import UUID - import pytest + +from uuid import UUID from httpx import AsyncClient from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession +from fastapi.encoders import jsonable_encoder from argilla_server.enums import ( DatasetStatus, @@ -27,7 +28,12 @@ RecordStatus, DatasetDistributionStrategy, ) +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.models.database import Record, Response, Suggestion, User +from argilla_server.webhooks.v1.enums import RecordEvent +from argilla_server.webhooks.v1.records import build_record_event from argilla_server.models.database import Record, Response, Suggestion, User + from tests.factories import ( DatasetFactory, LabelSelectionQuestionFactory, @@ -40,6 +46,7 @@ TextQuestionFactory, ChatFieldFactory, CustomFieldFactory, + WebhookFactory, AnnotatorFactory, ) @@ -789,3 +796,48 @@ async def test_create_dataset_records_bulk_updates_records_status( assert (await Record.get(db, UUID(response_items[1]["id"]))).status == RecordStatus.pending assert (await Record.get(db, UUID(response_items[2]["id"]))).status == RecordStatus.pending assert (await Record.get(db, UUID(response_items[3]["id"]))).status == RecordStatus.pending + + async def test_create_dataset_records_bulk_enqueue_webhook_record_created_events( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + dataset = await DatasetFactory.create(status=DatasetStatus.ready) + await TextFieldFactory.create(name="prompt", dataset=dataset) + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + webhook = await WebhookFactory.create(events=[RecordEvent.created]) + + response = await async_client.post( + self.url(dataset.id), + headers=owner_auth_header, + json={ + "items": [ + { + "fields": { + "prompt": "You should exercise more.", + }, + }, + { + "fields": { + "prompt": "Do you like to 
exercise?", + }, + }, + ], + }, + ) + + assert response.status_code == 201, response.json() + + records = (await db.execute(select(Record).order_by(Record.inserted_at.asc()))).scalars().all() + + event_a = await build_record_event(db, RecordEvent.created, records[0]) + event_b = await build_record_event(db, RecordEvent.created, records[1]) + + assert HIGH_QUEUE.count == 2 + + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.created + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event_a.data) + + assert HIGH_QUEUE.jobs[1].args[0] == webhook.id + assert HIGH_QUEUE.jobs[1].args[1] == RecordEvent.created + assert HIGH_QUEUE.jobs[1].args[3] == jsonable_encoder(event_b.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/records/test_upsert_dataset_records_bulk.py b/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_upsert_dataset_records_bulk.py similarity index 67% rename from argilla-server/tests/unit/api/handlers/v1/records/test_upsert_dataset_records_bulk.py rename to argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_upsert_dataset_records_bulk.py index 737664fda6..49649973f8 100644 --- a/argilla-server/tests/unit/api/handlers/v1/records/test_upsert_dataset_records_bulk.py +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_upsert_dataset_records_bulk.py @@ -16,19 +16,25 @@ from uuid import UUID from httpx import AsyncClient +from fastapi.encoders import jsonable_encoder from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession +from argilla_server.models import User, Record +from argilla_server.jobs.queues import HIGH_QUEUE from argilla_server.models import User, Record from argilla_server.enums import DatasetDistributionStrategy, ResponseStatus, DatasetStatus, RecordStatus +from argilla_server.webhooks.v1.enums import RecordEvent +from argilla_server.webhooks.v1.records import 
build_record_event from tests.factories import ( DatasetFactory, RecordFactory, TextFieldFactory, TextQuestionFactory, - ResponseFactory, AnnotatorFactory, + WebhookFactory, + ResponseFactory, ) @@ -221,3 +227,95 @@ async def test_upsert_dataset_records_bulk_updates_records_status( assert record_b.status == RecordStatus.pending assert record_c.status == RecordStatus.pending assert record_d.status == RecordStatus.pending + + async def test_upsert_dataset_records_bulk_enqueue_webhook_record_created_events( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + dataset = await DatasetFactory.create(status=DatasetStatus.ready) + await TextFieldFactory.create(name="prompt", dataset=dataset) + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + webhook = await WebhookFactory.create(events=[RecordEvent.created, RecordEvent.updated]) + + response = await async_client.put( + self.url(dataset.id), + headers=owner_auth_header, + json={ + "items": [ + { + "fields": { + "prompt": "Does exercise help reduce stress?", + }, + }, + { + "fields": { + "prompt": "What is the best way to reduce stress?", + }, + }, + ], + }, + ) + + assert response.status_code == 200 + + records = (await db.execute(select(Record).order_by(Record.inserted_at.asc()))).scalars().all() + + event_a = await build_record_event(db, RecordEvent.created, records[0]) + event_b = await build_record_event(db, RecordEvent.created, records[1]) + + assert HIGH_QUEUE.count == 2 + + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.created + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event_a.data) + + assert HIGH_QUEUE.jobs[1].args[0] == webhook.id + assert HIGH_QUEUE.jobs[1].args[1] == RecordEvent.created + assert HIGH_QUEUE.jobs[1].args[3] == jsonable_encoder(event_b.data) + + async def test_upsert_dataset_records_bulk_enqueue_webhook_record_updated_events( + self, db: AsyncSession, async_client: 
AsyncClient, owner_auth_header: dict + ): + dataset = await DatasetFactory.create(status=DatasetStatus.ready) + await TextFieldFactory.create(name="prompt", dataset=dataset) + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + records = await RecordFactory.create_batch(2, dataset=dataset) + + webhook = await WebhookFactory.create(events=[RecordEvent.created, RecordEvent.updated]) + + response = await async_client.put( + self.url(dataset.id), + headers=owner_auth_header, + json={ + "items": [ + { + "id": str(records[0].id), + "metadata": { + "metadata-key": "metadata-value", + }, + }, + { + "id": str(records[1].id), + "metadata": { + "metadata-key": "metadata-value", + }, + }, + ], + }, + ) + + assert response.status_code == 200 + + event_a = await build_record_event(db, RecordEvent.updated, records[0]) + event_b = await build_record_event(db, RecordEvent.updated, records[1]) + + assert HIGH_QUEUE.count == 2 + + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.updated + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event_a.data) + + assert HIGH_QUEUE.jobs[1].args[0] == webhook.id + assert HIGH_QUEUE.jobs[1].args[1] == RecordEvent.updated + assert HIGH_QUEUE.jobs[1].args[3] == jsonable_encoder(event_b.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/records/test_delete_dataset_records.py b/argilla-server/tests/unit/api/handlers/v1/datasets/records/test_delete_dataset_records.py new file mode 100644 index 0000000000..19773b3512 --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/records/test_delete_dataset_records.py @@ -0,0 +1,60 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from uuid import UUID +from httpx import AsyncClient +from fastapi.encoders import jsonable_encoder +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.webhooks.v1.enums import RecordEvent +from argilla_server.webhooks.v1.records import build_record_event + +from tests.factories import DatasetFactory, RecordFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestDeleteDatasetRecords: + def url(self, dataset_id: UUID) -> str: + return f"/api/v1/datasets/{dataset_id}/records" + + async def test_delete_dataset_records_enqueue_webhook_record_deleted_events( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + dataset = await DatasetFactory.create() + records = await RecordFactory.create_batch(2, dataset=dataset) + webhook = await WebhookFactory.create(events=[RecordEvent.deleted]) + + event_a = await build_record_event(db, RecordEvent.deleted, records[0]) + event_b = await build_record_event(db, RecordEvent.deleted, records[1]) + + response = await async_client.delete( + self.url(dataset.id), + headers=owner_auth_header, + params={"ids": f"{records[0].id},{records[1].id}"}, + ) + + assert response.status_code == 204 + + assert HIGH_QUEUE.count == 2 + + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.deleted + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event_a.data) + + assert HIGH_QUEUE.jobs[1].args[0] == webhook.id + assert HIGH_QUEUE.jobs[1].args[1] == RecordEvent.deleted + 
assert HIGH_QUEUE.jobs[1].args[3] == jsonable_encoder(event_b.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/test_create_dataset.py b/argilla-server/tests/unit/api/handlers/v1/datasets/test_create_dataset.py index ce955a29c9..b6f1871f98 100644 --- a/argilla-server/tests/unit/api/handlers/v1/datasets/test_create_dataset.py +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/test_create_dataset.py @@ -18,11 +18,15 @@ from httpx import AsyncClient from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession +from fastapi.encoders import jsonable_encoder -from argilla_server.enums import DatasetDistributionStrategy, DatasetStatus from argilla_server.models import Dataset +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.enums import DatasetDistributionStrategy, DatasetStatus +from argilla_server.webhooks.v1.enums import DatasetEvent +from argilla_server.webhooks.v1.datasets import build_dataset_event -from tests.factories import WorkspaceFactory +from tests.factories import WebhookFactory, WorkspaceFactory @pytest.mark.asyncio @@ -202,3 +206,28 @@ async def test_create_dataset_with_invalid_metadata( assert response.status_code == 422 assert (await db.execute(select(func.count(Dataset.id)))).scalar_one() == 0 + + async def test_create_dataset_enqueue_webhook_dataset_created_event( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + workspace = await WorkspaceFactory.create() + webhook = await WebhookFactory.create(events=[DatasetEvent.created]) + + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "name": "Dataset Name", + "workspace_id": str(workspace.id), + }, + ) + + assert response.status_code == 201 + + dataset = (await db.execute(select(Dataset))).scalar_one() + event = await build_dataset_event(db, DatasetEvent.created, dataset) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert 
HIGH_QUEUE.jobs[0].args[1] == DatasetEvent.created + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/test_delete_dataset.py b/argilla-server/tests/unit/api/handlers/v1/datasets/test_delete_dataset.py new file mode 100644 index 0000000000..d01feabcbf --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/test_delete_dataset.py @@ -0,0 +1,52 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from uuid import UUID +from httpx import AsyncClient +from fastapi.encoders import jsonable_encoder +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.webhooks.v1.enums import DatasetEvent +from argilla_server.webhooks.v1.datasets import build_dataset_event + +from tests.factories import DatasetFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestDeleteDataset: + def url(self, dataset_id: UUID) -> str: + return f"/api/v1/datasets/{dataset_id}" + + async def test_delete_dataset_enqueue_webhook_dataset_deleted_event( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + dataset = await DatasetFactory.create() + webhook = await WebhookFactory.create(events=[DatasetEvent.deleted]) + + event = await build_dataset_event(db, DatasetEvent.deleted, dataset) + + response = await async_client.delete( + self.url(dataset.id), + headers=owner_auth_header, + ) + + assert response.status_code == 200 + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == DatasetEvent.deleted + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/test_publish_dataset.py b/argilla-server/tests/unit/api/handlers/v1/datasets/test_publish_dataset.py new file mode 100644 index 0000000000..9fb3a11481 --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/test_publish_dataset.py @@ -0,0 +1,55 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from uuid import UUID +from httpx import AsyncClient +from fastapi.encoders import jsonable_encoder +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.webhooks.v1.enums import DatasetEvent +from argilla_server.webhooks.v1.datasets import build_dataset_event + +from tests.factories import DatasetFactory, TextFieldFactory, RatingQuestionFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestPublishDataset: + def url(self, dataset_id: UUID) -> str: + return f"/api/v1/datasets/{dataset_id}/publish" + + async def test_publish_dataset_enqueue_webhook_dataset_published_event( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + dataset = await DatasetFactory.create() + await TextFieldFactory.create(dataset=dataset, required=True) + await RatingQuestionFactory.create(dataset=dataset, required=True) + + webhook = await WebhookFactory.create(events=[DatasetEvent.published]) + + response = await async_client.put( + self.url(dataset.id), + headers=owner_auth_header, + ) + + assert response.status_code == 200 + + event = await build_dataset_event(db, DatasetEvent.published, dataset) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == DatasetEvent.published + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/test_update_dataset.py 
b/argilla-server/tests/unit/api/handlers/v1/datasets/test_update_dataset.py index 113cdc3ce1..6c69d8abeb 100644 --- a/argilla-server/tests/unit/api/handlers/v1/datasets/test_update_dataset.py +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/test_update_dataset.py @@ -12,14 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any -from uuid import UUID - import pytest + +from uuid import UUID +from typing import Any from httpx import AsyncClient +from sqlalchemy.ext.asyncio import AsyncSession +from fastapi.encoders import jsonable_encoder +from argilla_server.jobs.queues import HIGH_QUEUE from argilla_server.enums import DatasetDistributionStrategy, DatasetStatus -from tests.factories import DatasetFactory, RecordFactory, ResponseFactory +from argilla_server.webhooks.v1.datasets import build_dataset_event +from argilla_server.webhooks.v1.enums import DatasetEvent + +from tests.factories import DatasetFactory, RecordFactory, ResponseFactory, WebhookFactory @pytest.mark.asyncio @@ -208,3 +214,24 @@ async def test_update_dataset_metadata_as_none(self, async_client: AsyncClient, assert response.status_code == 200 assert dataset.metadata_ == None + + async def test_update_dataset_enqueue_webhook_dataset_updated_event( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + dataset = await DatasetFactory.create() + webhook = await WebhookFactory.create(events=[DatasetEvent.updated]) + + response = await async_client.patch( + self.url(dataset.id), + headers=owner_auth_header, + json={"name": "Updated dataset"}, + ) + + assert response.status_code == 200 + + event = await build_dataset_event(db, DatasetEvent.updated, dataset) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == DatasetEvent.updated + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) diff --git 
a/argilla-server/tests/unit/api/handlers/v1/records/test_create_record_response.py b/argilla-server/tests/unit/api/handlers/v1/records/test_create_record_response.py index ce433d036d..68fbd93685 100644 --- a/argilla-server/tests/unit/api/handlers/v1/records/test_create_record_response.py +++ b/argilla-server/tests/unit/api/handlers/v1/records/test_create_record_response.py @@ -12,19 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -from datetime import datetime -from uuid import UUID - import pytest +from uuid import UUID +from datetime import datetime from httpx import AsyncClient from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession +from fastapi.encoders import jsonable_encoder -from argilla_server.enums import ResponseStatus, RecordStatus, DatasetDistributionStrategy from argilla_server.models import Response, User +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.webhooks.v1.enums import RecordEvent, ResponseEvent +from argilla_server.webhooks.v1.responses import build_response_event +from argilla_server.webhooks.v1.records import build_record_event +from argilla_server.enums import ResponseStatus, RecordStatus, DatasetDistributionStrategy -from tests.factories import DatasetFactory, RecordFactory, SpanQuestionFactory, TextQuestionFactory +from tests.factories import DatasetFactory, RecordFactory, SpanQuestionFactory, TextQuestionFactory, WebhookFactory @pytest.mark.asyncio @@ -516,3 +520,118 @@ async def test_create_record_response_does_not_updates_record_status_to_complete assert response.status_code == 201 assert record.status == RecordStatus.pending + + async def test_create_record_response_enqueue_webhook_response_created_event( + self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict + ): + dataset = await DatasetFactory.create( + distribution={ + "strategy": DatasetDistributionStrategy.overlap, + 
"min_submitted": 2, + } + ) + + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + record = await RecordFactory.create(fields={"field-a": "Hello"}, dataset=dataset) + + webhook = await WebhookFactory.create(events=[ResponseEvent.created]) + + resp = await async_client.post( + self.url(record.id), + headers=owner_auth_header, + json={ + "values": { + "text-question": { + "value": "text question response", + }, + }, + "status": ResponseStatus.submitted, + }, + ) + + assert resp.status_code == 201 + + response = (await db.execute(select(Response))).scalar_one() + event = await build_response_event(db, ResponseEvent.created, response) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == ResponseEvent.created + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) + + async def test_create_record_response_enqueue_webhook_record_updated_event( + self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict + ): + dataset = await DatasetFactory.create( + distribution={ + "strategy": DatasetDistributionStrategy.overlap, + "min_submitted": 1, + } + ) + + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + record = await RecordFactory.create(fields={"field-a": "Hello"}, dataset=dataset) + + webhook = await WebhookFactory.create(events=[RecordEvent.updated]) + + response = await async_client.post( + self.url(record.id), + headers=owner_auth_header, + json={ + "values": { + "text-question": { + "value": "text question response", + }, + }, + "status": ResponseStatus.submitted, + }, + ) + + assert response.status_code == 201 + + event = await build_record_event(db, RecordEvent.updated, record) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.updated + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) + + async def 
test_create_record_response_enqueue_webhook_record_completed_event( + self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict + ): + dataset = await DatasetFactory.create( + distribution={ + "strategy": DatasetDistributionStrategy.overlap, + "min_submitted": 1, + } + ) + + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + record = await RecordFactory.create(fields={"field-a": "Hello"}, dataset=dataset) + + webhook = await WebhookFactory.create(events=[RecordEvent.completed]) + + response = await async_client.post( + self.url(record.id), + headers=owner_auth_header, + json={ + "values": { + "text-question": { + "value": "text question response", + }, + }, + "status": ResponseStatus.submitted, + }, + ) + + assert response.status_code == 201 + + event = await build_record_event(db, RecordEvent.completed, record) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.completed + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/records/test_delete_record.py b/argilla-server/tests/unit/api/handlers/v1/records/test_delete_record.py new file mode 100644 index 0000000000..ab017e50fa --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/records/test_delete_record.py @@ -0,0 +1,52 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from uuid import UUID +from httpx import AsyncClient +from fastapi.encoders import jsonable_encoder +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.webhooks.v1.enums import RecordEvent +from argilla_server.webhooks.v1.records import build_record_event + +from tests.factories import RecordFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestDeleteRecord: + def url(self, record_id: UUID) -> str: + return f"/api/v1/records/{record_id}" + + async def test_delete_record_enqueue_webhook_record_deleted_event( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + record = await RecordFactory.create() + webhook = await WebhookFactory.create(events=[RecordEvent.deleted]) + + event = await build_record_event(db, RecordEvent.deleted, record) + + response = await async_client.delete( + self.url(record.id), + headers=owner_auth_header, + ) + + assert response.status_code == 200 + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.deleted + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/records/test_update_record.py b/argilla-server/tests/unit/api/handlers/v1/records/test_update_record.py new file mode 100644 index 0000000000..d8eba32655 --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/records/test_update_record.py @@ -0,0 +1,53 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from uuid import UUID +from httpx import AsyncClient +from fastapi.encoders import jsonable_encoder +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.webhooks.v1.enums import RecordEvent +from argilla_server.webhooks.v1.records import build_record_event + +from tests.factories import RecordFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestUpdateRecord: + def url(self, record_id: UUID) -> str: + return f"/api/v1/records/{record_id}" + + async def test_update_record_enqueue_webhook_record_updated_event( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + record = await RecordFactory.create() + webhook = await WebhookFactory.create(events=[RecordEvent.updated]) + + response = await async_client.patch( + self.url(record.id), + headers=owner_auth_header, + json={}, + ) + + assert response.status_code == 200 + + event = await build_record_event(db, RecordEvent.updated, record) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.updated + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/responses/test_create_current_user_responses_bulk.py b/argilla-server/tests/unit/api/handlers/v1/responses/test_create_current_user_responses_bulk.py index 07b4bf0199..3cfe3fb7a4 100644 --- a/argilla-server/tests/unit/api/handlers/v1/responses/test_create_current_user_responses_bulk.py +++ 
b/argilla-server/tests/unit/api/handlers/v1/responses/test_create_current_user_responses_bulk.py @@ -11,28 +11,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os +import pytest + +from uuid import UUID, uuid4 from datetime import datetime from unittest.mock import call -from uuid import UUID, uuid4 +from httpx import AsyncClient +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession +from fastapi.encoders import jsonable_encoder -import pytest from argilla_server.constants import API_KEY_HEADER_NAME -from argilla_server.enums import ResponseStatus, RecordStatus +from argilla_server.enums import DatasetDistributionStrategy, ResponseStatus, RecordStatus +from argilla_server.jobs.queues import HIGH_QUEUE from argilla_server.models import Response, User from argilla_server.search_engine import SearchEngine from argilla_server.use_cases.responses.upsert_responses_in_bulk import UpsertResponsesInBulkUseCase -from httpx import AsyncClient -from sqlalchemy import func, select -from sqlalchemy.ext.asyncio import AsyncSession - +from argilla_server.webhooks.v1.enums import RecordEvent, ResponseEvent +from argilla_server.webhooks.v1.responses import build_response_event +from argilla_server.webhooks.v1.records import build_record_event from tests.factories import ( AnnotatorFactory, DatasetFactory, RatingQuestionFactory, RecordFactory, ResponseFactory, + WebhookFactory, WorkspaceUserFactory, + TextQuestionFactory, ) @@ -447,3 +455,183 @@ async def refresh_records(records): await use_case.execute([bulk_item.item for bulk_item in bulk_items], user) profiler.open_in_browser() + + async def test_create_current_user_responses_bulk_enqueue_webhook_response_created_event( + self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict + ): + dataset = await DatasetFactory.create( + 
distribution={ + "strategy": DatasetDistributionStrategy.overlap, + "min_submitted": 2, + }, + ) + + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + record = await RecordFactory.create(fields={"field-a": "Hello"}, dataset=dataset) + + webhook = await WebhookFactory.create(events=[ResponseEvent.created, ResponseEvent.updated]) + + resp = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "items": [ + { + "values": { + "text-question": { + "value": "Created value", + }, + }, + "status": ResponseStatus.submitted, + "record_id": str(record.id), + }, + ], + }, + ) + + assert resp.status_code == 200 + + response = (await db.execute(select(Response))).scalar_one() + event = await build_response_event(db, ResponseEvent.created, response) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == ResponseEvent.created + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) + + async def test_create_current_user_responses_bulk_enqueue_webhook_response_updated_event( + self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict + ): + dataset = await DatasetFactory.create( + distribution={ + "strategy": DatasetDistributionStrategy.overlap, + "min_submitted": 2, + }, + ) + + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + record = await RecordFactory.create(fields={"field-a": "Hello"}, dataset=dataset) + + response = await ResponseFactory.create( + values={"text-question": {"value": "Created value"}}, + status=ResponseStatus.submitted, + record=record, + user=owner, + ) + + webhook = await WebhookFactory.create(events=[ResponseEvent.created, ResponseEvent.updated]) + + resp = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "items": [ + { + "values": { + "text-question": { + "value": "Updated value", + }, + }, + "status": ResponseStatus.submitted, + "record_id": 
str(record.id), + }, + ], + }, + ) + + assert resp.status_code == 200 + + event = await build_response_event(db, ResponseEvent.updated, response) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == ResponseEvent.updated + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) + + async def test_create_current_user_responses_bulk_enqueue_webhook_record_updated_event( + self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict + ): + dataset = await DatasetFactory.create( + distribution={ + "strategy": DatasetDistributionStrategy.overlap, + "min_submitted": 1, + }, + ) + + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + record = await RecordFactory.create(fields={"field-a": "Hello"}, dataset=dataset) + + webhook = await WebhookFactory.create(events=[RecordEvent.updated]) + + resp = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "items": [ + { + "values": { + "text-question": { + "value": "Created value", + }, + }, + "status": ResponseStatus.submitted, + "record_id": str(record.id), + }, + ], + }, + ) + + assert resp.status_code == 200 + + event = await build_record_event(db, RecordEvent.updated, record) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.updated + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) + + async def test_create_current_user_responses_bulk_enqueue_webhook_record_completed_event( + self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict + ): + dataset = await DatasetFactory.create( + distribution={ + "strategy": DatasetDistributionStrategy.overlap, + "min_submitted": 1, + }, + ) + + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + record = await RecordFactory.create(fields={"field-a": "Hello"}, dataset=dataset) + + 
webhook = await WebhookFactory.create(events=[RecordEvent.completed]) + + resp = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "items": [ + { + "values": { + "text-question": { + "value": "Created value", + }, + }, + "status": ResponseStatus.submitted, + "record_id": str(record.id), + }, + ], + }, + ) + + assert resp.status_code == 200 + + event = await build_record_event(db, RecordEvent.completed, record) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.completed + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/responses/test_delete_response.py b/argilla-server/tests/unit/api/handlers/v1/responses/test_delete_response.py index 6b9d4ec749..af66f6d2ec 100644 --- a/argilla-server/tests/unit/api/handlers/v1/responses/test_delete_response.py +++ b/argilla-server/tests/unit/api/handlers/v1/responses/test_delete_response.py @@ -12,16 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from uuid import UUID - import pytest +from uuid import UUID from httpx import AsyncClient +from fastapi.encoders import jsonable_encoder +from sqlalchemy.ext.asyncio import AsyncSession from argilla_server.models import User +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.webhooks.v1.enums import RecordEvent, ResponseEvent +from argilla_server.webhooks.v1.responses import build_response_event +from argilla_server.webhooks.v1.records import build_record_event from argilla_server.enums import DatasetDistributionStrategy, RecordStatus, ResponseStatus -from tests.factories import DatasetFactory, RecordFactory, ResponseFactory, TextQuestionFactory +from tests.factories import DatasetFactory, RecordFactory, ResponseFactory, TextQuestionFactory, WebhookFactory @pytest.mark.asyncio @@ -64,3 +69,56 @@ async def test_delete_response_does_not_updates_record_status_to_pending( assert resp.status_code == 200 assert record.status == RecordStatus.completed + + async def test_delete_response_enqueue_webhook_response_deleted_event( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + response = await ResponseFactory.create() + webhook = await WebhookFactory.create(events=[ResponseEvent.deleted]) + + event = await build_response_event(db, ResponseEvent.deleted, response) + + resp = await async_client.delete(self.url(response.id), headers=owner_auth_header) + + assert resp.status_code == 200 + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == ResponseEvent.deleted + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) + + async def test_delete_response_enqueue_webhook_record_updated_event( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + record = await RecordFactory.create() + responses = await ResponseFactory.create_batch(2, record=record) + webhook = await WebhookFactory.create(events=[RecordEvent.updated]) + 
+ response = await async_client.delete(self.url(responses[0].id), headers=owner_auth_header) + + assert response.status_code == 200 + + event = await build_record_event(db, RecordEvent.updated, record) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.updated + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) + + async def test_delete_response_enqueue_webhook_record_completed_event( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + record = await RecordFactory.create() + responses = await ResponseFactory.create_batch(2, record=record) + webhook = await WebhookFactory.create(events=[RecordEvent.completed]) + + response = await async_client.delete(self.url(responses[0].id), headers=owner_auth_header) + + assert response.status_code == 200 + + event = await build_record_event(db, RecordEvent.completed, record) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.completed + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/responses/test_update_response.py b/argilla-server/tests/unit/api/handlers/v1/responses/test_update_response.py index d5097f8c7b..4d5f8a4792 100644 --- a/argilla-server/tests/unit/api/handlers/v1/responses/test_update_response.py +++ b/argilla-server/tests/unit/api/handlers/v1/responses/test_update_response.py @@ -12,19 +12,30 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from datetime import datetime -from uuid import UUID - import pytest -from httpx import AsyncClient +from uuid import UUID +from datetime import datetime +from httpx import AsyncClient from sqlalchemy import select from sqlalchemy.ext.asyncio.session import AsyncSession +from fastapi.encoders import jsonable_encoder -from argilla_server.enums import ResponseStatus, DatasetDistributionStrategy, RecordStatus from argilla_server.models import Response, User +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.webhooks.v1.enums import RecordEvent, ResponseEvent +from argilla_server.webhooks.v1.responses import build_response_event +from argilla_server.webhooks.v1.records import build_record_event +from argilla_server.enums import ResponseStatus, DatasetDistributionStrategy, RecordStatus -from tests.factories import DatasetFactory, RecordFactory, ResponseFactory, SpanQuestionFactory, TextQuestionFactory +from tests.factories import ( + DatasetFactory, + RecordFactory, + ResponseFactory, + SpanQuestionFactory, + TextQuestionFactory, + WebhookFactory, +) @pytest.mark.asyncio @@ -625,3 +636,147 @@ async def test_update_response_updates_record_status_to_pending( assert resp.status_code == 200 assert record.status == RecordStatus.pending + + async def test_update_response_enqueue_webhook_response_updated_event( + self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict + ): + dataset = await DatasetFactory.create( + distribution={ + "strategy": DatasetDistributionStrategy.overlap, + "min_submitted": 2, + }, + ) + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + record = await RecordFactory.create(fields={"field-a": "Hello"}, dataset=dataset) + + response = await ResponseFactory.create( + values={ + "text-question": { + "value": "Hello", + }, + }, + status=ResponseStatus.submitted, + user=owner, + record=record, + ) + + webhook = await WebhookFactory.create(events=[ResponseEvent.updated]) + + resp = 
await async_client.put( + self.url(response.id), + headers=owner_auth_header, + json={ + "values": { + "text-question": { + "value": "Update value", + }, + }, + "status": ResponseStatus.submitted, + }, + ) + + assert resp.status_code == 200 + + event = await build_response_event(db, ResponseEvent.updated, response) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == ResponseEvent.updated + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) + + async def test_update_response_enqueue_webhook_record_updated_event( + self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict + ): + dataset = await DatasetFactory.create( + distribution={ + "strategy": DatasetDistributionStrategy.overlap, + "min_submitted": 1, + }, + ) + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + record = await RecordFactory.create(fields={"field-a": "Hello"}, dataset=dataset) + + response = await ResponseFactory.create( + values={ + "text-question": { + "value": "Hello", + }, + }, + status=ResponseStatus.draft, + user=owner, + record=record, + ) + + webhook = await WebhookFactory.create(events=[RecordEvent.updated]) + + resp = await async_client.put( + self.url(response.id), + headers=owner_auth_header, + json={ + "values": { + "text-question": { + "value": "Update value", + }, + }, + "status": ResponseStatus.submitted, + }, + ) + + assert resp.status_code == 200 + + event = await build_record_event(db, RecordEvent.updated, record) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.updated + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) + + async def test_update_response_enqueue_webhook_record_completed_event( + self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict + ): + dataset = await DatasetFactory.create( + 
distribution={ + "strategy": DatasetDistributionStrategy.overlap, + "min_submitted": 1, + }, + ) + await TextQuestionFactory.create(name="text-question", dataset=dataset) + + record = await RecordFactory.create(fields={"field-a": "Hello"}, dataset=dataset) + + response = await ResponseFactory.create( + values={ + "text-question": { + "value": "Hello", + }, + }, + status=ResponseStatus.draft, + user=owner, + record=record, + ) + + webhook = await WebhookFactory.create(events=[RecordEvent.completed]) + + resp = await async_client.put( + self.url(response.id), + headers=owner_auth_header, + json={ + "values": { + "text-question": { + "value": "Update value", + }, + }, + "status": ResponseStatus.submitted, + }, + ) + + assert resp.status_code == 200 + + event = await build_record_event(db, RecordEvent.completed, record) + + assert HIGH_QUEUE.count == 1 + assert HIGH_QUEUE.jobs[0].args[0] == webhook.id + assert HIGH_QUEUE.jobs[0].args[1] == RecordEvent.completed + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_encoder(event.data) diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index 944e123b03..71262e3159 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -4351,7 +4351,8 @@ async def test_publish_dataset_as_admin(self, async_client: "AsyncClient", db: " admin = await AdminFactory.create(workspaces=[dataset.workspace]) response = await async_client.put( - f"/api/v1/datasets/{dataset.id}/publish", headers={API_KEY_HEADER_NAME: admin.api_key} + f"/api/v1/datasets/{dataset.id}/publish", + headers={API_KEY_HEADER_NAME: admin.api_key}, ) assert response.status_code == 200 @@ -4650,7 +4651,8 @@ async def test_delete_dataset_as_admin(self, async_client: "AsyncClient", db: "A admin = await AdminFactory.create(workspaces=[dataset.workspace]) response = await async_client.delete( - 
f"/api/v1/datasets/{dataset.id}", headers={API_KEY_HEADER_NAME: admin.api_key} + f"/api/v1/datasets/{dataset.id}", + headers={API_KEY_HEADER_NAME: admin.api_key}, ) assert response.status_code == 200 diff --git a/argilla-server/tests/unit/api/handlers/v1/webhooks/__init__.py b/argilla-server/tests/unit/api/handlers/v1/webhooks/__init__.py new file mode 100644 index 0000000000..4b6cecae7f --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/webhooks/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/argilla-server/tests/unit/api/handlers/v1/webhooks/test_create_webhook.py b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_create_webhook.py new file mode 100644 index 0000000000..6fb2b51717 --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_create_webhook.py @@ -0,0 +1,265 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from typing import Any +from httpx import AsyncClient +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.webhooks.v1.enums import WebhookEvent +from argilla_server.models import Webhook +from argilla_server.constants import API_KEY_HEADER_NAME + +from tests.factories import AdminFactory, AnnotatorFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestCreateWebhook: + def url(self) -> str: + return "/api/v1/webhooks" + + async def test_create_webhook(self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict): + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_created], + "description": "Test webhook", + }, + ) + + assert response.status_code == 201 + + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 1 + webhook = (await db.execute(select(Webhook))).scalar_one() + + assert response.json() == { + "id": str(webhook.id), + "url": "https://example.com/webhook", + "secret": webhook.secret, + "events": [WebhookEvent.response_created], + "enabled": True, + "description": "Test webhook", + "inserted_at": webhook.inserted_at.isoformat(), + "updated_at": webhook.updated_at.isoformat(), + } + + async def test_create_webhook_with_ip_address_url( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "url": "http://1.1.1.1/webhook", + "events": [WebhookEvent.response_created], + "description": "Test webhook", + }, + ) + + assert response.status_code == 201 + + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 1 + webhook = (await db.execute(select(Webhook))).scalar_one() + + assert response.json()["url"] == 
"http://1.1.1.1/webhook" + + async def test_create_webhook_as_admin(self, db: AsyncSession, async_client: AsyncClient): + admin = await AdminFactory.create() + + response = await async_client.post( + self.url(), + headers={API_KEY_HEADER_NAME: admin.api_key}, + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_created], + }, + ) + + assert response.status_code == 403 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 0 + + async def test_create_webhook_as_annotator(self, db: AsyncSession, async_client: AsyncClient): + annotator = await AnnotatorFactory.create() + + response = await async_client.post( + self.url(), + headers={API_KEY_HEADER_NAME: annotator.api_key}, + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_created], + }, + ) + + assert response.status_code == 403 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 0 + + async def test_create_webhook_without_authentication(self, db: AsyncSession, async_client: AsyncClient): + response = await async_client.post( + self.url(), + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_created], + }, + ) + + assert response.status_code == 401 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 0 + + @pytest.mark.parametrize( + "invalid_url", + [ + "", + "example.com", + "http:example.com", + "https:example.com", + "http://localhost/webhooks", + "http://localhost:3000/webhooks", + ], + ) + async def test_create_webhook_with_invalid_url( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict, invalid_url: str + ): + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "url": invalid_url, + "events": [WebhookEvent.response_created], + }, + ) + + assert response.status_code == 422 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 0 + + @pytest.mark.parametrize( + "invalid_events", [[], 
["invalid-event"], [WebhookEvent.response_created, "invalid-event"]] + ) + async def test_create_webhook_with_invalid_events( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict, invalid_events: list + ): + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": invalid_events, + }, + ) + + assert response.status_code == 422 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 0 + + async def test_create_webhook_with_duplicated_events( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_created, WebhookEvent.response_created], + }, + ) + + assert response.status_code == 422 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 0 + + @pytest.mark.parametrize("invalid_description", ["", "d" * 1001]) + async def test_create_webhook_with_invalid_description( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict, invalid_description: str + ): + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_created], + "description": invalid_description, + }, + ) + + assert response.status_code == 422 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 0 + + async def test_create_webhook_with_description_as_none( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_created], + "description": None, + }, + ) + + assert response.status_code == 201 + assert response.json()["description"] == None + + assert 
(await db.execute(select(func.count(Webhook.id)))).scalar() == 1 + webhook = (await db.execute(select(Webhook))).scalar_one() + assert webhook.description == None + + async def test_create_webhook_without_url( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "events": [WebhookEvent.response_created], + }, + ) + + assert response.status_code == 422 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 0 + + async def test_create_webhook_without_events( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + }, + ) + + assert response.status_code == 422 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 0 + + async def test_create_webhook_reaching_maximum_number_of_webhooks( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + await WebhookFactory.create_batch(10) + + response = await async_client.post( + self.url(), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_created], + "description": "Test webhook", + }, + ) + + assert response.status_code == 422 + assert response.json() == {"detail": "You can't create more than 10 webhooks. Please delete some of them first"} + + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 10 diff --git a/argilla-server/tests/unit/api/handlers/v1/webhooks/test_delete_webhook.py b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_delete_webhook.py new file mode 100644 index 0000000000..f48b12dcb6 --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_delete_webhook.py @@ -0,0 +1,93 @@ +# Copyright 2021-present, the Recognai S.L. team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from httpx import AsyncClient +from uuid import UUID, uuid4 +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.webhooks.v1.enums import WebhookEvent +from argilla_server.models import Webhook +from argilla_server.constants import API_KEY_HEADER_NAME + +from tests.factories import AdminFactory, AnnotatorFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestDeleteWebhook: + def url(self, webhook_id: UUID) -> str: + return f"/api/v1/webhooks/{webhook_id}" + + async def test_delete_webhook(self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create() + + response = await async_client.delete(self.url(webhook.id), headers=owner_auth_header) + + assert response.status_code == 200 + assert response.json() == { + "id": str(webhook.id), + "url": webhook.url, + "secret": webhook.secret, + "events": [WebhookEvent.response_created], + "enabled": True, + "description": None, + "inserted_at": webhook.inserted_at.isoformat(), + "updated_at": webhook.updated_at.isoformat(), + } + + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 0 + + async def test_delete_webhook_as_admin(self, db: AsyncSession, async_client: AsyncClient): + admin = await AdminFactory.create() + + webhook = await WebhookFactory.create() + + response = await async_client.delete( + self.url(webhook.id), + 
headers={API_KEY_HEADER_NAME: admin.api_key}, + ) + + assert response.status_code == 403 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 1 + + async def test_delete_webhook_as_annotator(self, db: AsyncSession, async_client: AsyncClient): + annotator = await AnnotatorFactory.create() + + webhook = await WebhookFactory.create() + + response = await async_client.delete( + self.url(webhook.id), + headers={API_KEY_HEADER_NAME: annotator.api_key}, + ) + + assert response.status_code == 403 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 1 + + async def test_delete_webhook_without_authentication(self, db: AsyncSession, async_client: AsyncClient): + webhook = await WebhookFactory.create() + + response = await async_client.delete(self.url(webhook.id)) + + assert response.status_code == 401 + assert (await db.execute(select(func.count(Webhook.id)))).scalar() == 1 + + async def test_delete_webhook_with_nonexistent_webhook_id(self, async_client: AsyncClient, owner_auth_header: dict): + webhook_id = uuid4() + + response = await async_client.delete(self.url(webhook_id), headers=owner_auth_header) + + assert response.status_code == 404 + assert response.json() == {"detail": f"Webhook with id `{webhook_id}` not found"} diff --git a/argilla-server/tests/unit/api/handlers/v1/webhooks/test_list_webhooks.py b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_list_webhooks.py new file mode 100644 index 0000000000..5738e23f5a --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_list_webhooks.py @@ -0,0 +1,90 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from httpx import AsyncClient + +from argilla_server.webhooks.v1.enums import WebhookEvent +from argilla_server.constants import API_KEY_HEADER_NAME + +from tests.factories import AdminFactory, AnnotatorFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestListWebhooks: + def url(self) -> str: + return "/api/v1/webhooks" + + async def test_list_webhooks(self, async_client: AsyncClient, owner_auth_header: dict): + webhooks = await WebhookFactory.create_batch(2) + + response = await async_client.get(self.url(), headers=owner_auth_header) + + assert response.status_code == 200 + assert response.json() == { + "items": [ + { + "id": str(webhooks[0].id), + "url": webhooks[0].url, + "secret": webhooks[0].secret, + "events": [WebhookEvent.response_created], + "enabled": True, + "description": None, + "inserted_at": webhooks[0].inserted_at.isoformat(), + "updated_at": webhooks[0].updated_at.isoformat(), + }, + { + "id": str(webhooks[1].id), + "url": webhooks[1].url, + "secret": webhooks[1].secret, + "events": [WebhookEvent.response_created], + "enabled": True, + "description": None, + "inserted_at": webhooks[1].inserted_at.isoformat(), + "updated_at": webhooks[1].updated_at.isoformat(), + }, + ], + } + + async def test_list_webhooks_without_webhooks(self, async_client: AsyncClient, owner_auth_header: dict): + response = await async_client.get(self.url(), headers=owner_auth_header) + + assert response.status_code == 200 + assert response.json() == {"items": []} + + async def test_list_webhooks_as_admin(self, async_client: AsyncClient): + 
admin = await AdminFactory.create() + + response = await async_client.get( + self.url(), + headers={API_KEY_HEADER_NAME: admin.api_key}, + ) + + assert response.status_code == 403 + + async def test_list_webhooks_as_annotator(self, async_client: AsyncClient): + annotator = await AnnotatorFactory.create() + + response = await async_client.get( + self.url(), + headers={API_KEY_HEADER_NAME: annotator.api_key}, + ) + + assert response.status_code == 403 + + async def test_list_webhooks_without_authentication(self, async_client: AsyncClient): + response = await async_client.get(self.url()) + + assert response.status_code == 401 diff --git a/argilla-server/tests/unit/api/handlers/v1/webhooks/test_ping_webhook.py b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_ping_webhook.py new file mode 100644 index 0000000000..949c91e5e7 --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_ping_webhook.py @@ -0,0 +1,98 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +import respx +import json + +from uuid import UUID, uuid4 +from httpx import AsyncClient, Response +from standardwebhooks.webhooks import Webhook + +from argilla_server.contexts import info +from argilla_server.constants import API_KEY_HEADER_NAME + +from tests.factories import AdminFactory, AnnotatorFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestPingWebhook: + def url(self, webhook_id: UUID) -> str: + return f"/api/v1/webhooks/{webhook_id}/ping" + + async def test_ping_webhook(self, async_client: AsyncClient, owner_auth_header: dict, respx_mock): + webhook = await WebhookFactory.create() + + respx_mock.post(webhook.url).mock(return_value=Response(200)) + response = await async_client.post( + self.url(webhook.id), + headers=owner_auth_header, + ) + + assert response.status_code == 204 + + request, _ = respx.calls.last + timestamp = json.loads(request.content)["timestamp"] + + wh = Webhook(webhook.secret) + assert wh.verify(headers=request.headers, data=request.content) == { + "type": "ping", + "version": 1, + "timestamp": timestamp, + "data": { + "agent": "argilla-server", + "version": info.argilla_version(), + }, + } + + async def test_ping_webhook_as_admin(self, async_client: AsyncClient, respx_mock): + admin = await AdminFactory.create() + webhook = await WebhookFactory.create() + + respx_mock.post(webhook.url).mock(return_value=Response(200)) + response = await async_client.post( + self.url(webhook.id), + headers={API_KEY_HEADER_NAME: admin.api_key}, + ) + + assert response.status_code == 403 + + async def test_ping_webhook_as_annotator(self, async_client: AsyncClient): + annotator = await AnnotatorFactory.create() + webhook = await WebhookFactory.create() + + response = await async_client.post( + self.url(webhook.id), + headers={API_KEY_HEADER_NAME: annotator.api_key}, + ) + + assert response.status_code == 403 + + async def test_ping_webhook_without_authentication(self, async_client: AsyncClient): + webhook = await 
WebhookFactory.create() + + response = await async_client.post(self.url(webhook.id)) + + assert response.status_code == 401 + + async def test_ping_webhook_with_nonexistent_webhook_id(self, async_client: AsyncClient, owner_auth_header: dict): + webhook_id = uuid4() + + response = await async_client.post( + self.url(webhook_id), + headers=owner_auth_header, + ) + + assert response.status_code == 404 + assert response.json() == {"detail": f"Webhook with id `{webhook_id}` not found"} diff --git a/argilla-server/tests/unit/api/handlers/v1/webhooks/test_update_webhook.py b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_update_webhook.py new file mode 100644 index 0000000000..ad2cc0bb30 --- /dev/null +++ b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_update_webhook.py @@ -0,0 +1,439 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from uuid import UUID, uuid4 +from httpx import AsyncClient +from typing import Any + +from argilla_server.webhooks.v1.enums import WebhookEvent +from argilla_server.constants import API_KEY_HEADER_NAME + +from tests.factories import AdminFactory, AnnotatorFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestUpdateWebhook: + def url(self, webhook_id: UUID) -> str: + return f"/api/v1/webhooks/{webhook_id}" + + async def test_update_webhook(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": [ + WebhookEvent.response_created, + WebhookEvent.response_updated, + ], + "enabled": False, + "description": "Test webhook", + }, + ) + + assert response.status_code == 200 + assert response.json() == { + "id": str(webhook.id), + "url": "https://example.com/webhook", + "secret": webhook.secret, + "events": [ + WebhookEvent.response_created, + WebhookEvent.response_updated, + ], + "enabled": False, + "description": "Test webhook", + "inserted_at": webhook.inserted_at.isoformat(), + "updated_at": webhook.updated_at.isoformat(), + } + + assert webhook.url == "https://example.com/webhook" + assert webhook.events == [ + WebhookEvent.response_created, + WebhookEvent.response_updated, + ] + + async def test_update_webhook_with_url(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + }, + ) + + assert response.status_code == 200 + assert response.json() == { + "id": str(webhook.id), + "url": "https://example.com/webhook", + "secret": webhook.secret, + "events": webhook.events, + "enabled": True, + "description": None, + "inserted_at": webhook.inserted_at.isoformat(), + 
"updated_at": webhook.updated_at.isoformat(), + } + + assert webhook.url == "https://example.com/webhook" + + async def test_update_webhook_with_ip_address_url(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "url": "https://1.1.1.1:9999/webhook", + }, + ) + + assert response.status_code == 200 + assert response.json()["url"] == "https://1.1.1.1:9999/webhook" + + async def test_update_webhook_with_events(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "events": [WebhookEvent.response_updated], + }, + ) + + assert response.status_code == 200 + assert response.json() == { + "id": str(webhook.id), + "url": webhook.url, + "secret": webhook.secret, + "events": [WebhookEvent.response_updated], + "enabled": True, + "description": None, + "inserted_at": webhook.inserted_at.isoformat(), + "updated_at": webhook.updated_at.isoformat(), + } + + assert webhook.events == [WebhookEvent.response_updated] + + async def test_update_webhook_with_enabled(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "enabled": False, + }, + ) + + assert response.status_code == 200 + assert response.json() == { + "id": str(webhook.id), + "url": webhook.url, + "secret": webhook.secret, + "events": webhook.events, + "enabled": False, + "description": None, + "inserted_at": webhook.inserted_at.isoformat(), + "updated_at": webhook.updated_at.isoformat(), + } + + assert webhook.enabled == False + + async def test_update_webhook_with_description(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await 
WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "description": "Test webhook", + }, + ) + + assert response.status_code == 200 + assert response.json() == { + "id": str(webhook.id), + "url": webhook.url, + "secret": webhook.secret, + "events": webhook.events, + "enabled": True, + "description": "Test webhook", + "inserted_at": webhook.inserted_at.isoformat(), + "updated_at": webhook.updated_at.isoformat(), + } + + assert webhook.description == "Test webhook" + + async def test_update_webhook_without_changes(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={}, + ) + + assert response.status_code == 200 + assert response.json() == { + "id": str(webhook.id), + "url": webhook.url, + "secret": webhook.secret, + "events": webhook.events, + "enabled": True, + "description": None, + "inserted_at": webhook.inserted_at.isoformat(), + "updated_at": webhook.updated_at.isoformat(), + } + + async def test_update_webhook_as_admin(self, async_client: AsyncClient): + admin = await AdminFactory.create() + + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers={API_KEY_HEADER_NAME: admin.api_key}, + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_updated], + }, + ) + + assert response.status_code == 403 + + assert webhook.url != "https://example.com/webhook" + assert webhook.events != [WebhookEvent.response_updated] + + async def test_update_webhook_as_annotator(self, async_client: AsyncClient): + annotator = await AnnotatorFactory.create() + + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers={API_KEY_HEADER_NAME: annotator.api_key}, + json={ + "url": "https://example.com/webhook", + "events": 
[WebhookEvent.response_updated], + }, + ) + + assert response.status_code == 403 + + assert webhook.url != "https://example.com/webhook" + assert webhook.events != [WebhookEvent.response_updated] + + async def test_update_webhook_without_authentication(self, async_client: AsyncClient): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_updated], + }, + ) + + assert response.status_code == 401 + + assert webhook.url != "https://example.com/webhook" + assert webhook.events != [WebhookEvent.response_updated] + + @pytest.mark.parametrize("invalid_url", ["", "example.com", "http:example.com", "https:example.com"]) + async def test_update_webhook_with_invalid_url( + self, async_client: AsyncClient, owner_auth_header: dict, invalid_url: str + ): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "url": invalid_url, + "events": [WebhookEvent.response_updated], + }, + ) + + assert response.status_code == 422 + + assert webhook.url != invalid_url + assert webhook.events != [WebhookEvent.response_updated] + + @pytest.mark.parametrize( + "invalid_events", [[], ["invalid_event"], [WebhookEvent.response_updated, "invalid_event"]] + ) + async def test_update_webhook_with_invalid_events( + self, async_client: AsyncClient, owner_auth_header: dict, invalid_events: list + ): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": invalid_events, + }, + ) + + assert response.status_code == 422 + + assert webhook.url != "https://example.com/webhook" + assert webhook.events != invalid_events + + async def test_update_webhook_with_duplicated_events(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = 
await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "events": [WebhookEvent.response_updated, WebhookEvent.response_updated], + }, + ) + + assert response.status_code == 422 + assert webhook.events != [WebhookEvent.response_updated, WebhookEvent.response_updated] + + @pytest.mark.parametrize("invalid_enabled", ["", "invalid", 123]) + async def test_update_webhook_with_invalid_enabled( + self, async_client: AsyncClient, owner_auth_header: dict, invalid_enabled: Any + ): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "enabled": invalid_enabled, + }, + ) + + assert response.status_code == 422 + assert webhook.enabled != invalid_enabled + + @pytest.mark.parametrize("invalid_description", ["", "d" * 1001]) + async def test_update_webhook_with_invalid_description( + self, async_client: AsyncClient, owner_auth_header: dict, invalid_description: str + ): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "description": invalid_description, + }, + ) + + assert response.status_code == 422 + assert webhook.description != invalid_description + + async def test_update_webhook_with_url_as_none(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "url": None, + "events": [WebhookEvent.response_updated], + }, + ) + + assert response.status_code == 422 + + assert webhook.url != None + assert webhook.events != [WebhookEvent.response_updated] + + async def test_update_webhook_with_enabled_as_none(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), 
+ headers=owner_auth_header, + json={ + "enabled": None, + }, + ) + + assert response.status_code == 422 + assert webhook.enabled != None + + async def test_update_webhook_with_events_as_none(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create() + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": None, + }, + ) + + assert response.status_code == 422 + + assert webhook.url != "https://example.com/webhook" + assert webhook.events != None + + async def test_update_webhook_with_description_as_none(self, async_client: AsyncClient, owner_auth_header: dict): + webhook = await WebhookFactory.create(description="Test webhook") + + response = await async_client.patch( + self.url(webhook.id), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_updated], + "description": None, + }, + ) + + assert response.status_code == 200 + assert response.json() == { + "id": str(webhook.id), + "url": "https://example.com/webhook", + "secret": webhook.secret, + "events": [WebhookEvent.response_updated], + "enabled": True, + "description": None, + "inserted_at": webhook.inserted_at.isoformat(), + "updated_at": webhook.updated_at.isoformat(), + } + + assert webhook.url == "https://example.com/webhook" + assert webhook.events == [WebhookEvent.response_updated] + assert webhook.description == None + + async def test_update_webhook_with_nonexistent_webhook_id(self, async_client: AsyncClient, owner_auth_header: dict): + webhook_id = uuid4() + + response = await async_client.patch( + self.url(webhook_id), + headers=owner_auth_header, + json={ + "url": "https://example.com/webhook", + "events": [WebhookEvent.response_updated], + }, + ) + + assert response.status_code == 404 + assert response.json() == {"detail": f"Webhook with id `{webhook_id}` not found"} diff --git 
a/argilla-server/tests/unit/jobs/__init__.py b/argilla-server/tests/unit/jobs/__init__.py new file mode 100644 index 0000000000..4b6cecae7f --- /dev/null +++ b/argilla-server/tests/unit/jobs/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/argilla-server/tests/unit/jobs/webhook_jobs/__init__.py b/argilla-server/tests/unit/jobs/webhook_jobs/__init__.py new file mode 100644 index 0000000000..4b6cecae7f --- /dev/null +++ b/argilla-server/tests/unit/jobs/webhook_jobs/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/argilla-server/tests/unit/jobs/webhook_jobs/test_enqueue_notify_events.py b/argilla-server/tests/unit/jobs/webhook_jobs/test_enqueue_notify_events.py new file mode 100644 index 0000000000..4f50f00ca2 --- /dev/null +++ b/argilla-server/tests/unit/jobs/webhook_jobs/test_enqueue_notify_events.py @@ -0,0 +1,58 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from datetime import datetime +import pytest + +from fastapi.encoders import jsonable_encoder +from sqlalchemy.ext.asyncio import AsyncSession + +from argilla_server.jobs.queues import HIGH_QUEUE +from argilla_server.jobs.webhook_jobs import enqueue_notify_events +from argilla_server.webhooks.v1.enums import ResponseEvent +from argilla_server.webhooks.v1.responses import build_response_event + +from tests.factories import ResponseFactory, WebhookFactory + + +@pytest.mark.asyncio +class TestEnqueueNotifyEvents: + async def test_enqueue_notify_events(self, db: AsyncSession): + response = await ResponseFactory.create() + + webhooks = await WebhookFactory.create_batch(2, events=[ResponseEvent.created]) + webhooks_disabled = await WebhookFactory.create_batch(2, events=[ResponseEvent.created], enabled=False) + webhooks_with_other_events = await WebhookFactory.create_batch(2, events=[ResponseEvent.deleted]) + + event = await build_response_event(db, ResponseEvent.created, response) + jsonable_data = jsonable_encoder(event.data) + + await 
enqueue_notify_events( + db=db, + event=ResponseEvent.created, + timestamp=event.timestamp, + data=jsonable_data, + ) + + assert HIGH_QUEUE.count == 2 + + assert HIGH_QUEUE.jobs[0].args[0] == webhooks[0].id + assert HIGH_QUEUE.jobs[0].args[1] == ResponseEvent.created + assert HIGH_QUEUE.jobs[0].args[2] == event.timestamp + assert HIGH_QUEUE.jobs[0].args[3] == jsonable_data + + assert HIGH_QUEUE.jobs[1].args[0] == webhooks[1].id + assert HIGH_QUEUE.jobs[1].args[1] == ResponseEvent.created + assert HIGH_QUEUE.jobs[1].args[2] == event.timestamp + assert HIGH_QUEUE.jobs[1].args[3] == jsonable_data diff --git a/argilla-server/tests/unit/models/__init__.py b/argilla-server/tests/unit/models/__init__.py new file mode 100644 index 0000000000..4b6cecae7f --- /dev/null +++ b/argilla-server/tests/unit/models/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/argilla-server/tests/unit/models/test_webhook.py b/argilla-server/tests/unit/models/test_webhook.py new file mode 100644 index 0000000000..a33e99a0c0 --- /dev/null +++ b/argilla-server/tests/unit/models/test_webhook.py @@ -0,0 +1,30 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from tests.factories import WebhookFactory + + +@pytest.mark.asyncio +class TestWebhook: + async def test_secret_is_generated_by_default(self): + webhook = await WebhookFactory.create() + + assert webhook.secret + + async def test_secret_is_generated_by_default_individually(self): + webhooks = await WebhookFactory.create_batch(2) + + assert webhooks[0].secret != webhooks[1].secret diff --git a/argilla-server/tests/unit/webhooks/__init__.py b/argilla-server/tests/unit/webhooks/__init__.py new file mode 100644 index 0000000000..4b6cecae7f --- /dev/null +++ b/argilla-server/tests/unit/webhooks/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/argilla-server/tests/unit/webhooks/v1/__init__.py b/argilla-server/tests/unit/webhooks/v1/__init__.py new file mode 100644 index 0000000000..4b6cecae7f --- /dev/null +++ b/argilla-server/tests/unit/webhooks/v1/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021-present, the Recognai S.L. team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/argilla-server/tests/unit/webhooks/v1/test_notify_ping_event.py b/argilla-server/tests/unit/webhooks/v1/test_notify_ping_event.py new file mode 100644 index 0000000000..1fc7f4ae26 --- /dev/null +++ b/argilla-server/tests/unit/webhooks/v1/test_notify_ping_event.py @@ -0,0 +1,52 @@ +# Copyright 2021-present, the Recognai S.L. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +import respx +import json + +from httpx import Response +from standardwebhooks.webhooks import Webhook + +from argilla_server.webhooks.v1.enums import WebhookEvent +from argilla_server.webhooks.v1.ping import notify_ping_event +from argilla_server.contexts import info + +from tests.factories import WebhookFactory + + +@pytest.mark.asyncio +class TestNotifyPingEvent: + async def test_notify_ping_event(self, respx_mock): + webhook = await WebhookFactory.create() + + respx_mock.post(webhook.url).mock(return_value=Response(200)) + response = notify_ping_event(webhook) + + assert response.status_code == 200 + + request, _ = respx.calls.last + timestamp = json.loads(request.content)["timestamp"] + + wh = Webhook(webhook.secret) + assert wh.verify(headers=request.headers, data=request.content) == { + "type": "ping", + "version": 1, + "timestamp": timestamp, + "data": { + "agent": "argilla-server", + "version": info.argilla_version(), + }, + } diff --git a/argilla/CHANGELOG.md b/argilla/CHANGELOG.md index b3374854cf..d26a7f4f1d 100644 --- a/argilla/CHANGELOG.md +++ b/argilla/CHANGELOG.md @@ -21,6 +21,7 @@ These are the section headers that we use: ### Added - Added `Argilla.deploy_on_spaces` to deploy the Argilla server on Hugging Face Spaces. ([#5547](https://github.com/argilla-io/argilla/pull/5547)) +- Add support to webhooks. ([#5467](https://github.com/argilla-io/argilla/pull/5467)) ### Changed diff --git a/argilla/docs/how_to_guides/index.md b/argilla/docs/how_to_guides/index.md index ab28c5ba27..75c49cd616 100644 --- a/argilla/docs/how_to_guides/index.md +++ b/argilla/docs/how_to_guides/index.md @@ -90,6 +90,23 @@ These guides provide step-by-step instructions for common scenarios, including d [:octicons-arrow-right-24: How-to guide](custom_fields.md) +- __Use webhooks to respond to server events__ + + --- + + Learn how to use Argilla webhooks to receive notifications about events in your Argilla Server. 
+ + [:octicons-arrow-right-24: How-to guide](webhooks.md) + +- __Webhooks internals__ + + --- + + Learn how Argilla webhooks are implemented under the hood and the structure of the different events. + + [:octicons-arrow-right-24: How-to guide](webhooks_internals.md) + + + - __Use Markdown to format rich content__ --- @@ -106,4 +123,4 @@ These guides provide step-by-step instructions for common scenarios, including d [:octicons-arrow-right-24: How-to guide](migrate_from_legacy_datasets.md) -
\ No newline at end of file +
diff --git a/argilla/docs/how_to_guides/webhooks.md b/argilla/docs/how_to_guides/webhooks.md new file mode 100644 index 0000000000..0b0ea0f214 --- /dev/null +++ b/argilla/docs/how_to_guides/webhooks.md @@ -0,0 +1,160 @@ +--- +description: In this section, we will provide a step-by-step guide to create a webhook in Argilla. +--- + +# Use Argilla webhooks + +This guide provides an overview of how to create and use webhooks in Argilla. + +A **webhook** allows an application to submit real-time information to other applications whenever a specific event occurs. Unlike traditional APIs, you won’t need to poll for data very frequently in order to get it in real time. This makes webhooks much more efficient for both the provider and the consumer. + +## Creating a webhook listener in Argilla + +The python SDK provides a simple way to create a webhook in Argilla. It allows you to focus on the use case of the webhook and not on the implementation details. You only need to create your event handler function with the `webhook_listener` decorator. + +```python +import argilla as rg + +from datetime import datetime +from argilla import webhook_listener + +@webhook_listener(events="dataset.created") +async def my_webhook_handler(dataset: rg.Dataset, type: str, timestamp: datetime): + print(dataset, type, timestamp) +``` + +In the example above, we have created a webhook that listens to the `dataset.created` event. +> You can find the list of events in the [Events](#events) section. + +The python SDK will automatically create a webhook in Argilla and listen to the specified event. When the event is triggered, +the `my_webhook_handler` function will be called with the event data. The SDK will also parse the incoming webhook event into +a proper resource object (`rg.Dataset`, `rg.Record`, and `rg.Response`). The SDK will also take care of request authentication and error handling. 
+ +## Running the webhook server + +Under the hood, the SDK uses the `FastAPI` framework to create the webhook server and the POST endpoint to receive the webhook events. + +To run the webhook, you need to define the webhook server in your code and start it using the `uvicorn` command. + +```python +# my_webhook.py file +from argilla import get_webhook_server + +server = get_webhook_server() +``` + +```bash +uvicorn my_webhook:server +``` + +You can explore the Swagger UI to explore your defined webhooks by visiting `http://localhost:8000/docs`. + + +The `uvicorn` command will start the webhook server on the default port `8000`. + +By default, the Python SDK will register the webhook using the server URL `http://127.0.0.1:8000/`. If you want to use a different server URL, you can set the `WEBHOOK_SERVER_URL` environment variable. + +```bash +export WEBHOOK_SERVER_URL=http://my-webhook-server.com +``` + +All incoming webhook events will be sent to the specified server URL. + +## Webhooks management + +The Python SDK provides a simple way to manage webhooks in Argilla. You can create, list, update, and delete webhooks using the SDK. + +### Create a webhook + +To create a new webhook in Argilla, you can define it in the `Webhook` class and then call the `create` method. + +```python +import argilla as rg + +client = rg.Argilla(api_url="", api_key="") + +webhook = rg.Webhook( + url="http://127.0.0.1:8000", + events=["dataset.created"], + description="My webhook" +) + +webhook.create() + +``` + +### List webhooks + +You can list all the existing webhooks in Argilla by accessing the `webhooks` attribute on the Argilla class and iterating over them. + +```python +import argilla as rg + +client = rg.Argilla(api_url="", api_key="") + +for webhook in client.webhooks: + print(webhook) + +``` + +### Update a webhook + +You can update a webhook using the `update` method. 
+ +```python +import argilla as rg + +client = rg.Argilla(api_url="", api_key="") + +webhook = rg.Webhook( + url="http://127.0.0.1:8000", + events=["dataset.created"], + description="My webhook" +).create() + +webhook.events = ["dataset.updated"] +webhook.update() + +``` +> You should use an IP address instead of localhost, since the webhook validation expects a Top Level Domain (TLD) in the URL. + +### Delete a webhook + +You can delete a webhook using the `delete` method. + +```python +import argilla as rg + +client = rg.Argilla(api_url="", api_key="") + +for webhook in client.webhooks: + webhook.delete() + +``` + +## Deploying a webhooks server in a Hugging Face Space + +You can deploy your webhook in a Hugging Face Space. You can visit this [link](https://huggingface.co/spaces/argilla/argilla-webhooks/tree/main) to explore an example of a webhook server deployed in a Hugging Face Space. + + +## Events + +The following is a list of events that you can listen to in Argilla, grouped by resource type. + +### Dataset events + +- `dataset.created`: The Dataset resource was created. +- `dataset.updated`: The Dataset resource was updated. +- `dataset.deleted`: The Dataset resource was deleted. +- `dataset.published`: The Dataset resource was published. + +### Record events +- `record.created`: The Record resource was created. +- `record.updated`: The Record resource was updated. +- `record.deleted`: The Record resource was deleted. +- `record.completed`: The Record resource was completed (status="completed"). + +### Response events +- `response.created`: The Response resource was created. +- `response.updated`: The Response resource was updated. +- `response.deleted`: The Response resource was deleted. 
diff --git a/argilla/docs/how_to_guides/webhooks_internals.md b/argilla/docs/how_to_guides/webhooks_internals.md new file mode 100644 index 0000000000..180d9a0e28 --- /dev/null +++ b/argilla/docs/how_to_guides/webhooks_internals.md @@ -0,0 +1,1863 @@ +# Webhooks internal + +Argilla Webhooks implements [Standard Webhooks](https://www.standardwebhooks.com) to facilitate the integration of Argilla with listeners written in any language and ensure consistency and security. If you need to do a custom integration with Argilla webhooks take a look at the [specs](https://github.com/standard-webhooks/standard-webhooks/blob/main/spec/standard-webhooks.md) to have a better understanding of how to implement such integration. + +## Events payload + +The payload is the core part of every webhook. It is the actual data being sent as part of the webhook, and usually consists of important information about the event and related information. + +The payloads sent by Argilla webhooks will be a POST request with a JSON body with the following structure: + +```json +{ + "type": "example.event", + "version": 1, + "timestamp": "2022-11-03T20:26:10.344522Z", + "data": { + "foo": "bar" + } +} +``` + +Your listener must return any `2XX` status code value to indicate to Argilla that the webhook message has been successfully received. If a different status code is returned Argilla will retry up to 3 times. You have up to 20 seconds to give a response to an Argilla webhook request. + +The payload attributes are: + +* `type`: a full-stop delimited type string associated with the event. The type indicates the type of the event being sent. 
(e.g. `"dataset.created"` or `"record.completed"`), indicates the schema of the payload (passed in the `data` attribute). The following are the values that can be present on this attribute: + * `dataset.created` + * `dataset.updated` + * `dataset.deleted` + * `dataset.published` + * `record.created` + * `record.updated` + * `record.deleted` + * `record.completed` + * `response.created` + * `response.updated` + * `response.deleted` +* `version`: an integer with the version of the webhook payload sent. Right now we only support version `1`. +* `timestamp`: the timestamp of when the event occurred. +* `data`: the actual event data associated with the event. + +## Events payload examples + +In this section we will show payload examples for all the events emitted by Argilla webhooks. + +### Dataset events + +#### Created + +```json +{ + "type": "dataset.created", + "version": 1, + "timestamp": "2024-09-26T14:17:20.488053Z", + "data": { + "id": "3d673549-ad31-4485-97eb-31f9dcd0df71", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": true, + "status": "draft", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": "350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [], + "fields": [], + "metadata_properties": [], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:17:20.477163", + "inserted_at": "2024-09-26T14:17:20.477163", + "updated_at": "2024-09-26T14:17:20.477163" + } +} +``` + +#### Updated + +```json +{ + "type": "dataset.updated", + "version": 1, + "timestamp": "2024-09-26T14:17:20.504483Z", + "data": { + "id": "3d673549-ad31-4485-97eb-31f9dcd0df71", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": false, + "status": "draft", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": 
"350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [], + "fields": [ + { + "id": "77578693-9925-4c3d-a921-8c964cdd7acd", + "name": "text", + "title": "text", + "required": true, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-26T14:17:20.528738", + "updated_at": "2024-09-26T14:17:20.528738" + } + ] + "metadata_properties": [], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:17:20.497343", + "inserted_at": "2024-09-26T14:17:20.477163", + "updated_at": "2024-09-26T14:17:20.497343" + } +} +``` + +#### Deleted + +```json +{ + "type": "dataset.deleted", + "version": 1, + "timestamp": "2024-09-26T14:21:44.261872Z", + "data": { + "id": "3d673549-ad31-4485-97eb-31f9dcd0df71", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": false, + "status": "ready", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": "350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [ + { + "id": "80069251-4792-49e7-b58a-69a6117e8d32", + "name": "int_score", + "title": "Rate the quality of the text", + "description": null, + "required": true, + "settings": { + "type": "rating", + "options": [ + { + "value": 0 + }, + { + "value": 1 + }, + { + "value": 2 + }, + { + "value": 3 + }, + { + "value": 4 + }, + { + "value": 5 + } + ] + }, + "inserted_at": "2024-09-26T14:17:20.541716", + "updated_at": "2024-09-26T14:17:20.541716" + }, + { + "id": "5e7b45c3-b863-48c8-a1e8-2caa279b71e7", + "name": "comments", + "title": "Comments:", + "description": null, + "required": false, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-26T14:17:20.551750", + "updated_at": "2024-09-26T14:17:20.551750" + } + ], + "fields": [ 
+ { + "id": "77578693-9925-4c3d-a921-8c964cdd7acd", + "name": "text", + "title": "text", + "required": true, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-26T14:17:20.528738", + "updated_at": "2024-09-26T14:17:20.528738" + } + ], + "metadata_properties": [ + { + "id": "284945d9-4bda-4fde-9ca0-b3928282ce83", + "name": "dump", + "title": "dump", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.560704", + "updated_at": "2024-09-26T14:17:20.560704" + }, + { + "id": "5b8f17e5-1be5-4d99-b3d3-567cfaf33fe3", + "name": "url", + "title": "url", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.570162", + "updated_at": "2024-09-26T14:17:20.570162" + }, + { + "id": "a18c60ca-0212-4b22-b1f4-ab3e0fc5ae95", + "name": "language", + "title": "language", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.578088", + "updated_at": "2024-09-26T14:17:20.578088" + }, + { + "id": "c5f6d407-87b7-4678-9c7b-28cd002fcefb", + "name": "language_score", + "title": "language_score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.585319", + "updated_at": "2024-09-26T14:17:20.585319" + }, + { + "id": "ed3ee682-5d12-4c58-91a2-b1cca89fe62b", + "name": "token_count", + "title": "token_count", + "settings": { + "min": null, + "max": null, + "type": "integer" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.593545", + "updated_at": "2024-09-26T14:17:20.593545" + }, + { + "id": "c807d5dd-3cf0-47b9-b07e-bcf03176115f", + "name": "score", + "title": "score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.601316", + 
"updated_at": "2024-09-26T14:17:20.601316" + } + ], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:17:20.675364", + "inserted_at": "2024-09-26T14:17:20.477163", + "updated_at": "2024-09-26T14:17:20.675364" + } +} +``` + +#### Published + +```json +{ + "type": "dataset.published", + "version": 1, + "timestamp": "2024-09-26T14:17:20.680921Z", + "data": { + "id": "3d673549-ad31-4485-97eb-31f9dcd0df71", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": false, + "status": "ready", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": "350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [ + { + "id": "80069251-4792-49e7-b58a-69a6117e8d32", + "name": "int_score", + "title": "Rate the quality of the text", + "description": null, + "required": true, + "settings": { + "type": "rating", + "options": [ + { + "value": 0 + }, + { + "value": 1 + }, + { + "value": 2 + }, + { + "value": 3 + }, + { + "value": 4 + }, + { + "value": 5 + } + ] + }, + "inserted_at": "2024-09-26T14:17:20.541716", + "updated_at": "2024-09-26T14:17:20.541716" + }, + { + "id": "5e7b45c3-b863-48c8-a1e8-2caa279b71e7", + "name": "comments", + "title": "Comments:", + "description": null, + "required": false, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-26T14:17:20.551750", + "updated_at": "2024-09-26T14:17:20.551750" + } + ], + "fields": [ + { + "id": "77578693-9925-4c3d-a921-8c964cdd7acd", + "name": "text", + "title": "text", + "required": true, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-26T14:17:20.528738", + "updated_at": "2024-09-26T14:17:20.528738" + } + ], + "metadata_properties": [ + { + "id": "284945d9-4bda-4fde-9ca0-b3928282ce83", + "name": "dump", + "title": "dump", + "settings": { + "type": "terms", + "values": 
null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.560704", + "updated_at": "2024-09-26T14:17:20.560704" + }, + { + "id": "5b8f17e5-1be5-4d99-b3d3-567cfaf33fe3", + "name": "url", + "title": "url", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.570162", + "updated_at": "2024-09-26T14:17:20.570162" + }, + { + "id": "a18c60ca-0212-4b22-b1f4-ab3e0fc5ae95", + "name": "language", + "title": "language", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.578088", + "updated_at": "2024-09-26T14:17:20.578088" + }, + { + "id": "c5f6d407-87b7-4678-9c7b-28cd002fcefb", + "name": "language_score", + "title": "language_score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.585319", + "updated_at": "2024-09-26T14:17:20.585319" + }, + { + "id": "ed3ee682-5d12-4c58-91a2-b1cca89fe62b", + "name": "token_count", + "title": "token_count", + "settings": { + "min": null, + "max": null, + "type": "integer" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.593545", + "updated_at": "2024-09-26T14:17:20.593545" + }, + { + "id": "c807d5dd-3cf0-47b9-b07e-bcf03176115f", + "name": "score", + "title": "score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.601316", + "updated_at": "2024-09-26T14:17:20.601316" + } + ], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:17:20.675364", + "inserted_at": "2024-09-26T14:17:20.477163", + "updated_at": "2024-09-26T14:17:20.675364" + } +} +``` + +### Record events + +#### Created + +```json +{ + "type": "record.created", + "version": 1, + "timestamp": "2024-09-26T14:17:43.078165Z", + "data": { + "id": "49e0acda-df13-4f65-8137-2274b3e33c9c", + 
"status": "pending", + "fields": { + "text": "Taking Play Seriously\nBy ROBIN MARANTZ HENIG\nPublished: February 17, 2008\nOn a drizzly Tuesday night in late January, 200 people came out to hear a psychiatrist talk rhapsodically about play -- not just the intense, joyous play of children, but play for all people, at all ages, at all times." + }, + "metadata": { + "dump": "CC-MAIN-2013-20", + "url": "http://query.nytimes.com/gst/fullpage.html?res=9404E7DA1339F934A25751C0A96E9C8B63&scp=2&sq=taking%20play%20seriously&st=cse", + "language": "en", + "language_score": 0.9614589214324951, + "token_count": 1055, + "score": 2.5625 + }, + "external_id": "", + "dataset": { + "id": "3d673549-ad31-4485-97eb-31f9dcd0df71", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": false, + "status": "ready", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": "350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [ + { + "id": "80069251-4792-49e7-b58a-69a6117e8d32", + "name": "int_score", + "title": "Rate the quality of the text", + "description": null, + "required": true, + "settings": { + "type": "rating", + "options": [ + { + "value": 0 + }, + { + "value": 1 + }, + { + "value": 2 + }, + { + "value": 3 + }, + { + "value": 4 + }, + { + "value": 5 + } + ] + }, + "inserted_at": "2024-09-26T14:17:20.541716", + "updated_at": "2024-09-26T14:17:20.541716" + }, + { + "id": "5e7b45c3-b863-48c8-a1e8-2caa279b71e7", + "name": "comments", + "title": "Comments:", + "description": null, + "required": false, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-26T14:17:20.551750", + "updated_at": "2024-09-26T14:17:20.551750" + } + ], + "fields": [ + { + "id": "77578693-9925-4c3d-a921-8c964cdd7acd", + "name": "text", + "title": "text", + "required": true, + "settings": { + "type": 
"text", + "use_markdown": false + }, + "inserted_at": "2024-09-26T14:17:20.528738", + "updated_at": "2024-09-26T14:17:20.528738" + } + ], + "metadata_properties": [ + { + "id": "284945d9-4bda-4fde-9ca0-b3928282ce83", + "name": "dump", + "title": "dump", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.560704", + "updated_at": "2024-09-26T14:17:20.560704" + }, + { + "id": "5b8f17e5-1be5-4d99-b3d3-567cfaf33fe3", + "name": "url", + "title": "url", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.570162", + "updated_at": "2024-09-26T14:17:20.570162" + }, + { + "id": "a18c60ca-0212-4b22-b1f4-ab3e0fc5ae95", + "name": "language", + "title": "language", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.578088", + "updated_at": "2024-09-26T14:17:20.578088" + }, + { + "id": "c5f6d407-87b7-4678-9c7b-28cd002fcefb", + "name": "language_score", + "title": "language_score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.585319", + "updated_at": "2024-09-26T14:17:20.585319" + }, + { + "id": "ed3ee682-5d12-4c58-91a2-b1cca89fe62b", + "name": "token_count", + "title": "token_count", + "settings": { + "min": null, + "max": null, + "type": "integer" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.593545", + "updated_at": "2024-09-26T14:17:20.593545" + }, + { + "id": "c807d5dd-3cf0-47b9-b07e-bcf03176115f", + "name": "score", + "title": "score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-26T14:17:20.601316", + "updated_at": "2024-09-26T14:17:20.601316" + } + ], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:17:20.675364", + 
"inserted_at": "2024-09-26T14:17:20.477163", + "updated_at": "2024-09-26T14:17:20.675364" + }, + "inserted_at": "2024-09-26T14:17:43.026852", + "updated_at": "2024-09-26T14:17:43.026852" + } +} +``` + +#### Updated + +```json +{ + "type": "record.updated", + "version": 1, + "timestamp": "2024-09-26T14:05:30.231988Z", + "data": { + "id": "88654411-4eec-4d17-ad73-e5baf59d0efb", + "status": "completed", + "fields": { + "text": "Throughout life there are many times when outside influences change or influence decision-making. The young child has inner motivation to learn and explore, but as he matures, finds outside sources to be a motivating force for development, as well." + }, + "metadata": { + "dump": "CC-MAIN-2013-20", + "url": "http://www.funderstanding.com/category/child-development/brain-child-development/", + "language": "en", + "language_score": 0.9633054733276367, + "token_count": 1062, + "score": 3.8125 + }, + "external_id": "", + "dataset": { + "id": "ae2961f0-18a4-49d5-ba0c-40fa863fc8f2", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": false, + "status": "ready", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": "350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [ + { + "id": "faeea416-5390-4721-943c-de7d0212ba20", + "name": "int_score", + "title": "Rate the quality of the text", + "description": null, + "required": true, + "settings": { + "type": "rating", + "options": [ + { + "value": 0 + }, + { + "value": 1 + }, + { + "value": 2 + }, + { + "value": 3 + }, + { + "value": 4 + }, + { + "value": 5 + } + ] + }, + "inserted_at": "2024-09-20T09:39:20.481193", + "updated_at": "2024-09-20T09:39:20.481193" + }, + { + "id": "0e14a758-a6d0-43ff-af5b-39f4e4d031ab", + "name": "comments", + "title": "Comments:", + "description": null, + "required": false, + "settings": { + 
"type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.490851", + "updated_at": "2024-09-20T09:39:20.490851" + } + ], + "fields": [ + { + "id": "a4e81325-7d11-4dcf-af23-d3c867c75c9c", + "name": "text", + "title": "text", + "required": true, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.468254", + "updated_at": "2024-09-20T09:39:20.468254" + } + ], + "metadata_properties": [ + { + "id": "1259d700-2ff6-4315-a3c7-703bce3d65d7", + "name": "dump", + "title": "dump", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.499466", + "updated_at": "2024-09-20T09:39:20.499466" + }, + { + "id": "9d135f00-5a51-4506-a607-bc463dce1c2f", + "name": "url", + "title": "url", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.507944", + "updated_at": "2024-09-20T09:39:20.507944" + }, + { + "id": "98eced0d-d92f-486c-841c-a55085c7538b", + "name": "language", + "title": "language", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.517551", + "updated_at": "2024-09-20T09:39:20.517551" + }, + { + "id": "b9f9a3b9-7186-4e23-9147-b5aa52d0d045", + "name": "language_score", + "title": "language_score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.526219", + "updated_at": "2024-09-20T09:39:20.526219" + }, + { + "id": "0585c420-5885-4fce-9757-82c5199304bc", + "name": "token_count", + "title": "token_count", + "settings": { + "min": null, + "max": null, + "type": "integer" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.534559", + "updated_at": "2024-09-20T09:39:20.534559" + }, + { + "id": "ae31acb5-f198-4f0b-8d6c-13fcc80d10d1", + "name": "score", + "title": 
"score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.544562", + "updated_at": "2024-09-20T09:39:20.544562" + } + ], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:05:30.129734", + "inserted_at": "2024-09-20T09:39:20.433798", + "updated_at": "2024-09-26T14:05:30.130662" + }, + "inserted_at": "2024-09-20T09:39:23.148539", + "updated_at": "2024-09-26T14:05:30.224076" + } +} +``` + +#### Deleted + +```json +{ + "type": "record.deleted", + "version": 1, + "timestamp": "2024-09-26T14:45:30.464503Z", + "data": { + "id": "5b285767-18c9-46ab-a4ec-5e0ee4e26de9", + "status": "pending", + "fields": { + "text": "This tutorial shows how to send modifications of code in the right way: by using patches.\nThe word developer is used here for someone having a KDE SVN account.\nWe suppose that you have modified some code in KDE and that you are ready to share it. First a few important points:\nNow you have the modification as a source file. Sending the source file will not be helpful, as probably someone else has done other modifications to the original file in the meantime. So your modified file could not replace it." 
+ }, + "metadata": { + "dump": "CC-MAIN-2013-20", + "url": "http://techbase.kde.org/index.php?title=Contribute/Send_Patches&oldid=40759", + "language": "en", + "language_score": 0.9597765207290649, + "token_count": 2482, + "score": 3.0625 + }, + "external_id": "", + "dataset": { + "id": "ae2961f0-18a4-49d5-ba0c-40fa863fc8f2", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": false, + "status": "ready", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": "350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [ + { + "id": "faeea416-5390-4721-943c-de7d0212ba20", + "name": "int_score", + "title": "Rate the quality of the text", + "description": null, + "required": true, + "settings": { + "type": "rating", + "options": [ + { + "value": 0 + }, + { + "value": 1 + }, + { + "value": 2 + }, + { + "value": 3 + }, + { + "value": 4 + }, + { + "value": 5 + } + ] + }, + "inserted_at": "2024-09-20T09:39:20.481193", + "updated_at": "2024-09-20T09:39:20.481193" + }, + { + "id": "0e14a758-a6d0-43ff-af5b-39f4e4d031ab", + "name": "comments", + "title": "Comments:", + "description": null, + "required": false, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.490851", + "updated_at": "2024-09-20T09:39:20.490851" + } + ], + "fields": [ + { + "id": "a4e81325-7d11-4dcf-af23-d3c867c75c9c", + "name": "text", + "title": "text", + "required": true, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.468254", + "updated_at": "2024-09-20T09:39:20.468254" + } + ], + "metadata_properties": [ + { + "id": "1259d700-2ff6-4315-a3c7-703bce3d65d7", + "name": "dump", + "title": "dump", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.499466", 
+ "updated_at": "2024-09-20T09:39:20.499466" + }, + { + "id": "9d135f00-5a51-4506-a607-bc463dce1c2f", + "name": "url", + "title": "url", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.507944", + "updated_at": "2024-09-20T09:39:20.507944" + }, + { + "id": "98eced0d-d92f-486c-841c-a55085c7538b", + "name": "language", + "title": "language", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.517551", + "updated_at": "2024-09-20T09:39:20.517551" + }, + { + "id": "b9f9a3b9-7186-4e23-9147-b5aa52d0d045", + "name": "language_score", + "title": "language_score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.526219", + "updated_at": "2024-09-20T09:39:20.526219" + }, + { + "id": "0585c420-5885-4fce-9757-82c5199304bc", + "name": "token_count", + "title": "token_count", + "settings": { + "min": null, + "max": null, + "type": "integer" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.534559", + "updated_at": "2024-09-20T09:39:20.534559" + }, + { + "id": "ae31acb5-f198-4f0b-8d6c-13fcc80d10d1", + "name": "score", + "title": "score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.544562", + "updated_at": "2024-09-20T09:39:20.544562" + } + ], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:15:11.139023", + "inserted_at": "2024-09-20T09:39:20.433798", + "updated_at": "2024-09-26T14:15:11.141067" + }, + "inserted_at": "2024-09-20T09:39:23.148687", + "updated_at": "2024-09-20T09:39:23.148687" + } +} +``` + +#### Completed + +```json +{ + "type": "record.completed", + "version": 1, + "timestamp": "2024-09-26T14:05:30.236958Z", + "data": { + "id": "88654411-4eec-4d17-ad73-e5baf59d0efb", + "status": 
"completed", + "fields": { + "text": "Throughout life there are many times when outside influences change or influence decision-making. The young child has inner motivation to learn and explore, but as he matures, finds outside sources to be a motivating force for development, as well." + }, + "metadata": { + "dump": "CC-MAIN-2013-20", + "url": "http://www.funderstanding.com/category/child-development/brain-child-development/", + "language": "en", + "language_score": 0.9633054733276367, + "token_count": 1062, + "score": 3.8125 + }, + "external_id": "", + "dataset": { + "id": "ae2961f0-18a4-49d5-ba0c-40fa863fc8f2", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": false, + "status": "ready", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": "350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [ + { + "id": "faeea416-5390-4721-943c-de7d0212ba20", + "name": "int_score", + "title": "Rate the quality of the text", + "description": null, + "required": true, + "settings": { + "type": "rating", + "options": [ + { + "value": 0 + }, + { + "value": 1 + }, + { + "value": 2 + }, + { + "value": 3 + }, + { + "value": 4 + }, + { + "value": 5 + } + ] + }, + "inserted_at": "2024-09-20T09:39:20.481193", + "updated_at": "2024-09-20T09:39:20.481193" + }, + { + "id": "0e14a758-a6d0-43ff-af5b-39f4e4d031ab", + "name": "comments", + "title": "Comments:", + "description": null, + "required": false, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.490851", + "updated_at": "2024-09-20T09:39:20.490851" + } + ], + "fields": [ + { + "id": "a4e81325-7d11-4dcf-af23-d3c867c75c9c", + "name": "text", + "title": "text", + "required": true, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.468254", + 
"updated_at": "2024-09-20T09:39:20.468254" + } + ], + "metadata_properties": [ + { + "id": "1259d700-2ff6-4315-a3c7-703bce3d65d7", + "name": "dump", + "title": "dump", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.499466", + "updated_at": "2024-09-20T09:39:20.499466" + }, + { + "id": "9d135f00-5a51-4506-a607-bc463dce1c2f", + "name": "url", + "title": "url", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.507944", + "updated_at": "2024-09-20T09:39:20.507944" + }, + { + "id": "98eced0d-d92f-486c-841c-a55085c7538b", + "name": "language", + "title": "language", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.517551", + "updated_at": "2024-09-20T09:39:20.517551" + }, + { + "id": "b9f9a3b9-7186-4e23-9147-b5aa52d0d045", + "name": "language_score", + "title": "language_score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.526219", + "updated_at": "2024-09-20T09:39:20.526219" + }, + { + "id": "0585c420-5885-4fce-9757-82c5199304bc", + "name": "token_count", + "title": "token_count", + "settings": { + "min": null, + "max": null, + "type": "integer" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.534559", + "updated_at": "2024-09-20T09:39:20.534559" + }, + { + "id": "ae31acb5-f198-4f0b-8d6c-13fcc80d10d1", + "name": "score", + "title": "score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.544562", + "updated_at": "2024-09-20T09:39:20.544562" + } + ], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:05:30.129734", + "inserted_at": "2024-09-20T09:39:20.433798", + "updated_at": "2024-09-26T14:05:30.130662" + }, 
+ "inserted_at": "2024-09-20T09:39:23.148539", + "updated_at": "2024-09-26T14:05:30.224076" + } +} +``` + +### Response events + +#### Created + +```json +{ + "type": "response.created", + "version": 1, + "timestamp": "2024-09-26T14:05:30.182364Z", + "data": { + "id": "7164a58e-3611-4b0a-98cc-9184bc92dc5a", + "values": { + "int_score": { + "value": 3 + } + }, + "status": "submitted", + "record": { + "id": "88654411-4eec-4d17-ad73-e5baf59d0efb", + "status": "pending", + "fields": { + "text": "Throughout life there are many times when outside influences change or influence decision-making. The young child has inner motivation to learn and explore, but as he matures, finds outside sources to be a motivating force for development, as well." + }, + "metadata": { + "dump": "CC-MAIN-2013-20", + "url": "http://www.funderstanding.com/category/child-development/brain-child-development/", + "language": "en", + "language_score": 0.9633054733276367, + "token_count": 1062, + "score": 3.8125 + }, + "external_id": "", + "dataset": { + "id": "ae2961f0-18a4-49d5-ba0c-40fa863fc8f2", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": false, + "status": "ready", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": "350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [ + { + "id": "faeea416-5390-4721-943c-de7d0212ba20", + "name": "int_score", + "title": "Rate the quality of the text", + "description": null, + "required": true, + "settings": { + "type": "rating", + "options": [ + { + "value": 0 + }, + { + "value": 1 + }, + { + "value": 2 + }, + { + "value": 3 + }, + { + "value": 4 + }, + { + "value": 5 + } + ] + }, + "inserted_at": "2024-09-20T09:39:20.481193", + "updated_at": "2024-09-20T09:39:20.481193" + }, + { + "id": "0e14a758-a6d0-43ff-af5b-39f4e4d031ab", + "name": "comments", + "title": 
"Comments:", + "description": null, + "required": false, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.490851", + "updated_at": "2024-09-20T09:39:20.490851" + } + ], + "fields": [ + { + "id": "a4e81325-7d11-4dcf-af23-d3c867c75c9c", + "name": "text", + "title": "text", + "required": true, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.468254", + "updated_at": "2024-09-20T09:39:20.468254" + } + ], + "metadata_properties": [ + { + "id": "1259d700-2ff6-4315-a3c7-703bce3d65d7", + "name": "dump", + "title": "dump", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.499466", + "updated_at": "2024-09-20T09:39:20.499466" + }, + { + "id": "9d135f00-5a51-4506-a607-bc463dce1c2f", + "name": "url", + "title": "url", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.507944", + "updated_at": "2024-09-20T09:39:20.507944" + }, + { + "id": "98eced0d-d92f-486c-841c-a55085c7538b", + "name": "language", + "title": "language", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.517551", + "updated_at": "2024-09-20T09:39:20.517551" + }, + { + "id": "b9f9a3b9-7186-4e23-9147-b5aa52d0d045", + "name": "language_score", + "title": "language_score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.526219", + "updated_at": "2024-09-20T09:39:20.526219" + }, + { + "id": "0585c420-5885-4fce-9757-82c5199304bc", + "name": "token_count", + "title": "token_count", + "settings": { + "min": null, + "max": null, + "type": "integer" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.534559", + "updated_at": "2024-09-20T09:39:20.534559" + }, + { + "id": 
"ae31acb5-f198-4f0b-8d6c-13fcc80d10d1", + "name": "score", + "title": "score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.544562", + "updated_at": "2024-09-20T09:39:20.544562" + } + ], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:05:30.129734", + "inserted_at": "2024-09-20T09:39:20.433798", + "updated_at": "2024-09-23T11:08:30.392833" + }, + "inserted_at": "2024-09-20T09:39:23.148539", + "updated_at": "2024-09-20T09:39:23.148539" + }, + "user": { + "id": "df114042-958d-42c6-9f03-ab49bd451c6c", + "first_name": "", + "last_name": null, + "username": "argilla", + "role": "owner", + "inserted_at": "2024-09-05T11:39:20.376463", + "updated_at": "2024-09-05T11:39:20.376463" + }, + "inserted_at": "2024-09-26T14:05:30.128332", + "updated_at": "2024-09-26T14:05:30.128332" + } +} +``` + +#### Updated + +```json +{ + "type": "response.updated", + "version": 1, + "timestamp": "2024-09-26T14:13:22.256501Z", + "data": { + "id": "38e4d537-c768-4ced-916e-31b74b220c36", + "values": { + "int_score": { + "value": 5 + } + }, + "status": "discarded", + "record": { + "id": "54b137ae-68a4-4aa4-ab2f-ef350ca96a6b", + "status": "completed", + "fields": { + "text": "Bolivia: Coca-chewing protest outside US embassy\nIndigenous activists in Bolivia have been holding a mass coca-chewing protest as part of campaign to end an international ban on the practice.\nHundreds of people chewed the leaf outside the US embassy in La Paz and in other cities across the country." 
+ }, + "metadata": { + "dump": "CC-MAIN-2013-20", + "url": "http://www.bbc.co.uk/news/world-latin-america-12292661", + "language": "en", + "language_score": 0.9660392999649048, + "token_count": 484, + "score": 2.703125 + }, + "external_id": "", + "dataset": { + "id": "ae2961f0-18a4-49d5-ba0c-40fa863fc8f2", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": false, + "status": "ready", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": "350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [ + { + "id": "faeea416-5390-4721-943c-de7d0212ba20", + "name": "int_score", + "title": "Rate the quality of the text", + "description": null, + "required": true, + "settings": { + "type": "rating", + "options": [ + { + "value": 0 + }, + { + "value": 1 + }, + { + "value": 2 + }, + { + "value": 3 + }, + { + "value": 4 + }, + { + "value": 5 + } + ] + }, + "inserted_at": "2024-09-20T09:39:20.481193", + "updated_at": "2024-09-20T09:39:20.481193" + }, + { + "id": "0e14a758-a6d0-43ff-af5b-39f4e4d031ab", + "name": "comments", + "title": "Comments:", + "description": null, + "required": false, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.490851", + "updated_at": "2024-09-20T09:39:20.490851" + } + ], + "fields": [ + { + "id": "a4e81325-7d11-4dcf-af23-d3c867c75c9c", + "name": "text", + "title": "text", + "required": true, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.468254", + "updated_at": "2024-09-20T09:39:20.468254" + } + ], + "metadata_properties": [ + { + "id": "1259d700-2ff6-4315-a3c7-703bce3d65d7", + "name": "dump", + "title": "dump", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.499466", + "updated_at": 
"2024-09-20T09:39:20.499466" + }, + { + "id": "9d135f00-5a51-4506-a607-bc463dce1c2f", + "name": "url", + "title": "url", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.507944", + "updated_at": "2024-09-20T09:39:20.507944" + }, + { + "id": "98eced0d-d92f-486c-841c-a55085c7538b", + "name": "language", + "title": "language", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.517551", + "updated_at": "2024-09-20T09:39:20.517551" + }, + { + "id": "b9f9a3b9-7186-4e23-9147-b5aa52d0d045", + "name": "language_score", + "title": "language_score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.526219", + "updated_at": "2024-09-20T09:39:20.526219" + }, + { + "id": "0585c420-5885-4fce-9757-82c5199304bc", + "name": "token_count", + "title": "token_count", + "settings": { + "min": null, + "max": null, + "type": "integer" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.534559", + "updated_at": "2024-09-20T09:39:20.534559" + }, + { + "id": "ae31acb5-f198-4f0b-8d6c-13fcc80d10d1", + "name": "score", + "title": "score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.544562", + "updated_at": "2024-09-20T09:39:20.544562" + } + ], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:13:22.204670", + "inserted_at": "2024-09-20T09:39:20.433798", + "updated_at": "2024-09-26T14:07:09.788573" + }, + "inserted_at": "2024-09-20T09:39:23.148505", + "updated_at": "2024-09-26T14:06:06.726296" + }, + "user": { + "id": "df114042-958d-42c6-9f03-ab49bd451c6c", + "first_name": "", + "last_name": null, + "username": "argilla", + "role": "owner", + "inserted_at": "2024-09-05T11:39:20.376463", + "updated_at": 
"2024-09-05T11:39:20.376463" + }, + "inserted_at": "2024-09-26T14:06:06.672138", + "updated_at": "2024-09-26T14:13:22.206179" + } +} +``` + +#### Deleted + +```json +{ + "type": "response.deleted", + "version": 1, + "timestamp": "2024-09-26T14:15:11.138363Z", + "data": { + "id": "7164a58e-3611-4b0a-98cc-9184bc92dc5a", + "values": { + "int_score": { + "value": 3 + } + }, + "status": "submitted", + "record": { + "id": "88654411-4eec-4d17-ad73-e5baf59d0efb", + "status": "completed", + "fields": { + "text": "Throughout life there are many times when outside influences change or influence decision-making. The young child has inner motivation to learn and explore, but as he matures, finds outside sources to be a motivating force for development, as well." + }, + "metadata": { + "dump": "CC-MAIN-2013-20", + "url": "http://www.funderstanding.com/category/child-development/brain-child-development/", + "language": "en", + "language_score": 0.9633054733276367, + "token_count": 1062, + "score": 3.8125 + }, + "external_id": "", + "dataset": { + "id": "ae2961f0-18a4-49d5-ba0c-40fa863fc8f2", + "name": "fineweb-edu-min", + "guidelines": null, + "allow_extra_metadata": false, + "status": "ready", + "distribution": { + "strategy": "overlap", + "min_submitted": 1 + }, + "workspace": { + "id": "350bc020-2cd2-4a67-8b23-37a15c4d8139", + "name": "argilla", + "inserted_at": "2024-09-05T11:39:20.377192", + "updated_at": "2024-09-05T11:39:20.377192" + }, + "questions": [ + { + "id": "faeea416-5390-4721-943c-de7d0212ba20", + "name": "int_score", + "title": "Rate the quality of the text", + "description": null, + "required": true, + "settings": { + "type": "rating", + "options": [ + { + "value": 0 + }, + { + "value": 1 + }, + { + "value": 2 + }, + { + "value": 3 + }, + { + "value": 4 + }, + { + "value": 5 + } + ] + }, + "inserted_at": "2024-09-20T09:39:20.481193", + "updated_at": "2024-09-20T09:39:20.481193" + }, + { + "id": "0e14a758-a6d0-43ff-af5b-39f4e4d031ab", + "name": "comments", + 
"title": "Comments:", + "description": null, + "required": false, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.490851", + "updated_at": "2024-09-20T09:39:20.490851" + } + ], + "fields": [ + { + "id": "a4e81325-7d11-4dcf-af23-d3c867c75c9c", + "name": "text", + "title": "text", + "required": true, + "settings": { + "type": "text", + "use_markdown": false + }, + "inserted_at": "2024-09-20T09:39:20.468254", + "updated_at": "2024-09-20T09:39:20.468254" + } + ], + "metadata_properties": [ + { + "id": "1259d700-2ff6-4315-a3c7-703bce3d65d7", + "name": "dump", + "title": "dump", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.499466", + "updated_at": "2024-09-20T09:39:20.499466" + }, + { + "id": "9d135f00-5a51-4506-a607-bc463dce1c2f", + "name": "url", + "title": "url", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.507944", + "updated_at": "2024-09-20T09:39:20.507944" + }, + { + "id": "98eced0d-d92f-486c-841c-a55085c7538b", + "name": "language", + "title": "language", + "settings": { + "type": "terms", + "values": null + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.517551", + "updated_at": "2024-09-20T09:39:20.517551" + }, + { + "id": "b9f9a3b9-7186-4e23-9147-b5aa52d0d045", + "name": "language_score", + "title": "language_score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.526219", + "updated_at": "2024-09-20T09:39:20.526219" + }, + { + "id": "0585c420-5885-4fce-9757-82c5199304bc", + "name": "token_count", + "title": "token_count", + "settings": { + "min": null, + "max": null, + "type": "integer" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.534559", + "updated_at": "2024-09-20T09:39:20.534559" + }, + { 
+ "id": "ae31acb5-f198-4f0b-8d6c-13fcc80d10d1", + "name": "score", + "title": "score", + "settings": { + "min": null, + "max": null, + "type": "float" + }, + "visible_for_annotators": true, + "inserted_at": "2024-09-20T09:39:20.544562", + "updated_at": "2024-09-20T09:39:20.544562" + } + ], + "vectors_settings": [], + "last_activity_at": "2024-09-26T14:13:22.204670", + "inserted_at": "2024-09-20T09:39:20.433798", + "updated_at": "2024-09-26T14:13:22.207478" + }, + "inserted_at": "2024-09-20T09:39:23.148539", + "updated_at": "2024-09-26T14:05:30.224076" + }, + "user": { + "id": "df114042-958d-42c6-9f03-ab49bd451c6c", + "first_name": "", + "last_name": null, + "username": "argilla", + "role": "owner", + "inserted_at": "2024-09-05T11:39:20.376463", + "updated_at": "2024-09-05T11:39:20.376463" + }, + "inserted_at": "2024-09-26T14:05:30.128332", + "updated_at": "2024-09-26T14:05:30.128332" + } +} +``` + +## How to implement a listener + +Argilla webhooks implement [Standard Webhooks](https://www.standardwebhooks.com), so you can use one of their libraries, available in many different languages, to implement the verification of webhook events coming from Argilla. + +The following example is a simple listener written in Ruby, using [sinatra](https://sinatrarb.com) and the [standardwebhooks Ruby library](https://github.com/standard-webhooks/standard-webhooks/tree/main/libraries/ruby): + +```ruby +require "sinatra" +require "standardwebhooks" + +post "/webhook" do + wh = StandardWebhooks::Webhook.new("YOUR_SECRET") + + headers = { + "webhook-id" => env["HTTP_WEBHOOK_ID"], + "webhook-signature" => env["HTTP_WEBHOOK_SIGNATURE"], + "webhook-timestamp" => env["HTTP_WEBHOOK_TIMESTAMP"], + } + + puts wh.verify(request.body.read.to_s, headers) +end +``` + +In the previous snippet we are creating a [sinatra](https://sinatrarb.com) application that listens for `POST` requests on the `/webhook` endpoint.
We are using the [standardwebhooks Ruby library](https://github.com/standard-webhooks/standard-webhooks/tree/main/libraries/ruby) to verify the incoming webhook event and printing the verified payload on the console. diff --git a/argilla/docs/reference/argilla/SUMMARY.md b/argilla/docs/reference/argilla/SUMMARY.md index cfe33198e5..49d0ce459d 100644 --- a/argilla/docs/reference/argilla/SUMMARY.md +++ b/argilla/docs/reference/argilla/SUMMARY.md @@ -15,4 +15,5 @@ * [rg.Vector](records/vectors.md) * [rg.Metadata](records/metadata.md) * [rg.Query](search.md) +* [Webhooks](webhooks.md) * [rg.markdown](markdown.md) diff --git a/argilla/docs/reference/argilla/webhooks.md b/argilla/docs/reference/argilla/webhooks.md new file mode 100644 index 0000000000..3f71a4fb32 --- /dev/null +++ b/argilla/docs/reference/argilla/webhooks.md @@ -0,0 +1,61 @@ +--- +hide: footer +--- + +# `argilla.webhooks` + +Webhooks are a way for web applications to notify each other when something happens. For example, you might want to be +notified when a new dataset is created in Argilla. 
+ +## Usage Examples + +To listen for incoming webhooks, you can use the `webhook_listener` decorator function to register a function to be called +when a webhook is received: + +```python +from argilla.webhooks import webhook_listener + +@webhook_listener(events="dataset.created") +async def my_webhook_listener(dataset): + print(dataset) +``` + +To manually create a new webhook, instantiate the `Webhook` object with the URL, the events to listen for, and a description: + +```python +webhook = rg.Webhook( + url="https://somehost.com/webhook", + events=["dataset.created"], + description="My webhook" +) +webhook.create() +``` + +To retrieve a list of existing webhooks, use the `client.webhooks` attribute: + +```python +for webhook in client.webhooks: + print(webhook) +``` + +--- + +::: src.argilla.webhooks._resource.Webhook + +::: src.argilla.webhooks._helpers.webhook_listener + +::: src.argilla.webhooks._helpers.get_webhook_server + +::: src.argilla.webhooks._helpers.set_webhook_server + +::: src.argilla.webhooks._handler.WebhookHandler + +::: src.argilla.webhooks._event.WebhookEvent + +::: src.argilla.webhooks._event.DatasetEvent + +::: src.argilla.webhooks._event.RecordEvent + +::: src.argilla.webhooks._event.UserResponseEvent + + diff --git a/argilla/mkdocs.yml b/argilla/mkdocs.yml index 03fbf91d4e..fa98ee4e95 100644 --- a/argilla/mkdocs.yml +++ b/argilla/mkdocs.yml @@ -174,6 +174,7 @@ nav: - Import and export datasets: how_to_guides/import_export.md - Advanced: - Custom fields with layout templates: how_to_guides/custom_fields.md + - Use webhooks to respond to server events: how_to_guides/webhooks.md - Use Markdown to format rich content: how_to_guides/use_markdown_to_format_rich_content.md - Migrate users, workspaces and datasets to Argilla V2: how_to_guides/migrate_from_legacy_datasets.md - Tutorials: diff --git a/argilla/pdm.lock b/argilla/pdm.lock index 5b23b84233..ad9998a2ba 100644 --- a/argilla/pdm.lock +++ b/argilla/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy =
["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:61de6748832ba9a87d522476b8f3af1a80d83fbea5389c2c3c47dbf60fdffc66" +content_hash = "sha256:154336258f112fb111f039e0099a194a54ee424d267a3d70290e115acda22154" [[metadata.targets]] requires_python = ">=3.9" @@ -560,6 +560,20 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "deprecated" +version = "1.2.15" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" +summary = "Python @deprecated decorator to deprecate old python classes, functions or methods." +groups = ["default"] +dependencies = [ + "wrapt<2,>=1.10", +] +files = [ + {file = "Deprecated-1.2.15-py2.py3-none-any.whl", hash = "sha256:353bc4a8ac4bfc96800ddab349d89c25dec1079f65fd53acdcc1e0b975b21320"}, + {file = "deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d"}, +] + [[package]] name = "dill" version = "0.3.8" @@ -2682,6 +2696,24 @@ files = [ {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"}, ] +[[package]] +name = "standardwebhooks" +version = "1.0.0" +requires_python = ">=3.6" +summary = "Standard Webhooks" +groups = ["default"] +dependencies = [ + "Deprecated", + "attrs>=21.3.0", + "httpx>=0.23.0", + "python-dateutil", + "types-Deprecated", + "types-python-dateutil", +] +files = [ + {file = "standardwebhooks-1.0.0.tar.gz", hash = "sha256:d94b99c0dcea84156e03adad94f8dba32d5454cc68e12ec2c824051b55bb67ff"}, +] + [[package]] name = "tinycss2" version = "1.3.0" @@ -2753,6 +2785,28 @@ files = [ {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, ] +[[package]] +name = "types-deprecated" +version = "1.2.15.20241117" +requires_python = ">=3.8" +summary = "Typing stubs for Deprecated" +groups = ["default"] +files = [ + {file = 
"types-Deprecated-1.2.15.20241117.tar.gz", hash = "sha256:924002c8b7fddec51ba4949788a702411a2e3636cd9b2a33abd8ee119701d77e"}, + {file = "types_Deprecated-1.2.15.20241117-py3-none-any.whl", hash = "sha256:a0cc5e39f769fc54089fd8e005416b55d74aa03f6964d2ed1a0b0b2e28751884"}, +] + +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20241003" +requires_python = ">=3.8" +summary = "Typing stubs for python-dateutil" +groups = ["default"] +files = [ + {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"}, + {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -2872,6 +2926,57 @@ files = [ {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, ] +[[package]] +name = "wrapt" +version = "1.16.0" +requires_python = ">=3.6" +summary = "Module for decorators, wrappers and monkey patching." 
+groups = ["default"] +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +] + [[package]] name = "xxhash" version = "3.5.0" diff --git a/argilla/pyproject.toml b/argilla/pyproject.toml index 9febd5f7df..1872e0a575 100644 --- a/argilla/pyproject.toml +++ b/argilla/pyproject.toml @@ -1,12 +1,10 @@ [project] name = "argilla" description = "The Argilla python server SDK" -authors = [ - {name = "Argilla", email = "contact@argilla.io"}, -] +authors = [{ name = "Argilla", email = "contact@argilla.io" }] requires-python = ">= 3.9" readme = "README.md" -license = {text = "Apache 2.0"} +license = { text = "Apache 2.0" } dynamic = ["version"] @@ -18,11 +16,10 @@ dependencies = [ "rich>=10.0.0", "datasets>=2.0.0", "pillow>=9.5.0", + "standardwebhooks>=1.0.0", ] -legacy = [ - "argilla-v1[listeners]", -] +legacy = ["argilla-v1[listeners]"] [build-system] requires = ["pdm-backend"] @@ -71,6 +68,4 @@ dev = [ test = { cmd = "pytest tests", env_file = ".env.test" } lint = "ruff check" format = "black ." 
-all = {composite = ["format", "lint", "test"]} - - +all = { composite = ["format", "lint", "test"] } diff --git a/argilla/src/argilla/__init__.py b/argilla/src/argilla/__init__.py index cac91a6e05..b8b3e53c47 100644 --- a/argilla/src/argilla/__init__.py +++ b/argilla/src/argilla/__init__.py @@ -21,3 +21,4 @@ from argilla.responses import * # noqa from argilla.records import * # noqa from argilla.vectors import * # noqa +from argilla.webhooks import * # noqa diff --git a/argilla/src/argilla/_api/_client.py b/argilla/src/argilla/_api/_client.py index 8c9fda4701..680b425187 100644 --- a/argilla/src/argilla/_api/_client.py +++ b/argilla/src/argilla/_api/_client.py @@ -16,6 +16,8 @@ from typing import Optional import httpx + +from argilla._api._webhooks import WebhooksAPI from argilla._exceptions._api import UnauthorizedError from argilla._exceptions._client import ArgillaCredentialsError @@ -47,15 +49,19 @@ class ArgillaAPI: def __init__(self, http_client: httpx.Client): self.http_client = http_client + self.__users = UsersAPI(http_client=self.http_client) self.__workspaces = WorkspacesAPI(http_client=self.http_client) + self.__datasets = DatasetsAPI(http_client=self.http_client) - self.__users = UsersAPI(http_client=self.http_client) self.__fields = FieldsAPI(http_client=self.http_client) self.__questions = QuestionsAPI(http_client=self.http_client) - self.__records = RecordsAPI(http_client=self.http_client) self.__vectors = VectorsAPI(http_client=self.http_client) self.__metadata = MetadataAPI(http_client=self.http_client) + self.__records = RecordsAPI(http_client=self.http_client) + + self.__webhooks = WebhooksAPI(http_client=self.http_client) + @property def workspaces(self) -> "WorkspacesAPI": return self.__workspaces @@ -88,6 +94,10 @@ def vectors(self) -> "VectorsAPI": def metadata(self) -> "MetadataAPI": return self.__metadata + @property + def webhooks(self) -> "WebhooksAPI": + return self.__webhooks + class APIClient: """Initialize the SDK with the given API 
URL and API key. diff --git a/argilla/src/argilla/_api/_webhooks.py b/argilla/src/argilla/_api/_webhooks.py new file mode 100644 index 0000000000..868abeb1c7 --- /dev/null +++ b/argilla/src/argilla/_api/_webhooks.py @@ -0,0 +1,122 @@ +# Copyright 2024-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["WebhooksAPI"] + +from typing import List + +import httpx + +from argilla._api._base import ResourceAPI +from argilla._exceptions import api_error_handler +from argilla._models._webhook import WebhookModel + + +class WebhooksAPI(ResourceAPI[WebhookModel]): + http_client: httpx.Client + url_stub = "/api/v1/webhooks" + + @api_error_handler + def list(self) -> List[WebhookModel]: + """ + Get a list of all webhooks + + Returns: + List[WebhookModel]: List of webhooks + + """ + response = self.http_client.get(url=self.url_stub) + response.raise_for_status() + response_json = response.json() + webhooks = self._model_from_jsons(json_data=response_json["items"]) + self._log_message(message=f"Got {len(webhooks)} webhooks") + return webhooks + + @api_error_handler + def create(self, webhook: WebhookModel) -> WebhookModel: + """ + Create a webhook + + Args: + webhook (WebhookModel): Webhook to create + + Returns: + WebhookModel: Created webhook + + """ + response = self.http_client.post( + url=self.url_stub, + json={ + "url": webhook.url, + "events": webhook.events, + "description": webhook.description, + }, + ) + response.raise_for_status() + 
response_json = response.json() + webhook = self._model_from_json(json_data=response_json) + self._log_message(message=f"Created webhook with id {webhook.id}") + return webhook + + @api_error_handler + def delete(self, webhook_id: str) -> None: + """ + Delete a webhook + + Args: + webhook_id (str): ID of the webhook to delete + + """ + response = self.http_client.delete(url=f"{self.url_stub}/{webhook_id}") + response.raise_for_status() + self._log_message(message=f"Deleted webhook with id {webhook_id}") + + @api_error_handler + def update(self, webhook: WebhookModel) -> WebhookModel: + """ + Update a webhook + + Args: + webhook (WebhookModel): Webhook to update + + Returns: + WebhookModel: Updated webhook + + """ + response = self.http_client.patch(url=f"{self.url_stub}/{webhook.id}", json=webhook.model_dump()) + response.raise_for_status() + response_json = response.json() + webhook = self._model_from_json(json_data=response_json) + self._log_message(message=f"Updated webhook with id {webhook.id}") + return webhook + + @api_error_handler + def ping(self, webhook_id: str) -> None: + """ + Ping a webhook + + Args: + webhook_id (str): ID of the webhook to ping + + """ + response = self.http_client.post(url=f"{self.url_stub}/{webhook_id}/ping") + response.raise_for_status() + self._log_message(message=f"Pinged webhook with id {webhook_id}") + + @staticmethod + def _model_from_json(json_data: dict) -> WebhookModel: + return WebhookModel.model_validate(json_data) + + def _model_from_jsons(self, json_data: List[dict]) -> List[WebhookModel]: + return list(map(self._model_from_json, json_data)) diff --git a/argilla/src/argilla/_helpers/_resource_repr.py b/argilla/src/argilla/_helpers/_resource_repr.py index 4fc554cd55..7c2ce8145e 100644 --- a/argilla/src/argilla/_helpers/_resource_repr.py +++ b/argilla/src/argilla/_helpers/_resource_repr.py @@ -28,6 +28,7 @@ # "len_column": "datasets", }, "User": {"columns": ["username", "id", "role", "updated_at"], "table_name": "Users"}, 
+ "Webhook": {"columns": ["url", "id", "events", "enabled", "updated_at"], "table_name": "Webhooks"}, } diff --git a/argilla/src/argilla/_models/__init__.py b/argilla/src/argilla/_models/__init__.py index 4302e4259a..553296d6dd 100644 --- a/argilla/src/argilla/_models/__init__.py +++ b/argilla/src/argilla/_models/__init__.py @@ -62,3 +62,4 @@ IntegerMetadataPropertySettings, ) from argilla._models._settings._vectors import VectorFieldModel +from argilla._models._webhook import WebhookModel, EventType diff --git a/argilla/src/argilla/_models/_webhook.py b/argilla/src/argilla/_models/_webhook.py new file mode 100644 index 0000000000..747162aec9 --- /dev/null +++ b/argilla/src/argilla/_models/_webhook.py @@ -0,0 +1,72 @@ +# Copyright 2024-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from enum import Enum +from typing import List, Optional + +from pydantic import Field, ConfigDict + +from argilla._models._base import ResourceModel + + +class EventType(str, Enum): + dataset_created = "dataset.created" + dataset_updated = "dataset.updated" + dataset_deleted = "dataset.deleted" + dataset_published = "dataset.published" + + record_created = "record.created" + record_updated = "record.updated" + record_deleted = "record.deleted" + record_completed = "record.completed" + + response_created = "response.created" + response_updated = "response.updated" + response_deleted = "response.deleted" + + @property + def resource(self) -> str: + """ + Get the instance type of the event. + + Returns: + str: The instance type. It can be "dataset", "record", or "response". + + """ + return self.split(".")[0] + + @property + def action(self) -> str: + """ + Get the action type of the event. + + Returns: + str: The action type. It can be "created", "updated", "deleted", "published", or "completed". + + """ + return self.split(".")[1] + + +class WebhookModel(ResourceModel): + url: str + events: List[EventType] + enabled: bool = True + description: Optional[str] = None + + secret: Optional[str] = Field(None, description="Webhook secret. 
Read-only.") + + model_config = ConfigDict( + validate_assignment=True, + str_strip_whitespace=True, + ) diff --git a/argilla/src/argilla/client.py b/argilla/src/argilla/client.py index e9bf232de4..9d0c666304 100644 --- a/argilla/src/argilla/client.py +++ b/argilla/src/argilla/client.py @@ -22,6 +22,7 @@ from argilla import _api from argilla._api._base import ResourceAPI from argilla._api._client import DEFAULT_HTTP_CONFIG +from argilla._api._webhooks import WebhookModel from argilla._exceptions import ArgillaError, NotFoundError from argilla._helpers import GenericIterator from argilla._helpers._deploy import SpacesDeploymentMixin @@ -29,7 +30,7 @@ from argilla._models import DatasetModel, ResourceModel, UserModel, WorkspaceModel if TYPE_CHECKING: - from argilla import Dataset, User, Workspace + from argilla import Dataset, User, Workspace, Webhook __all__ = ["Argilla"] @@ -88,6 +89,11 @@ def users(self) -> "Users": """A collection of users on the server.""" return Users(client=self) + @property + def webhooks(self) -> "Webhooks": + """A collection of webhooks on the server.""" + return Webhooks(client=self) + @cached_property def me(self) -> "User": from argilla.users import User @@ -396,6 +402,69 @@ def _from_model(self, model: DatasetModel) -> "Dataset": return Dataset.from_model(model=model, client=self._client) +class Webhooks(Sequence["Webhook"], ResourceHTMLReprMixin): + """A webhooks class. It can be used to create a new webhook or to get an existing one.""" + + class _Iterator(GenericIterator["Webhook"]): + pass + + def __init__(self, client: "Argilla") -> None: + self._client = client + self._api = client.api.webhooks + + def __call__(self, id: Union[UUID, str]) -> Optional["Webhook"]: + """Get a webhook by id if exists. 
Otherwise, returns `None`""" + + model = _get_model_by_id(self._api, id) + if model: + return self._from_model(model) # noqa + warnings.warn(f"Webhook with id {id!r} not found") + + def __iter__(self): + return self._Iterator(self.list()) + + @overload + @abstractmethod + def __getitem__(self, index: int) -> "Webhook": ... + + @overload + @abstractmethod + def __getitem__(self, index: slice) -> Sequence["Webhook"]: ... + + def __getitem__(self, index) -> "Webhook": + model = self._api.list()[index] + return self._from_model(model) + + def __len__(self) -> int: + return len(self._api.list()) + + def add(self, webhook: "Webhook") -> "Webhook": + """Add a new webhook to the Argilla platform. + Args: + webhook: Webhook object. + + Returns: + Webhook: The created webhook. + """ + webhook._client = self._client + return webhook.create() + + def list(self) -> List["Webhook"]: + return [self._from_model(model) for model in self._api.list()] + + ############################ + # Private methods + ############################ + + def _repr_html_(self) -> str: + return self._represent_as_html(resources=self.list()) + + def _from_model(self, model: WebhookModel) -> "Webhook": + from argilla.webhooks import Webhook + + return Webhook.from_model(client=self._client, model=model) + + def _get_model_by_id(api: ResourceAPI, resource_id: Union[UUID, str]) -> Optional[ResourceModel]: """Get a resource model by id if found. 
Otherwise, `None`.""" try: diff --git a/argilla/src/argilla/responses.py b/argilla/src/argilla/responses.py index 2e4915e2f9..807627f624 100644 --- a/argilla/src/argilla/responses.py +++ b/argilla/src/argilla/responses.py @@ -189,6 +189,16 @@ def record(self, record: "Record") -> None: """Sets the record associated with the response""" self._record = record + @property + def record(self) -> "Record": + """Returns the record associated with the UserResponse""" + return self._record + + @record.setter + def record(self, record: "Record") -> None: + """Sets the record associated with the UserResponse""" + self._record = record + @classmethod def from_model(cls, model: UserResponseModel, record: "Record") -> "UserResponse": """Creates a UserResponse from a ResponseModel""" diff --git a/argilla/src/argilla/webhooks/__init__.py b/argilla/src/argilla/webhooks/__init__.py new file mode 100644 index 0000000000..4055cfb96b --- /dev/null +++ b/argilla/src/argilla/webhooks/__init__.py @@ -0,0 +1,43 @@ +# Copyright 2024-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import TYPE_CHECKING + +from argilla.webhooks._event import RecordEvent, DatasetEvent, UserResponseEvent, WebhookEvent +from argilla.webhooks._handler import WebhookHandler +from argilla.webhooks._helpers import ( + webhook_listener, + get_webhook_server, + set_webhook_server, + start_webhook_server, + stop_webhook_server, +) +from argilla.webhooks._resource import Webhook + +if TYPE_CHECKING: + pass + +__all__ = [ + "Webhook", + "WebhookHandler", + "RecordEvent", + "DatasetEvent", + "UserResponseEvent", + "WebhookEvent", + "webhook_listener", + "get_webhook_server", + "set_webhook_server", + "start_webhook_server", + "stop_webhook_server", +] diff --git a/argilla/src/argilla/webhooks/_event.py b/argilla/src/argilla/webhooks/_event.py new file mode 100644 index 0000000000..8c329e22e7 --- /dev/null +++ b/argilla/src/argilla/webhooks/_event.py @@ -0,0 +1,179 @@ +# Copyright 2024-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from datetime import datetime +from typing import TYPE_CHECKING, Union +from uuid import UUID + +from pydantic import BaseModel, ConfigDict + +from argilla import Dataset, Record, UserResponse, Workspace +from argilla._exceptions import ArgillaAPIError +from argilla._models import RecordModel, UserResponseModel, WorkspaceModel, EventType + +if TYPE_CHECKING: + from argilla import Argilla + +__all__ = ["RecordEvent", "DatasetEvent", "UserResponseEvent", "WebhookEvent"] + + +class RecordEvent(BaseModel): + """ + A parsed record event. + + Attributes: + type (EventType): The type of the event. + timestamp (datetime): The timestamp of the event. + record (Record): The record of the event. + """ + + type: EventType + timestamp: datetime + record: Record + + model_config = ConfigDict(arbitrary_types_allowed=True) + + +class DatasetEvent(BaseModel): + """ + A parsed dataset event. + + Attributes: + type (EventType): The type of the event. + timestamp (datetime): The timestamp of the event. + dataset (Dataset): The dataset of the event. + """ + + type: EventType + timestamp: datetime + dataset: Dataset + + model_config = ConfigDict(arbitrary_types_allowed=True) + + +class UserResponseEvent(BaseModel): + """ + A parsed user response event. + + Attributes: + type (EventType): The type of the event. + timestamp (datetime): The timestamp of the event. + response (UserResponse): The user response of the event. + """ + + type: EventType + timestamp: datetime + response: UserResponse + + model_config = ConfigDict(arbitrary_types_allowed=True) + + +class WebhookEvent(BaseModel): + """ + A webhook event. + + Attributes: + type (EventType): The type of the event. + timestamp (datetime): The timestamp of the event. + data (dict): The data of the event. + """ + + type: EventType + timestamp: datetime + data: dict + + def parsed(self, client: "Argilla") -> Union[RecordEvent, DatasetEvent, UserResponseEvent, "WebhookEvent"]: + """ + Parse the webhook event. 
+ + Args: + client: The Argilla client. + + Returns: + Event: The parsed event. + + """ + resource = self.type.resource + data = self.data or {} + + if resource == "dataset": + dataset = self._parse_dataset_from_webhook_data(data, client) + return DatasetEvent( + type=self.type, + timestamp=self.timestamp, + dataset=dataset, + ) + + elif resource == "record": + record = self._parse_record_from_webhook_data(data, client) + return RecordEvent( + type=self.type, + timestamp=self.timestamp, + record=record, + ) + + elif resource == "response": + user_response = self._parse_response_from_webhook_data(data, client) + return UserResponseEvent( + type=self.type, + timestamp=self.timestamp, + response=user_response, + ) + + return self + + @classmethod + def _parse_dataset_from_webhook_data(cls, data: dict, client: "Argilla") -> Dataset: + workspace = Workspace.from_model(WorkspaceModel.model_validate(data["workspace"]), client=client) + # TODO: Parse settings from the data + # settings = Settings._from_dict(data) + + dataset = Dataset(name=data["name"], workspace=workspace, client=client) + dataset.id = UUID(data["id"]) + + try: + dataset.get() + except ArgillaAPIError as _: + # TODO: Show notification + pass + finally: + return dataset + + @classmethod + def _parse_record_from_webhook_data(cls, data: dict, client: "Argilla") -> Record: + dataset = cls._parse_dataset_from_webhook_data(data["dataset"], client) + + record = Record.from_model(RecordModel.model_validate(data), dataset=dataset) + try: + record.get() + except ArgillaAPIError as _: + # TODO: Show notification + pass + finally: + return record + + @classmethod + def _parse_response_from_webhook_data(cls, data: dict, client: "Argilla") -> UserResponse: + record = cls._parse_record_from_webhook_data(data["record"], client) + + # TODO: Link the user resource to the response + user_response = UserResponse.from_model( + model=UserResponseModel(**data, user_id=data["user"]["id"]), + record=record, + ) + + return 
user_response + + +Event = Union[RecordEvent, DatasetEvent, UserResponseEvent, WebhookEvent] diff --git a/argilla/src/argilla/webhooks/_handler.py b/argilla/src/argilla/webhooks/_handler.py new file mode 100644 index 0000000000..ca6ca9a915 --- /dev/null +++ b/argilla/src/argilla/webhooks/_handler.py @@ -0,0 +1,78 @@ +# Copyright 2024-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Callable, TYPE_CHECKING + +from argilla.webhooks._event import WebhookEvent + +if TYPE_CHECKING: + from fastapi import Request + from argilla.webhooks._resource import Webhook + + +class WebhookHandler: + """ + The `WebhookHandler` class is used to handle incoming webhook requests. This class handles the + request verification and event object creation. + + Attributes: + webhook (Webhook): The webhook object. + """ + + def __init__(self, webhook: "Webhook"): + self.webhook = webhook + + def handle(self, func: Callable, raw_event: bool = False) -> Callable: + """ + This method handles the incoming webhook requests and calls the provided function. + + Parameters: + func (Callable): The function to be called when a webhook event is received. + raw_event (bool): Whether to pass the raw event object to the function. 
+ + Returns: + + """ + from fastapi import Request + + async def request_handler(request: Request): + event = await self._verify_request(request) + if event.type not in self.webhook.events: + return + + if raw_event: + return await func(event) + + return await func(**event.parsed(self.webhook._client).model_dump()) + + return request_handler + + async def _verify_request(self, request: "Request") -> WebhookEvent: + """ + Verify the request signature and return the event object. + + Arguments: + request (Request): The request object. + + Returns: + WebhookEvent: The event object. + """ + + from standardwebhooks.webhooks import Webhook + + body = await request.body() + headers = dict(request.headers) + + json = Webhook(whsecret=self.webhook.secret).verify(body, headers) + return WebhookEvent.model_validate(json) diff --git a/argilla/src/argilla/webhooks/_helpers.py b/argilla/src/argilla/webhooks/_helpers.py new file mode 100644 index 0000000000..f25c834d55 --- /dev/null +++ b/argilla/src/argilla/webhooks/_helpers.py @@ -0,0 +1,202 @@ +# Copyright 2024-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import time +import warnings +from threading import Thread +from typing import TYPE_CHECKING, Optional, Callable, Union, List + +import argilla as rg +from argilla import Argilla +from argilla.webhooks._handler import WebhookHandler +from argilla.webhooks._resource import Webhook + +if TYPE_CHECKING: + from fastapi import FastAPI + +__all__ = ["webhook_listener", "get_webhook_server", "set_webhook_server", "start_webhook_server"] + + +def _compute_default_webhook_server_url() -> str: + """ + Compute the webhook server URL. + + Returns: + str: The webhook server URL. If the environment variable `SPACE_HOST` is set, it will return `https://`. + Otherwise, it will return the value of the environment variable `WEBHOOK_SERVER_URL` or `http://127.0.0.1:8000`. + + """ + if space_host := os.getenv("SPACE_HOST"): + return f"https://{space_host}" + + return os.getenv("WEBHOOK_SERVER_URL", "http://127.0.0.1:8000") + + +def _webhook_url_for_func(func: Callable) -> str: + """ + Compute the full webhook URL for a given function. + + Parameters: + func (Callable): The function to compute the webhook URL for. + + Returns: + str: The full webhook URL. + + """ + webhook_server_url = _compute_default_webhook_server_url() + + return f"{webhook_server_url}/{func.__name__}" + + +def webhook_listener( + events: Union[str, List[str]], + description: Optional[str] = None, + client: Optional["Argilla"] = None, + server: Optional["FastAPI"] = None, + raw_event: bool = False, +) -> Callable: + """ + Decorator to create a webhook listener for a function. + + Parameters: + events (Union[str, List[str]]): The events to listen to. + description (Optional[str]): The description of the webhook. + client (Optional[Argilla]): The Argilla client to use. Defaults to the default client. + server (Optional[FastAPI]): The FastAPI server to use. Defaults to the default server. + raw_event (bool): Whether to pass the raw event to the function. Defaults to False. 
+ + Returns: + Callable: The decorated function. + + """ + + client = client or rg.Argilla._get_default() + server = server or get_webhook_server() + + if isinstance(events, str): + events = [events] + + def wrapper(func: Callable) -> Callable: + webhook_url = _webhook_url_for_func(func) + + webhook = None + for argilla_webhook in client.webhooks: + if argilla_webhook.url == webhook_url and argilla_webhook.events == events: + warnings.warn(f"Found existing webhook with for URL {argilla_webhook.url}: {argilla_webhook}") + webhook = argilla_webhook + webhook.description = description or webhook.description + webhook.enabled = True + webhook.update() + break + + if not webhook: + webhook = Webhook( + url=webhook_url, + events=events, + description=description or f"Webhook for {func.__name__}", + ).create() + + request_handler = WebhookHandler(webhook).handle(func, raw_event) + server.post(f"/{func.__name__}", tags=["Argilla Webhooks"])(request_handler) + + return request_handler + + return wrapper + + +def get_webhook_server() -> "FastAPI": + """ + Get the current webhook server. If it does not exist, it will create one. + + Returns: + FastAPI: The webhook server. + + """ + from fastapi import FastAPI + + global _server + if not _server: + _server = FastAPI() + return _server + + +def set_webhook_server(app: "FastAPI"): + """ + Set the webhook server. This should only be called once. + + Parameters: + app (FastAPI): The webhook server. + + """ + global _server + + if _server: + raise ValueError("Server already set") + + _server = app + + +class _WebhookServerRunner: + """ + Class to run the webhook server in a separate thread. 
+ """ + + def __init__(self, server: "FastAPI"): + import uvicorn + + self._server = uvicorn.Server(uvicorn.Config(app=server)) + self._thread = Thread(target=self._server.run, daemon=True) + + def start(self): + """Start the webhook server""" + self._thread.start() + while not self._server.started and self._thread.is_alive(): + time.sleep(1e-3) + + def stop(self): + """Stop the webhook server""" + self._server.should_exit = True + self._thread.join() + + +def start_webhook_server(): + """Start the webhook runner.""" + + global _server_runner + + if _server_runner: + warnings.warn("Server already started") + else: + server = get_webhook_server() + + _server_runner = _WebhookServerRunner(server) + _server_runner.start() + + +def stop_webhook_server(): + """Stop the webhook runner.""" + + global _server_runner + + if not _server_runner: + warnings.warn("Server not started") + else: + try: + _server_runner.stop() + finally: + _server_runner = None + + +_server: Optional["FastAPI"] = None +_server_runner: Optional[_WebhookServerRunner] = None diff --git a/argilla/src/argilla/webhooks/_resource.py b/argilla/src/argilla/webhooks/_resource.py new file mode 100644 index 0000000000..61c8302b4c --- /dev/null +++ b/argilla/src/argilla/webhooks/_resource.py @@ -0,0 +1,98 @@ +# Copyright 2024-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import List, Optional + +from argilla import Argilla +from argilla._api._webhooks import WebhookModel, WebhooksAPI +from argilla._models import EventType +from argilla._resource import Resource + + +class Webhook(Resource): + """ + The `Webhook` resource. It represents a webhook that can be used to receive events from the Argilla Server. + + Args: + url (str): The URL of the webhook endpoint. + events (List[EventType]): The events that the webhook is subscribed to. + description (Optional[str]): The description of the webhook. + _client (Argilla): The client used to interact with the Argilla Server. + + """ + + _model: WebhookModel + _api: WebhooksAPI + + def __init__(self, url: str, events: List[EventType], description: Optional[str] = None, _client: Argilla = None): + client = _client or Argilla._get_default() + api = client.api.webhooks + events = events or [] + + super().__init__(api=api, client=client) + + self._model = WebhookModel(url=url, events=list(events), description=description) + + @property + def url(self) -> str: + """The URL of the webhook.""" + return self._model.url + + @url.setter + def url(self, value: str): + self._model.url = value + + @property + def events(self) -> List[EventType]: + """The events that the webhook is subscribed to.""" + return self._model.events + + @events.setter + def events(self, value: List[EventType]): + self._model.events = value + + @property + def enabled(self) -> bool: + """Whether the webhook is enabled.""" + return self._model.enabled + + @enabled.setter + def enabled(self, value: bool): + self._model.enabled = value + + @property + def description(self) -> Optional[str]: + """The description of the webhook.""" + return self._model.description + + @description.setter + def description(self, value: Optional[str]): + self._model.description = value + + @property + def secret(self) -> str: + """The secret of the webhook.""" + return self._model.secret + + @classmethod + def from_model(cls, model: 
WebhookModel, client: Optional["Argilla"] = None) -> "Webhook": + instance = cls(url=model.url, events=model.events, _client=client) + instance._model = model + + return instance + + def _with_client(self, client: "Argilla") -> "Webhook": + self._client = client + self._api = client.api.webhooks + + return self diff --git a/examples/webhooks/basic-webhooks/README.md b/examples/webhooks/basic-webhooks/README.md new file mode 100644 index 0000000000..24f64a0c3e --- /dev/null +++ b/examples/webhooks/basic-webhooks/README.md @@ -0,0 +1,20 @@ +## Description + +This is a basic webhook example to show how to set up webhook listeners using the argilla SDK + +## Running the app + +1. Start argilla server and argilla worker +```bash +pdm server start +pdm worker +``` + +2. Start the app +```bash +uvicorn main:server +``` + +## Testing the app + +You can see traces in the server logs when working with datasets, records and responses in the argilla server diff --git a/examples/webhooks/basic-webhooks/main.py b/examples/webhooks/basic-webhooks/main.py new file mode 100644 index 0000000000..7b0050de2c --- /dev/null +++ b/examples/webhooks/basic-webhooks/main.py @@ -0,0 +1,76 @@ +import os +from datetime import datetime + +import argilla as rg + +# Environment variables with defaults +API_KEY = os.environ.get("ARGILLA_API_KEY", "argilla.apikey") +API_URL = os.environ.get("ARGILLA_API_URL", "http://localhost:6900") + +# Initialize Argilla client +client = rg.Argilla(api_key=API_KEY, api_url=API_URL) + +# Show the existing webhooks in the argilla server +for webhook in client.webhooks: + print(webhook.url) + + +# Create a webhook listener using the decorator +# This decorator will: +# 1. Create the webhook in the argilla server +# 2. Create a POST endpoint in the server +# 3. Handle the incoming requests to verify the webhook signature +# 4. Ignore the events other than the ones specified in the `events` argument +# 5.
Parse the incoming request and call the decorated function with the parsed data +# +# Each event will be passed as a keyword argument to the decorated function depending on the event type. +# The event types are: +# - record: created, updated, deleted and completed +# - response: created, updated, deleted +# - dataset: created, updated, published, deleted +# Related resources will be passed as keyword arguments to the decorated function +# (for example the dataset for a record-related event, or the record for a response-related event) +# When a resource is deleted +@rg.webhook_listener(events=["record.created", "record.completed"]) +async def listen_record( + record: rg.Record, dataset: rg.Dataset, type: str, timestamp: datetime +): + print(f"Received record event of type {type} at {timestamp}") + + action = "completed" if type == "record.completed" else "created" + print(f"A record with id {record.id} has been {action} for dataset {dataset.name}!") + + +@rg.webhook_listener(events="response.updated") +async def trigger_something_on_response_updated(response: rg.UserResponse, **kwargs): + print( + f"The user response {response.id} has been updated with the following responses:" + ) + print([response.serialize() for response in response.responses]) + + +@rg.webhook_listener(events=["dataset.created", "dataset.updated", "dataset.published"]) +async def with_raw_payload( + type: str, + timestamp: datetime, + dataset: rg.Dataset, + **kwargs, +): + print(f"Event type {type} at {timestamp}") + print(dataset.settings) + + +@rg.webhook_listener(events="dataset.deleted") +async def on_dataset_deleted( + data: dict, + **kwargs, +): + print(f"Dataset {data} has been deleted!") + + +# Set the webhook server. 
The server is a FastAPI instance, so you need to expose it in order to run it using uvicorn: +# ```bash +# uvicorn main:server --reload +# ``` + +server = rg.get_webhook_server() diff --git a/examples/webhooks/basic-webhooks/requirements.txt b/examples/webhooks/basic-webhooks/requirements.txt new file mode 100644 index 0000000000..11f77bdd21 --- /dev/null +++ b/examples/webhooks/basic-webhooks/requirements.txt @@ -0,0 +1,3 @@ +argilla @ git+https://github.com/argilla-io/argilla.git@feat/argilla/working-with-webhooks#subdirectory=argilla +fastapi +uvicorn[standard] From c6e3654d169520efdb71271bbf0b41e48d5b1f86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dami=C3=A1n=20Pumar?= Date: Tue, 19 Nov 2024 16:59:53 +0100 Subject: [PATCH 27/50] =?UTF-8?q?=F0=9F=9A=91=20Add=20missing=20translatio?= =?UTF-8?q?n=20(#5696)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix https://github.com/argilla-io/argilla/issues/5692 --- .../configuration/questions/DatasetConfigurationLabels.vue | 4 +++- .../configuration/questions/DatasetConfigurationSpan.vue | 4 +++- argilla-frontend/translation/de.js | 1 + argilla-frontend/translation/en.js | 1 + argilla-frontend/translation/es.js | 1 + 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/argilla-frontend/components/features/dataset-creation/configuration/questions/DatasetConfigurationLabels.vue b/argilla-frontend/components/features/dataset-creation/configuration/questions/DatasetConfigurationLabels.vue index 1ab24d2c12..987dbed628 100644 --- a/argilla-frontend/components/features/dataset-creation/configuration/questions/DatasetConfigurationLabels.vue +++ b/argilla-frontend/components/features/dataset-creation/configuration/questions/DatasetConfigurationLabels.vue @@ -18,7 +18,9 @@
diff --git a/argilla-frontend/components/features/dataset-creation/configuration/questions/DatasetConfigurationSpan.vue b/argilla-frontend/components/features/dataset-creation/configuration/questions/DatasetConfigurationSpan.vue index 2106fce530..6f69d2a028 100644 --- a/argilla-frontend/components/features/dataset-creation/configuration/questions/DatasetConfigurationSpan.vue +++ b/argilla-frontend/components/features/dataset-creation/configuration/questions/DatasetConfigurationSpan.vue @@ -18,7 +18,9 @@
diff --git a/argilla-frontend/translation/de.js b/argilla-frontend/translation/de.js index 8226fa127d..82452b2bed 100644 --- a/argilla-frontend/translation/de.js +++ b/argilla-frontend/translation/de.js @@ -287,6 +287,7 @@ export default { labelSelection: { atLeastTwoOptions: "Mindestens zwei Optionen müssen vorhanden sein", optionsWithoutLabel: "Optionen ohne Label sind nicht erlaubt", + optionsSeparatedByComma: "Optionen müssen durch Kommas getrennt sein", }, }, atLeastOneQuestion: "Mindestens eine Frage wird benötigt", diff --git a/argilla-frontend/translation/en.js b/argilla-frontend/translation/en.js index 448064600b..c35f76a59c 100644 --- a/argilla-frontend/translation/en.js +++ b/argilla-frontend/translation/en.js @@ -282,6 +282,7 @@ export default { labelSelection: { atLeastTwoOptions: "At least two options are required", optionsWithoutLabel: "Empty options are not allowed", + optionsSeparatedByComma: "Use comma to separate labels", }, rating: { atLeastTwoOptions: "At least two options are required", diff --git a/argilla-frontend/translation/es.js b/argilla-frontend/translation/es.js index 3bc4d8c922..a5b86960ff 100644 --- a/argilla-frontend/translation/es.js +++ b/argilla-frontend/translation/es.js @@ -281,6 +281,7 @@ export default { labelSelection: { atLeastTwoOptions: "Se requieren al menos dos opciones", optionsWithoutLabel: "No se permiten opciones vacías", + optionsSeparatedByComma: "Use comas para separar las etiquetas", }, rating: { atLeastTwoOptions: "Se requieren al menos dos opciones", From f47f20d63f1032741a9d1cd4468db5cb6fd88101 Mon Sep 17 00:00:00 2001 From: Paul Bauriegel Date: Tue, 19 Nov 2024 17:35:46 +0100 Subject: [PATCH 28/50] Docs - Add docs for adding a language (#5640) # Description Adds a small guide to the community docs on how to add a new language. I hope the community docs are the right place for that. 
**Type of change** - Documentation update **Checklist** - I added relevant documentation --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Sara Han <127759186+sdiazlor@users.noreply.github.com> Co-authored-by: Natalia Elvira <126158523+nataliaElv@users.noreply.github.com> --- argilla/docs/community/adding_language.md | 165 ++++++++++++++++++++++ argilla/mkdocs.yml | 1 + 2 files changed, 166 insertions(+) create mode 100644 argilla/docs/community/adding_language.md diff --git a/argilla/docs/community/adding_language.md b/argilla/docs/community/adding_language.md new file mode 100644 index 0000000000..401d6d92b4 --- /dev/null +++ b/argilla/docs/community/adding_language.md @@ -0,0 +1,165 @@ +# Adding a new language to Argilla + +If you want to add a new language to Argilla you need to go to two places: + +1. Add a new translation specification in the folder: `argilla-frontend/translation`. E.g., for Korean with code `ko`, add a `ko.js` file by copying the `en.js` file. The text values need to be translated: +```javascript +export default { + multi_label_selection: "다중 라벨", + ranking: "순위", + label_selection: "라벨", + span: "범위", + text: "텍스트", + ... +``` +2. Then update the i18n Nuxt: `argilla-frontend/nuxt.config.ts` + +```javascript + i18n: { + locales: [ + { + code: "en", + file: "en.js", + }, + ... + { + code: "ko", + file: "ko.js", + }, + ], +``` + +### How to test it + +1. Start a local instance of Argilla, easiest by just using the docker recipe [here](../getting_started/how-to-deploy-argilla-with-docker.md). It will give you a backend API for the frontend. +2. Compile a new version of the frontend. Check [this guide](https://github.com/argilla-io/argilla/tree/develop/argilla-frontend).
This is basically: + - `git clone https://github.com/argilla-io/argilla` + - `cd argilla-frontend` + - Install the dependencies: `npm i` + - Build the new frontend with the updates: `npm run build` + - Serve the UI via `npm run start`. You can reach it under localhost:3000 by default. + - Check the translations. +3. Deploy a small test dataset to test the translation on a dataset too: +```python +import argilla as rg + +client_local = rg.Argilla(api_url="http://localhost:6900/", api_key="argilla.apikey") + +sample_questions = [ + rg.SpanQuestion( + name="question1", + field="text", + labels={ + "PERSON": "Person", + "ORG": "Organization", + "LOC": "Location", + "MISC": "Miscellaneous" + }, # or ["PERSON", "ORG", "LOC", "MISC"] + title="Select the entities in the text", + description="Select the entities in the text", + required=True, + allow_overlapping=False, + ), + rg.LabelQuestion( + name="question2", + labels={"YES": "Yes", "NO": "No"}, # or ["YES", "NO"] + title="Is the answer relevant to the given prompt?", + description="Choose the option that applies.", + required=True, + ), + rg.MultiLabelQuestion( + name="question3", + labels={ + "hate": "Hate speech", + "sexual": "Sexual content", + "violent": "Violent content", + "pii": "Personal information", + "untruthful": "False information", + "not_english": "Not English", + "inappropriate": "Inappropriate content" + }, # or ["hate", "sexual", "violent", "pii", "untruthful", "not_english", "inappropriate"] + title="Does the response contain any of the following?", + description="Select all applicable options.", + required=True, + visible_labels=3, + labels_order="natural" + ), + rg.RankingQuestion( + name="question4", + values={ + "reply-1": "Answer 1", + "reply-2": "Answer 2", + "reply-3": "Answer 3" + }, # or ["reply-1", "reply-2", "reply-3"] + title="Rank the answers by your preference", + description="1 = best, 3 = worst. 
Equal ratings are allowed.", + required=True, + ), + rg.RatingQuestion( + name="question5", + values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + title="How satisfied are you with the answer?", + description="1 = very dissatisfied, 10 = very satisfied", + required=True, + ), + rg.TextQuestion( + name="question6", + title="Please provide your feedback on the answer", + description="Please provide your feedback on the answer", + required=True, + use_markdown=True + ) +] + +sample_fields = [ + rg.ChatField( + name="chat", + title="Previous conversation with the customer", + use_markdown=True, + required=True, + description="Dialog between AI & customer up to the last question", + ), + rg.TextField( + name="text", + title="Customer's question", + use_markdown=False, + required=True, + description="This is a question from the customer", + ), + rg.ImageField( + name="image", + title="Image related to the question", + required=True, + description="Image sent by the customer", + ), +] + +# Create a new dataset with the same settings as the original +settings = rg.Settings( + fields=sample_fields, + questions=sample_questions, +) +new_dataset = rg.Dataset( + name="demo_dataset", + workspace="default", + settings=settings, + client=client_local, +) +new_dataset.create() + +def fix_record(): + return rg.Record( + fields={ + "chat": [ + {"role": "user", "content": "What is Argilla?"}, + {"role": "assistant", "content": "Argilla is a collaboration tool for AI engineers and domain experts to build high-quality datasets"}, + ], + "image": "https://images.unsplash.com/photo-1523567353-71ea31cb9f73?w=900&auto=format&fit=crop&q=60&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxzZWFyY2h8MTJ8fGNvcmdpfGVufDB8fDB8fHww", + "text": "Which town has a greater population as of the 2010 census, Minden, Nevada or Gardnerville, Nevada?", + }, + ) + +new_records = [fix_record() for _ in range(10)] +new_dataset.records.log(new_records) +``` +4. 
Test if your translation also works with the dataset and in the dataset settings. diff --git a/argilla/mkdocs.yml b/argilla/mkdocs.yml index fa98ee4e95..645b72f514 100644 --- a/argilla/mkdocs.yml +++ b/argilla/mkdocs.yml @@ -193,6 +193,7 @@ nav: - community/index.md - How to contribute?: community/contributor.md - Developer documentation: community/developer.md + - Add a new language to Argilla: community/adding_language.md - Issue dashboard: community/popular_issues.md - Changelog: community/changelog.md - Integrations: From 0b5b009c5ff731d6b9f2050f75418f424175de83 Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Wed, 20 Nov 2024 10:53:53 +0100 Subject: [PATCH 29/50] [BUGFIX] `argilla server`: Prevent passing non-string values to text fields (#5682) # Description This PR prevents creating records with non-string values for text fields. Currently, users can create records with dictionary values as values for text fields. **Type of change** - Bug fix (non-breaking change which fixes an issue) **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --- argilla-server/CHANGELOG.md | 4 + .../src/argilla_server/validators/records.py | 14 ++ .../test_create_dataset_records_bulk.py | 144 +++++++++++++++++- 3 files changed, 161 insertions(+), 1 deletion(-) diff --git a/argilla-server/CHANGELOG.md b/argilla-server/CHANGELOG.md index c1fee89500..688e8e0e95 100644 --- a/argilla-server/CHANGELOG.md +++ b/argilla-server/CHANGELOG.md @@ -28,6 +28,10 @@ These are the section headers that we use: - Changed default python version to 3.13. 
([#5649](https://github.com/argilla-io/argilla/pull/5649)) +### Fixed + +- Fixed error to prevent creating record fields including non-string values for text fields. ([#5682](https://github.com/argilla-io/argilla/pull/5682)) + ## [2.4.1](https://github.com/argilla-io/argilla/compare/v2.4.0...v2.4.1) ### Fixed diff --git a/argilla-server/src/argilla_server/validators/records.py b/argilla-server/src/argilla_server/validators/records.py index ec28f389b3..c22b2fe0c2 100644 --- a/argilla-server/src/argilla_server/validators/records.py +++ b/argilla-server/src/argilla_server/validators/records.py @@ -55,6 +55,7 @@ def _validate_fields(cls, fields: dict, dataset: Dataset) -> None: cls._validate_non_empty_fields(fields=fields) cls._validate_required_fields(dataset=dataset, fields=fields) cls._validate_extra_fields(dataset=dataset, fields=fields) + cls._validate_text_fields(dataset=dataset, fields=fields) cls._validate_image_fields(dataset=dataset, fields=fields) cls._validate_chat_fields(dataset=dataset, fields=fields) cls._validate_custom_fields(dataset=dataset, fields=fields) @@ -99,6 +100,11 @@ def _validate_metadata(cls, metadata: dict, dataset: Dataset) -> None: "and extra metadata is not allowed for this dataset" ) + @classmethod + def _validate_text_fields(cls, dataset: Dataset, fields: Dict[str, str]) -> None: + for field in filter(lambda field: field.is_text, dataset.fields): + cls._validate_text_field(field.name, fields.get(field.name)) + @classmethod def _validate_image_fields(cls, dataset: Dataset, fields: Dict[str, str]) -> None: for field in filter(lambda field: field.is_image, dataset.fields): @@ -109,6 +115,14 @@ def _validate_chat_fields(cls, dataset: Dataset, fields: Dict[str, Any]) -> None for field in filter(lambda field: field.is_chat, dataset.fields): cls._validate_chat_field(field.name, fields.get(field.name)) + @classmethod + def _validate_text_field(cls, field_name: str, field_value: Any) -> None: + if field_value is None: + return + + if not 
isinstance(field_value, str): + raise UnprocessableEntityError(f"text field {field_name!r} value must be a string") + @classmethod def _validate_image_field(cls, field_name: str, field_value: Union[str, None]) -> None: if field_value is None: diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py b/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py index decdec847e..65cb298f0a 100644 --- a/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import pytest +from typing import Any from uuid import UUID from httpx import AsyncClient from sqlalchemy import func, select @@ -682,6 +682,148 @@ async def test_create_dataset_records_bulk_with_wrong_custom_field_value( assert response.status_code == 422 assert (await db.execute(select(func.count(Record.id)))).scalar_one() == 0 + @pytest.mark.parametrize( + "value,expected_error", + [ + ( + 1, + { + "detail": { + "code": "argilla.api.errors::ValidationError", + "params": { + "errors": [ + { + "loc": ["body", "items", 0, "fields", "text-field"], + "msg": "str type expected", + "type": "type_error.str", + }, + { + "loc": ["body", "items", 0, "fields", "text-field"], + "msg": "value is not a valid list", + "type": "type_error.list", + }, + { + "loc": ["body", "items", 0, "fields", "text-field"], + "msg": "value is not a valid dict", + "type": "type_error.dict", + }, + ] + }, + } + }, + ), + ( + 1.0, + { + "detail": { + "code": "argilla.api.errors::ValidationError", + "params": { + "errors": [ + { + "loc": ["body", "items", 0, "fields", 
"text-field"], + "msg": "str type expected", + "type": "type_error.str", + }, + { + "loc": ["body", "items", 0, "fields", "text-field"], + "msg": "value is not a valid list", + "type": "type_error.list", + }, + { + "loc": ["body", "items", 0, "fields", "text-field"], + "msg": "value is not a valid dict", + "type": "type_error.dict", + }, + ] + }, + } + }, + ), + ( + True, + { + "detail": { + "code": "argilla.api.errors::ValidationError", + "params": { + "errors": [ + { + "loc": ["body", "items", 0, "fields", "text-field"], + "msg": "str type expected", + "type": "type_error.str", + }, + { + "loc": ["body", "items", 0, "fields", "text-field"], + "msg": "value is not a valid list", + "type": "type_error.list", + }, + { + "loc": ["body", "items", 0, "fields", "text-field"], + "msg": "value is not a valid dict", + "type": "type_error.dict", + }, + ] + }, + } + }, + ), + ( + ["wrong", "value"], + { + "detail": { + "code": "argilla.api.errors::ValidationError", + "params": { + "errors": [ + { + "loc": ["body", "items", 0, "fields"], + "msg": "argilla_server.api.schemas.v1.chat.ChatFieldValue() argument after ** must be a mapping, not str", + "type": "type_error", + } + ] + }, + } + }, + ), + ( + {"wrong": "value"}, + {"detail": "Record at position 0 is not valid because text field 'text-field' value must be a string"}, + ), # Valid value for custom fields wrong value for text fields + ( + [{"role": "user", "content": "Hello!"}], + {"detail": "Record at position 0 is not valid because text field 'text-field' value must be a string"}, + ), # Valid value for chat fields wrong value for text fields + ], + ) + async def test_create_dataset_records_bulk_with_wrong_text_field_value( + self, + db: AsyncSession, + async_client: AsyncClient, + owner_auth_header: dict, + value: Any, + expected_error: dict, + ): + dataset = await DatasetFactory.create(status=DatasetStatus.ready) + + await TextFieldFactory.create(name="text-field", dataset=dataset) + await 
LabelSelectionQuestionFactory.create(dataset=dataset) + + response = await async_client.post( + self.url(dataset.id), + headers=owner_auth_header, + json={ + "items": [ + { + "fields": { + "text-field": value, + }, + }, + ], + }, + ) + + assert response.status_code == 422 + assert response.json() == expected_error + assert (await db.execute(select(func.count(Record.id)))).scalar_one() == 0 + async def test_create_dataset_records_bulk_updates_records_status( self, db: AsyncSession, async_client: AsyncClient, owner: User, owner_auth_header: dict ): From 9cf0a63795e8ba19744346d3e5020c125f0469d1 Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Wed, 20 Nov 2024 10:56:06 +0100 Subject: [PATCH 30/50] [REFACTOR] `argilla server`: using pydantic v2 (#5666) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description An alternative work of pydantic v2 upgrade with the latest changes from develop. The main problems I found: - The Optional values are now required. This will affect how schemas are returned since default values are sets, including them as part of the schema result (record without responses or suggestions, for example) - I found some serialization errors when the response body (an error) contains a ValueError object. The same is working with pydantic <2 - Automatic type conversion is not working anymore. This means that many client requests may fail ( values are not automatically converted to str, or some complex structures (record fields may be affected by this). We can still define custom model_validators, but we must apply them carefully. 
Closes https://github.com/argilla-io/argilla/issues/4935 Refs https://github.com/argilla-io/argilla/pull/5508 **Type of change** - Refactor (change restructuring the codebase without changing functionality) **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --------- Co-authored-by: José Francisco Calvo Co-authored-by: José Francisco Calvo Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- argilla-server/.env.dev | 2 +- argilla-server/pdm.lock | 171 ++++++++++++++---- argilla-server/pyproject.toml | 3 +- argilla-server/src/argilla_server/__init__.py | 9 +- .../api/handlers/v1/datasets/datasets.py | 7 +- .../api/handlers/v1/datasets/records.py | 4 +- .../argilla_server/api/handlers/v1/oauth2.py | 5 +- .../argilla_server/api/handlers/v1/users.py | 2 +- .../api/handlers/v1/webhooks.py | 4 +- .../api/handlers/v1/workspaces.py | 2 +- .../src/argilla_server/api/schemas/v1/chat.py | 7 +- .../argilla_server/api/schemas/v1/commons.py | 13 +- .../argilla_server/api/schemas/v1/datasets.py | 40 ++-- .../argilla_server/api/schemas/v1/fields.py | 12 +- .../src/argilla_server/api/schemas/v1/info.py | 2 +- .../api/schemas/v1/metadata_properties.py | 44 ++--- .../api/schemas/v1/questions.py | 90 +++++---- .../argilla_server/api/schemas/v1/records.py | 144 ++++++++++----- .../api/schemas/v1/records_bulk.py | 9 +- .../api/schemas/v1/responses.py | 48 ++--- .../argilla_server/api/schemas/v1/settings.py | 19 +- .../api/schemas/v1/suggestions.py | 43 +++-- .../argilla_server/api/schemas/v1/users.py | 14 +- .../api/schemas/v1/vector_settings.py | 5 +- 
.../argilla_server/api/schemas/v1/vectors.py | 2 +- .../argilla_server/api/schemas/v1/webhooks.py | 57 ++++-- .../api/schemas/v1/workspaces.py | 5 +- .../src/argilla_server/bulk/records_bulk.py | 6 +- .../cli/database/users/create.py | 2 +- .../cli/database/users/create_default.py | 2 +- .../cli/database/users/migrate.py | 2 +- .../src/argilla_server/contexts/datasets.py | 16 +- .../src/argilla_server/contexts/hub.py | 2 +- .../src/argilla_server/contexts/questions.py | 4 +- .../src/argilla_server/contexts/settings.py | 6 +- argilla-server/src/argilla_server/database.py | 3 +- .../src/argilla_server/errors/base_errors.py | 17 +- .../argilla_server/errors/error_handler.py | 24 ++- .../integrations/huggingface/spaces.py | 18 +- .../src/argilla_server/jobs/dataset_jobs.py | 2 +- .../src/argilla_server/jobs/hub_jobs.py | 2 +- .../src/argilla_server/models/database.py | 7 +- .../models/metadata_properties.py | 5 +- .../src/argilla_server/models/mixins.py | 6 +- .../argilla_server/pydantic_v1/__init__.py | 24 --- .../src/argilla_server/pydantic_v1/errors.py | 18 -- .../argilla_server/pydantic_v1/generics.py | 18 -- .../src/argilla_server/pydantic_v1/utils.py | 18 -- .../src/argilla_server/search_engine/base.py | 18 +- .../src/argilla_server/security/settings.py | 3 +- argilla-server/src/argilla_server/settings.py | 52 ++++-- .../responses/upsert_responses_in_bulk.py | 2 +- argilla-server/src/argilla_server/utils.py | 4 +- .../src/argilla_server/utils/params.py | 4 +- .../src/argilla_server/validators/datasets.py | 1 - .../src/argilla_server/validators/records.py | 7 +- .../argilla_server/webhooks/v1/datasets.py | 2 +- .../src/argilla_server/webhooks/v1/records.py | 2 +- .../argilla_server/webhooks/v1/responses.py | 2 +- .../src/argilla_server/webhooks/v1/schemas.py | 39 ++-- argilla-server/tests/pydantic_v1/__init__.py | 25 --- argilla-server/tests/pydantic_v1/generics.py | 18 -- argilla-server/tests/pydantic_v1/utils.py | 18 -- .../test_create_dataset_records_bulk.py | 
53 +++++- .../datasets/test_create_dataset_question.py | 2 +- .../test_list_current_user_datasets.py | 5 +- ...test_create_current_user_responses_bulk.py | 2 +- .../unit/api/handlers/v1/test_datasets.py | 44 ++--- .../handlers/v1/test_metadata_properties.py | 2 + .../v1/webhooks/test_create_webhook.py | 6 +- .../v1/webhooks/test_update_webhook.py | 2 +- .../schemas/v1/records/test_record_create.py | 2 +- .../test_search_records_query_validator.py | 18 +- .../tests/unit/security/test_model.py | 6 +- argilla-server/tests/unit/test_utils.py | 2 +- 75 files changed, 721 insertions(+), 585 deletions(-) delete mode 100644 argilla-server/src/argilla_server/pydantic_v1/__init__.py delete mode 100644 argilla-server/src/argilla_server/pydantic_v1/errors.py delete mode 100644 argilla-server/src/argilla_server/pydantic_v1/generics.py delete mode 100644 argilla-server/src/argilla_server/pydantic_v1/utils.py delete mode 100644 argilla-server/tests/pydantic_v1/__init__.py delete mode 100644 argilla-server/tests/pydantic_v1/generics.py delete mode 100644 argilla-server/tests/pydantic_v1/utils.py diff --git a/argilla-server/.env.dev b/argilla-server/.env.dev index 76c10523d0..4935d33ed7 100644 --- a/argilla-server/.env.dev +++ b/argilla-server/.env.dev @@ -1,4 +1,4 @@ OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES # Needed by RQ to work with forked processes on MacOS ALEMBIC_CONFIG=src/argilla_server/alembic.ini ARGILLA_AUTH_SECRET_KEY=8VO7na5N/jQx+yP/N+HlE8q51vPdrxqlh6OzoebIyko= # With this we avoid using a different key every time the server is reloaded -ARGILLA_DATABASE_URL=sqlite+aiosqlite:///${HOME}/.argilla/argilla.db?check_same_thread=False +ARGILLA_DATABASE_URL=sqlite+aiosqlite:///${HOME}/.argilla/argilla-dev.db?check_same_thread=False diff --git a/argilla-server/pdm.lock b/argilla-server/pdm.lock index 8f6cdd1005..e2885e4858 100644 --- a/argilla-server/pdm.lock +++ b/argilla-server/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "postgresql", "test"] strategy = ["inherit_metadata"] 
lock_version = "4.5.0" -content_hash = "sha256:c25b92ead1fbe755395a5c38f8c182e43f78bbf735e8393aa844b7299ee55fdd" +content_hash = "sha256:f727a613054d4b3bc4eabdbc76b688e49758df5b6f81eec1d6d34faf0a1563c8" [[metadata.targets]] requires_python = ">=3.9" @@ -172,6 +172,20 @@ files = [ {file = "alembic-1.13.3.tar.gz", hash = "sha256:203503117415561e203aa14541740643a611f641517f0209fcae63e9fa09f1a2"}, ] +[[package]] +name = "annotated-types" +version = "0.7.0" +requires_python = ">=3.8" +summary = "Reusable constraint types to use with typing.Annotated" +groups = ["default"] +dependencies = [ + "typing-extensions>=4.0.0; python_version < \"3.9\"", +] +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + [[package]] name = "anyio" version = "4.6.2.post1" @@ -1976,44 +1990,123 @@ files = [ [[package]] name = "pydantic" -version = "1.10.18" -requires_python = ">=3.7" -summary = "Data validation and settings management using python type hints" -groups = ["default"] -dependencies = [ - "typing-extensions>=4.2.0", -] -files = [ - {file = "pydantic-1.10.18-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e405ffcc1254d76bb0e760db101ee8916b620893e6edfbfee563b3c6f7a67c02"}, - {file = "pydantic-1.10.18-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e306e280ebebc65040034bff1a0a81fd86b2f4f05daac0131f29541cafd80b80"}, - {file = "pydantic-1.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11d9d9b87b50338b1b7de4ebf34fd29fdb0d219dc07ade29effc74d3d2609c62"}, - {file = "pydantic-1.10.18-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b661ce52c7b5e5f600c0c3c5839e71918346af2ef20062705ae76b5c16914cab"}, - {file = "pydantic-1.10.18-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:c20f682defc9ef81cd7eaa485879ab29a86a0ba58acf669a78ed868e72bb89e0"}, - {file = "pydantic-1.10.18-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c5ae6b7c8483b1e0bf59e5f1843e4fd8fd405e11df7de217ee65b98eb5462861"}, - {file = "pydantic-1.10.18-cp310-cp310-win_amd64.whl", hash = "sha256:74fe19dda960b193b0eb82c1f4d2c8e5e26918d9cda858cbf3f41dd28549cb70"}, - {file = "pydantic-1.10.18-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72fa46abace0a7743cc697dbb830a41ee84c9db8456e8d77a46d79b537efd7ec"}, - {file = "pydantic-1.10.18-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ef0fe7ad7cbdb5f372463d42e6ed4ca9c443a52ce544472d8842a0576d830da5"}, - {file = "pydantic-1.10.18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a00e63104346145389b8e8f500bc6a241e729feaf0559b88b8aa513dd2065481"}, - {file = "pydantic-1.10.18-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae6fa2008e1443c46b7b3a5eb03800121868d5ab6bc7cda20b5df3e133cde8b3"}, - {file = "pydantic-1.10.18-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9f463abafdc92635da4b38807f5b9972276be7c8c5121989768549fceb8d2588"}, - {file = "pydantic-1.10.18-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3445426da503c7e40baccefb2b2989a0c5ce6b163679dd75f55493b460f05a8f"}, - {file = "pydantic-1.10.18-cp311-cp311-win_amd64.whl", hash = "sha256:467a14ee2183bc9c902579bb2f04c3d3dac00eff52e252850509a562255b2a33"}, - {file = "pydantic-1.10.18-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:efbc8a7f9cb5fe26122acba1852d8dcd1e125e723727c59dcd244da7bdaa54f2"}, - {file = "pydantic-1.10.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:24a4a159d0f7a8e26bf6463b0d3d60871d6a52eac5bb6a07a7df85c806f4c048"}, - {file = "pydantic-1.10.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b74be007703547dc52e3c37344d130a7bfacca7df112a9e5ceeb840a9ce195c7"}, - {file = 
"pydantic-1.10.18-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fcb20d4cb355195c75000a49bb4a31d75e4295200df620f454bbc6bdf60ca890"}, - {file = "pydantic-1.10.18-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:46f379b8cb8a3585e3f61bf9ae7d606c70d133943f339d38b76e041ec234953f"}, - {file = "pydantic-1.10.18-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cbfbca662ed3729204090c4d09ee4beeecc1a7ecba5a159a94b5a4eb24e3759a"}, - {file = "pydantic-1.10.18-cp312-cp312-win_amd64.whl", hash = "sha256:c6d0a9f9eccaf7f438671a64acf654ef0d045466e63f9f68a579e2383b63f357"}, - {file = "pydantic-1.10.18-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:80b982d42515632eb51f60fa1d217dfe0729f008e81a82d1544cc392e0a50ddf"}, - {file = "pydantic-1.10.18-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:aad8771ec8dbf9139b01b56f66386537c6fe4e76c8f7a47c10261b69ad25c2c9"}, - {file = "pydantic-1.10.18-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941a2eb0a1509bd7f31e355912eb33b698eb0051730b2eaf9e70e2e1589cae1d"}, - {file = "pydantic-1.10.18-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65f7361a09b07915a98efd17fdec23103307a54db2000bb92095457ca758d485"}, - {file = "pydantic-1.10.18-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6951f3f47cb5ca4da536ab161ac0163cab31417d20c54c6de5ddcab8bc813c3f"}, - {file = "pydantic-1.10.18-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a4c5eec138a9b52c67f664c7d51d4c7234c5ad65dd8aacd919fb47445a62c86"}, - {file = "pydantic-1.10.18-cp39-cp39-win_amd64.whl", hash = "sha256:49e26c51ca854286bffc22b69787a8d4063a62bf7d83dc21d44d2ff426108518"}, - {file = "pydantic-1.10.18-py3-none-any.whl", hash = "sha256:06a189b81ffc52746ec9c8c007f16e5167c8b0a696e1a726369327e3db7b2a82"}, - {file = "pydantic-1.10.18.tar.gz", hash = "sha256:baebdff1907d1d96a139c25136a9bb7d17e118f133a76a2ef3b845e831e3403a"}, +version = "2.9.2" 
+requires_python = ">=3.8" +summary = "Data validation using Python type hints" +groups = ["default"] +dependencies = [ + "annotated-types>=0.6.0", + "pydantic-core==2.23.4", + "typing-extensions>=4.12.2; python_version >= \"3.13\"", + "typing-extensions>=4.6.1; python_version < \"3.13\"", +] +files = [ + {file = "pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12"}, + {file = "pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f"}, +] + +[[package]] +name = "pydantic-core" +version = "2.23.4" +requires_python = ">=3.8" +summary = "Core functionality for Pydantic validation and serialization" +groups = ["default"] +dependencies = [ + "typing-extensions!=4.7.0,>=4.6.0", +] +files = [ + {file = "pydantic_core-2.23.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b10bd51f823d891193d4717448fab065733958bdb6a6b351967bd349d48d5c9b"}, + {file = "pydantic_core-2.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4fc714bdbfb534f94034efaa6eadd74e5b93c8fa6315565a222f7b6f42ca1166"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e46b3169866bd62849936de036f901a9356e36376079b05efa83caeaa02ceb"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed1a53de42fbe34853ba90513cea21673481cd81ed1be739f7f2efb931b24916"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cfdd16ab5e59fc31b5e906d1a3f666571abc367598e3e02c83403acabc092e07"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255a8ef062cbf6674450e668482456abac99a5583bbafb73f9ad469540a3a232"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a7cd62e831afe623fbb7aabbb4fe583212115b3ef38a9f6b71869ba644624a2"}, + {file 
= "pydantic_core-2.23.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f09e2ff1f17c2b51f2bc76d1cc33da96298f0a036a137f5440ab3ec5360b624f"}, + {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e38e63e6f3d1cec5a27e0afe90a085af8b6806ee208b33030e65b6516353f1a3"}, + {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0dbd8dbed2085ed23b5c04afa29d8fd2771674223135dc9bc937f3c09284d071"}, + {file = "pydantic_core-2.23.4-cp310-none-win32.whl", hash = "sha256:6531b7ca5f951d663c339002e91aaebda765ec7d61b7d1e3991051906ddde119"}, + {file = "pydantic_core-2.23.4-cp310-none-win_amd64.whl", hash = "sha256:7c9129eb40958b3d4500fa2467e6a83356b3b61bfff1b414c7361d9220f9ae8f"}, + {file = "pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8"}, + {file = "pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e"}, + {file = 
"pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b"}, + {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0"}, + {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64"}, + {file = "pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f"}, + {file = "pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = "sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3"}, + {file = "pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231"}, + {file = "pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36"}, + {file = 
"pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126"}, + {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e"}, + {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24"}, + {file = "pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84"}, + {file = "pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = "sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9"}, + {file = "pydantic_core-2.23.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7530e201d10d7d14abce4fb54cfe5b94a0aefc87da539d0346a484ead376c3cc"}, + {file = "pydantic_core-2.23.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df933278128ea1cd77772673c73954e53a1c95a4fdf41eef97c2b779271bd0bd"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cb3da3fd1b6a5d0279a01877713dbda118a2a4fc6f0d821a57da2e464793f05"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c6dcb030aefb668a2b7009c85b27f90e51e6a3b4d5c9bc4c57631292015b0d"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:696dd8d674d6ce621ab9d45b205df149399e4bb9aa34102c970b721554828510"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2971bb5ffe72cc0f555c13e19b23c85b654dd2a8f7ab493c262071377bfce9f6"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8394d940e5d400d04cad4f75c0598665cbb81aecefaca82ca85bd28264af7f9b"}, + {file = 
"pydantic_core-2.23.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dff76e0602ca7d4cdaacc1ac4c005e0ce0dcfe095d5b5259163a80d3a10d327"}, + {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7d32706badfe136888bdea71c0def994644e09fff0bfe47441deaed8e96fdbc6"}, + {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed541d70698978a20eb63d8c5d72f2cc6d7079d9d90f6b50bad07826f1320f5f"}, + {file = "pydantic_core-2.23.4-cp313-none-win32.whl", hash = "sha256:3d5639516376dce1940ea36edf408c554475369f5da2abd45d44621cb616f769"}, + {file = "pydantic_core-2.23.4-cp313-none-win_amd64.whl", hash = "sha256:5a1504ad17ba4210df3a045132a7baeeba5a200e930f57512ee02909fc5c4cb5"}, + {file = "pydantic_core-2.23.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a4fa4fc04dff799089689f4fd502ce7d59de529fc2f40a2c8836886c03e0175a"}, + {file = "pydantic_core-2.23.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7df63886be5e270da67e0966cf4afbae86069501d35c8c1b3b6c168f42cb36"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcedcd19a557e182628afa1d553c3895a9f825b936415d0dbd3cd0bbcfd29b4b"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f54b118ce5de9ac21c363d9b3caa6c800341e8c47a508787e5868c6b79c9323"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d2f57d3e1379a9525c5ab067b27dbb8a0642fb5d454e17a9ac434f9ce523e3"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de6d1d1b9e5101508cb37ab0d972357cac5235f5c6533d1071964c47139257df"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1278e0d324f6908e872730c9102b0112477a7f7cf88b308e4fc36ce1bdb6d58c"}, + {file = 
"pydantic_core-2.23.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a6b5099eeec78827553827f4c6b8615978bb4b6a88e5d9b93eddf8bb6790f55"}, + {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e55541f756f9b3ee346b840103f32779c695a19826a4c442b7954550a0972040"}, + {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a5c7ba8ffb6d6f8f2ab08743be203654bb1aaa8c9dcb09f82ddd34eadb695605"}, + {file = "pydantic_core-2.23.4-cp39-none-win32.whl", hash = "sha256:37b0fe330e4a58d3c58b24d91d1eb102aeec675a3db4c292ec3928ecd892a9a6"}, + {file = "pydantic_core-2.23.4-cp39-none-win_amd64.whl", hash = "sha256:1498bec4c05c9c787bde9125cfdcc63a41004ff167f495063191b863399b1a29"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f455ee30a9d61d3e1a15abd5068827773d6e4dc513e795f380cdd59932c782d5"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e90d2e3bd2c3863d48525d297cd143fe541be8bbf6f579504b9712cb6b643ec"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e203fdf807ac7e12ab59ca2bfcabb38c7cf0b33c41efeb00f8e5da1d86af480"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08277a400de01bc72436a0ccd02bdf596631411f592ad985dcee21445bd0068"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f220b0eea5965dec25480b6333c788fb72ce5f9129e8759ef876a1d805d00801"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d06b0c8da4f16d1d1e352134427cb194a0a6e19ad5db9161bf32b2113409e728"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ba1a0996f6c2773bd83e63f18914c1de3c9dd26d55f4ac302a7efe93fb8e7433"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = 
"sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:78ddaaa81421a29574a682b3179d4cf9e6d405a09b99d93ddcf7e5239c742e21"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:883a91b5dd7d26492ff2f04f40fbb652de40fcc0afe07e8129e8ae779c2110eb"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88ad334a15b32a791ea935af224b9de1bf99bcd62fabf745d5f3442199d86d59"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233710f069d251feb12a56da21e14cca67994eab08362207785cf8c598e74577"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19442362866a753485ba5e4be408964644dd6a09123d9416c54cd49171f50744"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:624e278a7d29b6445e4e813af92af37820fafb6dcc55c012c834f9e26f9aaaef"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5ef8f42bec47f21d07668a043f077d507e5bf4e668d5c6dfe6aaba89de1a5b8"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:aea443fffa9fbe3af1a9ba721a87f926fe548d32cab71d188a6ede77d0ff244e"}, + {file = "pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863"}, +] + +[[package]] +name = "pydantic-settings" +version = "2.6.1" +requires_python = ">=3.8" +summary = "Settings management using Pydantic" +groups = ["default"] +dependencies = [ + "pydantic>=2.7.0", + "python-dotenv>=0.21.0", +] +files = [ + {file = "pydantic_settings-2.6.1-py3-none-any.whl", hash = "sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87"}, + {file = "pydantic_settings-2.6.1.tar.gz", hash = 
"sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0"}, ] [[package]] diff --git a/argilla-server/pyproject.toml b/argilla-server/pyproject.toml index 4e0e0adaab..590fee1232 100644 --- a/argilla-server/pyproject.toml +++ b/argilla-server/pyproject.toml @@ -21,7 +21,8 @@ maintainers = [{ name = "argilla", email = "contact@argilla.io" }] dependencies = [ # Basic dependencies "fastapi ~= 0.115.0", - "pydantic ~= 1.10.18", + "pydantic ~= 2.9.0", + "pydantic-settings ~= 2.6.0", "uvicorn[standard] ~= 0.32.0", "opensearch-py ~= 2.0.0", "elasticsearch8[async] ~= 8.7.0", diff --git a/argilla-server/src/argilla_server/__init__.py b/argilla-server/src/argilla_server/__init__.py index cf4c6e98b8..2d943a2cd7 100644 --- a/argilla-server/src/argilla_server/__init__.py +++ b/argilla-server/src/argilla_server/__init__.py @@ -12,12 +12,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# Remove me -import warnings -from argilla_server.pydantic_v1 import PYDANTIC_MAJOR_VERSION - -if PYDANTIC_MAJOR_VERSION >= 2: - warnings.warn("The argilla_server package is not compatible with Pydantic 2. 
" "Please use Pydantic 1.x instead.") -else: - from argilla_server._app import app # noqa +from argilla_server._app import app # noqa diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py index 75ffda240c..55a563c738 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/datasets/datasets.py @@ -50,7 +50,6 @@ get_search_engine, ) from argilla_server.security import auth -from argilla_server.telemetry import TelemetryClient, get_telemetry_client router = APIRouter() @@ -203,7 +202,7 @@ async def create_dataset( ): await authorize(current_user, DatasetPolicy.create(dataset_create.workspace_id)) - return await datasets.create_dataset(db, dataset_create.dict()) + return await datasets.create_dataset(db, dataset_create.model_dump()) @router.post("/datasets/{dataset_id}/fields", status_code=status.HTTP_201_CREATED, response_model=Field) @@ -310,7 +309,7 @@ async def update_dataset( await authorize(current_user, DatasetPolicy.update(dataset)) - return await datasets.update_dataset(db, dataset, dataset_update.dict(exclude_unset=True)) + return await datasets.update_dataset(db, dataset, dataset_update.model_dump(exclude_unset=True)) @router.post("/datasets/{dataset_id}/import", status_code=status.HTTP_202_ACCEPTED, response_model=JobSchema) @@ -330,7 +329,7 @@ async def import_dataset_from_hub( subset=hub_dataset.subset, split=hub_dataset.split, dataset_id=dataset.id, - mapping=hub_dataset.mapping.dict(), + mapping=hub_dataset.mapping.model_dump(), ) return JobSchema(id=job.id, status=job.get_status()) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py index 240952fa49..90baf9d85a 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py +++ 
b/argilla-server/src/argilla_server/api/handlers/v1/datasets/records.py @@ -326,7 +326,7 @@ async def search_current_user_dataset_records( record.metadata_ = await _filter_record_metadata_for_user(record, current_user) record_id_score_map[record.id]["search_record"] = SearchRecord( - record=RecordSchema.from_orm(record), + record=RecordSchema.model_validate(record), query_score=record_id_score_map[record.id]["query_score"], ) @@ -382,7 +382,7 @@ async def search_dataset_records( for record in records: record_id_score_map[record.id]["search_record"] = SearchRecord( - record=RecordSchema.from_orm(record), + record=RecordSchema.model_validate(record), query_score=record_id_score_map[record.id]["query_score"], ) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py b/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py index 5f34c57072..6257da9601 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/oauth2.py @@ -17,7 +17,6 @@ from fastapi.responses import RedirectResponse from sqlalchemy.ext.asyncio import AsyncSession -from argilla_server import telemetry from argilla_server.api.schemas.v1.oauth2 import Provider, Providers, Token from argilla_server.api.schemas.v1.users import UserCreate from argilla_server.contexts import accounts @@ -25,7 +24,7 @@ from argilla_server.enums import UserRole from argilla_server.errors.future import NotFoundError from argilla_server.models import User -from argilla_server.pydantic_v1 import Field +from pydantic import Field from argilla_server.security.authentication.oauth2 import OAuth2ClientProvider from argilla_server.security.authentication.userinfo import UserInfo from argilla_server.security.settings import settings @@ -86,7 +85,7 @@ async def get_access_token( username=userinfo.username, first_name=userinfo.first_name, role=userinfo.role, - ).dict(exclude_unset=True), + ).model_dump(exclude_unset=True), 
workspaces=[workspace.name for workspace in settings.oauth.allowed_workspaces], ) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/users.py b/argilla-server/src/argilla_server/api/handlers/v1/users.py index 0ee6a23ee2..7548b54520 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/users.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/users.py @@ -70,7 +70,7 @@ async def create_user( ): await authorize(current_user, UserPolicy.create) - user = await accounts.create_user(db, user_create.dict()) + user = await accounts.create_user(db, user_create.model_dump()) return user diff --git a/argilla-server/src/argilla_server/api/handlers/v1/webhooks.py b/argilla-server/src/argilla_server/api/handlers/v1/webhooks.py index 54513dbc04..11ca71554c 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/webhooks.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/webhooks.py @@ -53,7 +53,7 @@ async def create_webhook( ): await authorize(current_user, WebhookPolicy.create) - return await webhooks.create_webhook(db, webhook_create.dict()) + return await webhooks.create_webhook(db, webhook_create.model_dump()) @router.patch("/webhooks/{webhook_id}", response_model=WebhookSchema) @@ -68,7 +68,7 @@ async def update_webhook( await authorize(current_user, WebhookPolicy.update) - return await webhooks.update_webhook(db, webhook, webhook_update.dict(exclude_unset=True)) + return await webhooks.update_webhook(db, webhook, webhook_update.model_dump(exclude_unset=True)) @router.delete("/webhooks/{webhook_id}", response_model=WebhookSchema) diff --git a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py index 1636998ea4..db56218c83 100644 --- a/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py +++ b/argilla-server/src/argilla_server/api/handlers/v1/workspaces.py @@ -58,7 +58,7 @@ async def create_workspace( ): await authorize(current_user, 
WorkspacePolicy.create) - return await accounts.create_workspace(db, workspace_create.dict()) + return await accounts.create_workspace(db, workspace_create.model_dump()) @router.delete("/workspaces/{workspace_id}", response_model=WorkspaceSchema) diff --git a/argilla-server/src/argilla_server/api/schemas/v1/chat.py b/argilla-server/src/argilla_server/api/schemas/v1/chat.py index 25d708edd8..11f5184ac1 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/chat.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/chat.py @@ -12,16 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from argilla_server.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field MIN_MESSAGE_LENGTH = 1 MAX_MESSAGE_LENGTH = 20000 MIN_ROLE_LENGTH = 1 MAX_ROLE_LENGTH = 20 -MAX_ROLE_REGEX = r"^\S+$" class ChatFieldValue(BaseModel): - role: str = Field(..., min_role_length=MIN_ROLE_LENGTH, max_length=MAX_ROLE_LENGTH, regex=MAX_ROLE_REGEX) - content: str = Field(..., min_message_length=MIN_MESSAGE_LENGTH, max_length=MAX_MESSAGE_LENGTH) + role: str = Field(..., min_length=MIN_ROLE_LENGTH, max_length=MAX_ROLE_LENGTH) + content: str = Field(..., min_length=MIN_MESSAGE_LENGTH, max_length=MAX_MESSAGE_LENGTH) diff --git a/argilla-server/src/argilla_server/api/schemas/v1/commons.py b/argilla-server/src/argilla_server/api/schemas/v1/commons.py index b8c6aa7f92..18c6fc54fc 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/commons.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/commons.py @@ -14,7 +14,7 @@ from typing import Any, Dict, Set, Union -from argilla_server.pydantic_v1 import BaseModel, root_validator +from pydantic import BaseModel, model_validator class UpdateSchema(BaseModel): @@ -25,17 +25,18 @@ class UpdateSchema(BaseModel): __non_nullable_fields__: Union[Set[str], None] = None - @root_validator(pre=True) - def validate_non_nullable_fields(cls, values: Dict[str, Any]) -> 
Dict[str, Any]: + @model_validator(mode="before") + @classmethod + def validate_non_nullable_fields(cls, data: Dict[str, Any]) -> Dict[str, Any]: if cls.__non_nullable_fields__ is None: - return values + return data invalid_keys = [] for key in cls.__non_nullable_fields__: - if key in values and values[key] is None: + if key in data and data[key] is None: invalid_keys.append(key) if invalid_keys: raise ValueError(f"The following keys must have non-null values: {', '.join(invalid_keys)}") - return values + return data diff --git a/argilla-server/src/argilla_server/api/schemas/v1/datasets.py b/argilla-server/src/argilla_server/api/schemas/v1/datasets.py index 60272dc331..32c7148750 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/datasets.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/datasets.py @@ -13,13 +13,14 @@ # limitations under the License. from datetime import datetime -from typing import List, Literal, Optional, Union, Dict, Any +from typing import List, Literal, Optional, Dict, Any from uuid import UUID +from pydantic.v1.utils import GetterDict + from argilla_server.api.schemas.v1.commons import UpdateSchema from argilla_server.enums import DatasetDistributionStrategy, DatasetStatus -from argilla_server.pydantic_v1 import BaseModel, Field, constr -from argilla_server.pydantic_v1.utils import GetterDict +from pydantic import BaseModel, Field, constr, ConfigDict, model_validator try: from typing import Annotated @@ -106,7 +107,7 @@ class UsersProgress(BaseModel): class DatasetGetterDict(GetterDict): - def get(self, key: str, default: Any) -> Any: + def get(self, key: Any, default: Any = None) -> Any: if key == "metadata": return getattr(self._obj, "metadata_", None) @@ -116,19 +117,28 @@ def get(self, key: str, default: Any) -> Any: class Dataset(BaseModel): id: UUID name: str - guidelines: Optional[str] + guidelines: Optional[str] = None allow_extra_metadata: bool status: DatasetStatus distribution: DatasetDistribution - metadata: 
Optional[Dict[str, Any]] + metadata: Optional[Dict[str, Any]] = None workspace_id: UUID last_activity_at: datetime inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True - getter_dict = DatasetGetterDict + model_config = ConfigDict(from_attributes=True) + + @model_validator(mode="before") + @classmethod + def validate(cls, value) -> dict: + getter = DatasetGetterDict(value) + + data = {} + for field in cls.model_fields: + data[field] = getter.get(field) + + return data class Datasets(BaseModel): @@ -137,7 +147,7 @@ class Datasets(BaseModel): class DatasetCreate(BaseModel): name: DatasetName - guidelines: Optional[DatasetGuidelines] + guidelines: Optional[DatasetGuidelines] = None allow_extra_metadata: bool = True distribution: DatasetDistributionCreate = DatasetOverlapDistributionCreate( strategy=DatasetDistributionStrategy.overlap, @@ -148,10 +158,10 @@ class DatasetCreate(BaseModel): class DatasetUpdate(UpdateSchema): - name: Optional[DatasetName] - guidelines: Optional[DatasetGuidelines] - allow_extra_metadata: Optional[bool] - distribution: Optional[DatasetDistributionUpdate] + name: Optional[DatasetName] = None + guidelines: Optional[DatasetGuidelines] = None + allow_extra_metadata: Optional[bool] = None + distribution: Optional[DatasetDistributionUpdate] = None metadata_: Optional[Dict[str, Any]] = Field(None, alias="metadata") __non_nullable_fields__ = {"name", "allow_extra_metadata", "distribution"} @@ -163,7 +173,7 @@ class HubDatasetMappingItem(BaseModel): class HubDatasetMapping(BaseModel): - fields: List[HubDatasetMappingItem] = Field(..., min_items=1) + fields: List[HubDatasetMappingItem] = Field(..., min_length=1) metadata: Optional[List[HubDatasetMappingItem]] = [] suggestions: Optional[List[HubDatasetMappingItem]] = [] external_id: Optional[str] = None diff --git a/argilla-server/src/argilla_server/api/schemas/v1/fields.py b/argilla-server/src/argilla_server/api/schemas/v1/fields.py index ecc7b4eb33..fd72a25f5a 100644 --- 
a/argilla-server/src/argilla_server/api/schemas/v1/fields.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/fields.py @@ -18,8 +18,7 @@ from argilla_server.api.schemas.v1.commons import UpdateSchema from argilla_server.enums import FieldType -from argilla_server.pydantic_v1 import BaseModel, constr -from argilla_server.pydantic_v1 import Field as PydanticField +from pydantic import BaseModel, constr, Field as PydanticField, ConfigDict FIELD_CREATE_NAME_MIN_LENGTH = 1 FIELD_CREATE_NAME_MAX_LENGTH = 200 @@ -145,8 +144,7 @@ class Field(BaseModel): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class Fields(BaseModel): @@ -156,12 +154,12 @@ class Fields(BaseModel): class FieldCreate(BaseModel): name: FieldName title: FieldTitle - required: Optional[bool] + required: Optional[bool] = None settings: FieldSettingsCreate class FieldUpdate(UpdateSchema): - title: Optional[FieldTitle] - settings: Optional[FieldSettingsUpdate] + title: Optional[FieldTitle] = None + settings: Optional[FieldSettingsUpdate] = None __non_nullable_fields__ = {"title", "settings"} diff --git a/argilla-server/src/argilla_server/api/schemas/v1/info.py b/argilla-server/src/argilla_server/api/schemas/v1/info.py index 8ec7da13ce..307955ebde 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/info.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/info.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from argilla_server.pydantic_v1 import BaseModel +from pydantic import BaseModel class Version(BaseModel): diff --git a/argilla-server/src/argilla_server/api/schemas/v1/metadata_properties.py b/argilla-server/src/argilla_server/api/schemas/v1/metadata_properties.py index 55397a7a30..bbb26bf1f3 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/metadata_properties.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/metadata_properties.py @@ -18,8 +18,7 @@ from argilla_server.api.schemas.v1.commons import UpdateSchema from argilla_server.enums import MetadataPropertyType -from argilla_server.pydantic_v1 import BaseModel, Field, constr, root_validator, validator -from argilla_server.pydantic_v1.generics import GenericModel +from pydantic import BaseModel, Field, constr, ConfigDict, field_validator, model_validator FLOAT_METADATA_METRICS_PRECISION = 5 @@ -43,7 +42,7 @@ class TermCount(BaseModel): term: Any count: int - type: Literal[MetadataPropertyType.terms] = Field(MetadataPropertyType.terms, const=True) + type: Literal[MetadataPropertyType.terms] = MetadataPropertyType.terms total: int values: List[TermCount] = Field(default_factory=list) @@ -51,20 +50,21 @@ class TermCount(BaseModel): NT = TypeVar("NT", int, float) -class NumericMetadataMetrics(GenericModel, Generic[NT]): - min: Optional[NT] - max: Optional[NT] +class NumericMetadataMetrics(BaseModel, Generic[NT]): + min: Optional[NT] = None + max: Optional[NT] = None class IntegerMetadataMetrics(NumericMetadataMetrics[int]): - type: Literal[MetadataPropertyType.integer] = Field(MetadataPropertyType.integer, const=True) + type: Literal[MetadataPropertyType.integer] = MetadataPropertyType.integer class FloatMetadataMetrics(NumericMetadataMetrics[float]): - type: Literal[MetadataPropertyType.float] = Field(MetadataPropertyType.float, const=True) + type: Literal[MetadataPropertyType.float] = MetadataPropertyType.float - @validator("min", "max") - def round_result(cls, v: float): + 
@field_validator("min", "max") + @classmethod + def round_result(cls, v: Optional[float]) -> Optional[float]: if v is not None: return round(v, FLOAT_METADATA_METRICS_PRECISION) return v @@ -117,25 +117,28 @@ class FloatMetadataProperty(BaseModel): ] -class NumericMetadataProperty(GenericModel, Generic[NT]): +class NumericMetadataProperty(BaseModel, Generic[NT]): min: Optional[NT] = None max: Optional[NT] = None - @root_validator(skip_on_failure=True) - def check_bounds(cls, values: Dict[str, Any]) -> Dict[str, Any]: - min = values.get("min") - max = values.get("max") + @model_validator(mode="after") + @classmethod + def check_bounds(cls, instance: "NumericMetadataProperty") -> "NumericMetadataProperty": + min = instance.min + max = instance.max if min is not None and max is not None and min >= max: raise ValueError(f"'min' ({min}) must be lower than 'max' ({max})") - return values + return instance class TermsMetadataPropertyCreate(BaseModel): type: Literal[MetadataPropertyType.terms] values: Optional[List[Any]] = Field( - None, min_items=TERMS_METADATA_PROPERTY_VALUES_MIN_ITEMS, max_items=TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS + None, + min_length=TERMS_METADATA_PROPERTY_VALUES_MIN_ITEMS, + max_length=TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS, ) @@ -163,8 +166,7 @@ class MetadataProperty(BaseModel): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class MetadataProperties(BaseModel): @@ -179,7 +181,7 @@ class MetadataPropertyCreate(BaseModel): class MetadataPropertyUpdate(UpdateSchema): - title: Optional[MetadataPropertyTitle] - visible_for_annotators: Optional[bool] + title: Optional[MetadataPropertyTitle] = None + visible_for_annotators: Optional[bool] = None __non_nullable_fields__ = {"title", "visible_for_annotators"} diff --git a/argilla-server/src/argilla_server/api/schemas/v1/questions.py b/argilla-server/src/argilla_server/api/schemas/v1/questions.py index 
02e0ff3189..90755c947a 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/questions.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/questions.py @@ -19,7 +19,7 @@ from argilla_server.api.schemas.v1.commons import UpdateSchema from argilla_server.api.schemas.v1.fields import FieldName from argilla_server.enums import OptionsOrder, QuestionType -from argilla_server.pydantic_v1 import BaseModel, Field, conlist, constr, root_validator +from pydantic import BaseModel, Field, conlist, constr, root_validator, ConfigDict, model_validator from argilla_server.settings import settings try: @@ -27,7 +27,6 @@ except ImportError: from typing_extensions import Annotated - QUESTION_CREATE_NAME_MIN_LENGTH = 1 QUESTION_CREATE_NAME_MAX_LENGTH = 200 @@ -60,9 +59,10 @@ class UniqueValuesCheckerMixin(BaseModel): - @root_validator(skip_on_failure=True) - def check_unique_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: - options = values.get("options", []) + @model_validator(mode="after") + @classmethod + def check_unique_values(cls, instance: "UniqueValuesCheckerMixin") -> "UniqueValuesCheckerMixin": + options = instance.options or [] seen = set() duplicates = set() for option in options: @@ -72,7 +72,7 @@ def check_unique_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: seen.add(option.value) if duplicates: raise ValueError(f"Option values must be unique, found duplicates: {duplicates}") - return values + return instance # Option-based settings @@ -112,7 +112,7 @@ class TextQuestionSettingsCreate(BaseModel): class TextQuestionSettingsUpdate(UpdateSchema): type: Literal[QuestionType.text] - use_markdown: Optional[bool] + use_markdown: Optional[bool] = None __non_nullable_fields__ = {"use_markdown"} @@ -134,8 +134,8 @@ class RatingQuestionSettings(BaseModel): class RatingQuestionSettingsCreate(UniqueValuesCheckerMixin): type: Literal[QuestionType.rating] options: List[RatingQuestionSettingsOptionCreate] = Field( - min_items=RATING_OPTIONS_MIN_ITEMS, - 
max_items=RATING_OPTIONS_MAX_ITEMS, + min_length=RATING_OPTIONS_MIN_ITEMS, + max_length=RATING_OPTIONS_MAX_ITEMS, ) @@ -154,23 +154,26 @@ class LabelSelectionQuestionSettingsCreate(UniqueValuesCheckerMixin): type: Literal[QuestionType.label_selection] options: conlist( item_type=OptionSettingsCreate, - min_items=LABEL_SELECTION_OPTIONS_MIN_ITEMS, - max_items=settings.label_selection_options_max_items, + min_length=LABEL_SELECTION_OPTIONS_MIN_ITEMS, + max_length=settings.label_selection_options_max_items, ) visible_options: Optional[int] = Field(None, ge=LABEL_SELECTION_MIN_VISIBLE_OPTIONS) - @root_validator(skip_on_failure=True) - def check_visible_options_value(cls, values: Dict[str, Any]) -> Dict[str, Any]: - visible_options = values.get("visible_options") + @model_validator(mode="after") + @classmethod + def check_visible_options_value( + cls, instance: "LabelSelectionQuestionSettingsCreate" + ) -> "LabelSelectionQuestionSettingsCreate": + visible_options = instance.visible_options if visible_options is not None: - num_options = len(values["options"]) + num_options = len(instance.options) if visible_options > num_options: raise ValueError( "the value for 'visible_options' must be less or equal to the number of items in 'options'" f" ({num_options})" ) - return values + return instance class LabelSelectionSettingsUpdate(UpdateSchema): @@ -179,10 +182,10 @@ class LabelSelectionSettingsUpdate(UpdateSchema): options: Optional[ conlist( item_type=OptionSettings, - min_items=LABEL_SELECTION_OPTIONS_MIN_ITEMS, - max_items=settings.label_selection_options_max_items, + min_length=LABEL_SELECTION_OPTIONS_MIN_ITEMS, + max_length=settings.label_selection_options_max_items, ) - ] + ] = None # Multi-label selection question @@ -198,7 +201,7 @@ class MultiLabelSelectionQuestionSettingsCreate(LabelSelectionQuestionSettingsCr class MultiLabelSelectionQuestionSettingsUpdate(LabelSelectionSettingsUpdate): type: Literal[QuestionType.multi_label_selection] - options_order: 
Optional[OptionsOrder] + options_order: Optional[OptionsOrder] = None __non_nullable_fields__ = {"options_order"} @@ -213,8 +216,8 @@ class RankingQuestionSettingsCreate(UniqueValuesCheckerMixin): type: Literal[QuestionType.ranking] options: conlist( item_type=OptionSettingsCreate, - min_items=RANKING_OPTIONS_MIN_ITEMS, - max_items=RANKING_OPTIONS_MAX_ITEMS, + min_length=RANKING_OPTIONS_MIN_ITEMS, + max_length=RANKING_OPTIONS_MAX_ITEMS, ) @@ -238,24 +241,25 @@ class SpanQuestionSettingsCreate(UniqueValuesCheckerMixin): field: FieldName options: conlist( item_type=OptionSettingsCreate, - min_items=SPAN_OPTIONS_MIN_ITEMS, - max_items=settings.span_options_max_items, + min_length=SPAN_OPTIONS_MIN_ITEMS, + max_length=settings.span_options_max_items, ) visible_options: Optional[int] = Field(None, ge=SPAN_MIN_VISIBLE_OPTIONS) allow_overlapping: bool = False - @root_validator(skip_on_failure=True) - def check_visible_options_value(cls, values: Dict[str, Any]) -> Dict[str, Any]: - visible_options = values.get("visible_options") + @model_validator(mode="after") + @classmethod + def check_visible_options_value(cls, instance: "SpanQuestionSettingsCreate") -> "SpanQuestionSettingsCreate": + visible_options = instance.visible_options if visible_options is not None: - num_options = len(values["options"]) + num_options = len(instance.options) if visible_options > num_options: raise ValueError( "the value for 'visible_options' must be less or equal to the number of items in 'options'" f" ({num_options})" ) - return values + return instance class SpanQuestionSettingsUpdate(UpdateSchema): @@ -263,12 +267,12 @@ class SpanQuestionSettingsUpdate(UpdateSchema): options: Optional[ conlist( item_type=OptionSettings, - min_items=SPAN_OPTIONS_MIN_ITEMS, - max_items=settings.span_options_max_items, + min_length=SPAN_OPTIONS_MIN_ITEMS, + max_length=settings.span_options_max_items, ) - ] + ] = None visible_options: Optional[int] = Field(None, ge=SPAN_MIN_VISIBLE_OPTIONS) - allow_overlapping: 
Optional[bool] + allow_overlapping: Optional[bool] = None QuestionSettings = Annotated[ @@ -283,7 +287,6 @@ class SpanQuestionSettingsUpdate(UpdateSchema): Field(..., discriminator="type"), ] - QuestionName = Annotated[ constr( min_length=QUESTION_CREATE_NAME_MIN_LENGTH, @@ -292,7 +295,6 @@ class SpanQuestionSettingsUpdate(UpdateSchema): Field(..., description="The name of the question"), ] - QuestionTitle = Annotated[ constr( min_length=QUESTION_CREATE_TITLE_MIN_LENGTH, @@ -301,7 +303,6 @@ class SpanQuestionSettingsUpdate(UpdateSchema): Field(..., description="The title of the question"), ] - QuestionDescription = Annotated[ constr( min_length=QUESTION_CREATE_DESCRIPTION_MIN_LENGTH, @@ -310,7 +311,6 @@ class SpanQuestionSettingsUpdate(UpdateSchema): Field(..., description="The description of the question"), ] - QuestionSettingsCreate = Annotated[ Union[ TextQuestionSettingsCreate, @@ -323,7 +323,6 @@ class SpanQuestionSettingsUpdate(UpdateSchema): Field(discriminator="type"), ] - QuestionSettingsUpdate = Annotated[ Union[ TextQuestionSettingsUpdate, @@ -341,15 +340,14 @@ class Question(BaseModel): id: UUID name: str title: str - description: Optional[str] + description: Optional[str] = None required: bool settings: QuestionSettings dataset_id: UUID inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class Questions(BaseModel): @@ -359,14 +357,14 @@ class Questions(BaseModel): class QuestionCreate(BaseModel): name: QuestionName title: QuestionTitle - description: Optional[QuestionDescription] - required: Optional[bool] + description: Optional[QuestionDescription] = None + required: Optional[bool] = None settings: QuestionSettingsCreate class QuestionUpdate(UpdateSchema): - title: Optional[QuestionTitle] - description: Optional[QuestionDescription] - settings: Optional[QuestionSettingsUpdate] + title: Optional[QuestionTitle] = None + description: Optional[QuestionDescription] = None + 
settings: Optional[QuestionSettingsUpdate] = None __non_nullable_fields__ = {"title", "settings"} diff --git a/argilla-server/src/argilla_server/api/schemas/v1/records.py b/argilla-server/src/argilla_server/api/schemas/v1/records.py index 65967a75dd..28ca12e455 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/records.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/records.py @@ -11,20 +11,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from datetime import datetime from typing import Annotated, Any, Dict, List, Literal, Optional, Union from uuid import UUID - from argilla_server.api.schemas.v1.chat import ChatFieldValue from argilla_server.api.schemas.v1.commons import UpdateSchema from argilla_server.api.schemas.v1.metadata_properties import MetadataPropertyName from argilla_server.api.schemas.v1.responses import Response, ResponseFilterScope, UserResponseCreate from argilla_server.api.schemas.v1.suggestions import Suggestion, SuggestionCreate, SuggestionFilterScope from argilla_server.enums import RecordInclude, RecordSortField, SimilarityOrder, SortOrder, RecordStatus -from argilla_server.pydantic_v1 import BaseModel, Field, StrictStr, root_validator, validator, ValidationError -from argilla_server.pydantic_v1.utils import GetterDict +from pydantic import ( + BaseModel, + Field, + StrictStr, + root_validator, + validator, + ValidationError, + ConfigDict, + model_validator, + field_validator, +) +from pydantic.v1.utils import GetterDict from argilla_server.search_engine import TextQuery RECORDS_CREATE_MIN_ITEMS = 1 @@ -48,7 +56,7 @@ class RecordGetterDict(GetterDict): - def get(self, key: str, default: Any) -> Any: + def get(self, key: Any, default: Any = None) -> Any: if key == "metadata": return getattr(self._obj, "metadata_", None) @@ -71,20 +79,39 @@ class Record(BaseModel): id: UUID status: 
RecordStatus fields: Dict[str, Any] - metadata: Optional[Dict[str, Any]] - external_id: Optional[str] + metadata: Optional[Dict[str, Any]] = None + external_id: Optional[str] = None # TODO: move `responses` to `response` since contextualized endpoint will contains only the user response # response: Optional[Response] - responses: Optional[List[Response]] - suggestions: Optional[List[Suggestion]] - vectors: Optional[Dict[str, List[float]]] + responses: Optional[List[Response]] = None + suggestions: Optional[List[Suggestion]] = None + vectors: Optional[Dict[str, List[float]]] = None dataset_id: UUID inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True - getter_dict = RecordGetterDict + model_config = ConfigDict(from_attributes=True) + + @model_validator(mode="before") + @classmethod + def validate(cls, value) -> dict: + getter = RecordGetterDict(value) + + data = {} + for field in cls.model_fields: + data[field] = getter.get(field) + + # TODO: This is a workaround to avoid sending None when the relationship is not loaded + if not value.is_relationship_loaded("responses"): + data.pop("responses") + + if not value.is_relationship_loaded("suggestions"): + data.pop("suggestions") + + if not value.is_relationship_loaded("vectors"): + data.pop("vectors") + + return data FieldValueCreate = Union[StrictStr, List[ChatFieldValue], Dict[StrictStr, Any], None] @@ -92,13 +119,16 @@ class Config: class RecordCreate(BaseModel): fields: Dict[str, FieldValueCreate] - metadata: Optional[Dict[str, Any]] - external_id: Optional[str] - responses: Optional[List[UserResponseCreate]] - suggestions: Optional[List[SuggestionCreate]] - vectors: Optional[Dict[str, List[float]]] + metadata: Optional[Dict[str, Any]] = None + external_id: Optional[str] = None + responses: Optional[List[UserResponseCreate]] = None + suggestions: Optional[List[SuggestionCreate]] = None + vectors: Optional[Dict[str, List[float]]] = None - @validator("fields", pre=True) + # This config is used 
to coerce numbers to strings in the fields to align with the previous behavior + model_config = ConfigDict(coerce_numbers_to_str=True) + + @field_validator("fields", mode="before") @classmethod def validate_chat_field_content(cls, fields: Any): if not isinstance(fields, dict): @@ -121,7 +151,7 @@ def validate_chat_field_content(cls, fields: Any): return fields - @validator("responses") + @field_validator("responses") @classmethod def check_user_id_is_unique( cls, responses: Optional[List[UserResponseCreate]] @@ -137,7 +167,7 @@ def check_user_id_is_unique( return responses - @validator("metadata") + @field_validator("metadata") @classmethod def prevent_nan_values(cls, metadata: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: if metadata is None: @@ -153,7 +183,7 @@ def prevent_nan_values(cls, metadata: Optional[Dict[str, Any]]) -> Optional[Dict class RecordUpdate(UpdateSchema): metadata_: Optional[Dict[str, Any]] = Field(None, alias="metadata") suggestions: Optional[List[SuggestionCreate]] = None - vectors: Optional[Dict[str, List[float]]] + vectors: Optional[Dict[str, List[float]]] = None @property def metadata(self) -> Optional[Dict[str, Any]]: @@ -161,7 +191,7 @@ def metadata(self) -> Optional[Dict[str, Any]]: # TODO(@frascuchon): This will be properly adapted once the bulk records refactor is completed. 
return self.metadata_ - @validator("metadata_") + @field_validator("metadata_") @classmethod def prevent_nan_values(cls, metadata: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: if metadata is None: @@ -179,7 +209,7 @@ class RecordUpdateWithId(RecordUpdate): class RecordUpsert(RecordCreate): - id: Optional[UUID] + id: Optional[UUID] = None fields: Optional[Dict[str, FieldValueCreate]] = None @@ -187,13 +217,14 @@ class RecordIncludeParam(BaseModel): relationships: Optional[List[RecordInclude]] = Field(None, alias="keys") vectors: Optional[List[str]] = Field(None, alias="vectors") - @root_validator(skip_on_failure=True) - def check(cls, values: Dict[str, Any]) -> Dict[str, Any]: - relationships = values.get("relationships") + @model_validator(mode="after") + @classmethod + def check(cls, instance: "RecordIncludeParam") -> "RecordIncludeParam": + relationships = instance.relationships if not relationships: - return values + return instance - vectors = values.get("vectors") + vectors = instance.vectors if vectors is not None and len(vectors) > 0 and RecordInclude.vectors in relationships: # TODO: once we have a exception handler for ValueError in v1, remove HTTPException # raise ValueError("Cannot include both 'vectors' and 'relationships' in the same request") @@ -201,7 +232,7 @@ def check(cls, values: Dict[str, Any]) -> Dict[str, Any]: "'include' query param cannot have both 'vectors' and 'vectors:vector_settings_name_1,vectors_settings_name_2,...'", ) - return values + return instance @property def with_responses(self) -> bool: @@ -242,12 +273,14 @@ class Records(BaseModel): class RecordsCreate(BaseModel): - items: List[RecordCreate] = Field(..., min_items=RECORDS_CREATE_MIN_ITEMS, max_items=RECORDS_CREATE_MAX_ITEMS) + items: List[RecordCreate] = Field(..., min_length=RECORDS_CREATE_MIN_ITEMS, max_length=RECORDS_CREATE_MAX_ITEMS) class RecordsUpdate(BaseModel): # TODO: review this definition and align to create model - items: List[RecordUpdateWithId] = 
Field(..., min_items=RECORDS_UPDATE_MIN_ITEMS, max_items=RECORDS_UPDATE_MAX_ITEMS) + items: List[RecordUpdateWithId] = Field( + ..., min_length=RECORDS_UPDATE_MIN_ITEMS, max_length=RECORDS_UPDATE_MAX_ITEMS + ) class MetadataParsedQueryParam: @@ -264,16 +297,17 @@ class VectorQuery(BaseModel): value: Optional[List[float]] = None order: SimilarityOrder = SimilarityOrder.most_similar - @root_validator(skip_on_failure=True) - def check_required(cls, values: dict) -> dict: + @model_validator(mode="after") + @classmethod + def check_required(cls, instance: "VectorQuery") -> "VectorQuery": """Check that either 'record_id' or 'value' is provided""" - record_id = values.get("record_id") - value = values.get("value") + record_id = instance.record_id + value = instance.value if bool(record_id) == bool(value): raise ValueError("Either 'record_id' or 'value' must be provided") - return values + return instance class Query(BaseModel): @@ -305,18 +339,21 @@ class Order(BaseModel): class TermsFilter(BaseModel): type: Literal["terms"] scope: FilterScope - values: List[str] = Field(..., min_items=TERMS_FILTER_VALUES_MIN_ITEMS, max_items=TERMS_FILTER_VALUES_MAX_ITEMS) + values: List[str] = Field(..., min_length=TERMS_FILTER_VALUES_MIN_ITEMS, max_length=TERMS_FILTER_VALUES_MAX_ITEMS) + + model_config = ConfigDict(coerce_numbers_to_str=True) class RangeFilter(BaseModel): type: Literal["range"] scope: FilterScope - ge: Optional[Union[float, str]] - le: Optional[Union[float, str]] + ge: Optional[Union[float, str]] = None + le: Optional[Union[float, str]] = None - @root_validator(skip_on_failure=True) - def check_ge_and_le(cls, values: dict) -> dict: - ge, le = values.get("ge"), values.get("le") + @model_validator(mode="after") + @classmethod + def check_ge_and_le(cls, instance: "RangeFilter") -> "RangeFilter": + ge, le = instance.ge, instance.le if ge is None and le is None: raise ValueError("At least one of 'ge' or 'le' must be provided") @@ -324,27 +361,36 @@ def check_ge_and_le(cls, 
values: dict) -> dict: if ge is not None and le is not None and ge > le: raise ValueError("'ge' must have a value less than or equal to 'le'") - return values + return instance Filter = Annotated[Union[TermsFilter, RangeFilter], Field(..., discriminator="type")] class Filters(BaseModel): - and_: List[Filter] = Field(None, alias="and", min_items=FILTERS_AND_MIN_ITEMS, max_items=FILTERS_AND_MAX_ITEMS) + and_: Optional[List[Filter]] = Field( + None, + alias="and", + min_length=FILTERS_AND_MIN_ITEMS, + max_length=FILTERS_AND_MAX_ITEMS, + ) class SearchRecordsQuery(BaseModel): - query: Optional[Query] - filters: Optional[Filters] + query: Optional[Query] = None + filters: Optional[Filters] = None sort: Optional[List[Order]] = Field( - None, min_items=SEARCH_RECORDS_QUERY_SORT_MIN_ITEMS, max_items=SEARCH_RECORDS_QUERY_SORT_MAX_ITEMS + None, + min_length=SEARCH_RECORDS_QUERY_SORT_MIN_ITEMS, + max_length=SEARCH_RECORDS_QUERY_SORT_MAX_ITEMS, ) + model_config = ConfigDict(arbitrary_types_allowed=True) + class SearchRecord(BaseModel): record: Record - query_score: Optional[float] + query_score: Optional[float] = None class SearchRecordsResult(BaseModel): diff --git a/argilla-server/src/argilla_server/api/schemas/v1/records_bulk.py b/argilla-server/src/argilla_server/api/schemas/v1/records_bulk.py index fe64a6ca77..c9a945df4b 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/records_bulk.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/records_bulk.py @@ -15,8 +15,9 @@ from typing import List from uuid import UUID +from pydantic import BaseModel, Field, field_validator + from argilla_server.api.schemas.v1.records import Record, RecordCreate, RecordUpsert -from argilla_server.pydantic_v1 import BaseModel, Field, validator RECORDS_BULK_CREATE_MIN_ITEMS = 1 RECORDS_BULK_CREATE_MAX_ITEMS = 500 @@ -35,10 +36,10 @@ class RecordsBulkWithUpdateInfo(RecordsBulk): class RecordsBulkCreate(BaseModel): items: List[RecordCreate] = Field( - ..., 
min_items=RECORDS_BULK_CREATE_MIN_ITEMS, max_items=RECORDS_BULK_CREATE_MAX_ITEMS + ..., min_length=RECORDS_BULK_CREATE_MIN_ITEMS, max_length=RECORDS_BULK_CREATE_MAX_ITEMS ) - @validator("items") + @field_validator("items") @classmethod def check_unique_external_ids(cls, items: List[RecordCreate]) -> List[RecordCreate]: """Check that external_ids are unique""" @@ -51,5 +52,5 @@ def check_unique_external_ids(cls, items: List[RecordCreate]) -> List[RecordCrea class RecordsBulkUpsert(RecordsBulkCreate): items: List[RecordUpsert] = Field( - ..., min_items=RECORDS_BULK_UPSERT_MIN_ITEMS, max_items=RECORDS_BULK_UPSERT_MAX_ITEMS + ..., min_length=RECORDS_BULK_UPSERT_MIN_ITEMS, max_length=RECORDS_BULK_UPSERT_MAX_ITEMS ) diff --git a/argilla-server/src/argilla_server/api/schemas/v1/responses.py b/argilla-server/src/argilla_server/api/schemas/v1/responses.py index b3c5a48d54..5577f1a64c 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/responses.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/responses.py @@ -20,7 +20,7 @@ from argilla_server.api.schemas.v1.questions import QuestionName from argilla_server.enums import ResponseStatus -from argilla_server.pydantic_v1 import BaseModel, Field, StrictInt, StrictStr, root_validator +from pydantic import BaseModel, Field, StrictInt, StrictStr, root_validator, ConfigDict, model_validator RESPONSES_BULK_CREATE_MIN_ITEMS = 1 RESPONSES_BULK_CREATE_MAX_ITEMS = 100 @@ -33,7 +33,7 @@ class RankingQuestionResponseValueItem(BaseModel): value: str - rank: Optional[int] + rank: Optional[int] = None class SpanQuestionResponseValueItem(BaseModel): @@ -41,19 +41,20 @@ class SpanQuestionResponseValueItem(BaseModel): start: int = Field(..., ge=SPAN_QUESTION_RESPONSE_VALUE_ITEM_START_GREATER_THAN_OR_EQUAL) end: int = Field(..., ge=SPAN_QUESTION_RESPONSE_VALUE_ITEM_END_GREATER_THAN_OR_EQUAL) - @root_validator(skip_on_failure=True) - def check_start_and_end(cls, values: Dict[str, Any]) -> Dict[str, Any]: - start, end = 
values.get("start"), values.get("end") + @model_validator(mode="after") + @classmethod + def check_start_and_end(cls, instance: "SpanQuestionResponseValueItem") -> "SpanQuestionResponseValueItem": + start, end = instance.start, instance.end if start is not None and end is not None and end <= start: raise ValueError("span question response value 'end' must have a value greater than 'start'") - return values + return instance RankingQuestionResponseValue = List[RankingQuestionResponseValueItem] SpanQuestionResponseValue = Annotated[ - List[SpanQuestionResponseValueItem], Field(..., max_items=SPAN_QUESTION_RESPONSE_VALUE_MAX_ITEMS) + List[SpanQuestionResponseValueItem], Field(..., max_length=SPAN_QUESTION_RESPONSE_VALUE_MAX_ITEMS) ] MultiLabelSelectionQuestionResponseValue = List[str] RatingQuestionResponseValue = StrictInt @@ -75,10 +76,14 @@ class ResponseValue(BaseModel): class ResponseValueCreate(BaseModel): value: ResponseValueTypes + model_config = ConfigDict(coerce_numbers_to_str=True) + class ResponseValueUpdate(BaseModel): value: ResponseValueTypes + model_config = ConfigDict(coerce_numbers_to_str=True) + ResponseValues = Dict[str, ResponseValue] ResponseValuesCreate = Dict[QuestionName, ResponseValueCreate] @@ -87,26 +92,25 @@ class ResponseValueUpdate(BaseModel): class Response(BaseModel): id: UUID - values: Optional[ResponseValues] + values: Optional[ResponseValues] = None status: ResponseStatus record_id: UUID user_id: UUID inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class ResponseCreate(BaseModel): - values: Optional[ResponseValuesCreate] + values: Optional[ResponseValuesCreate] = None status: ResponseStatus class ResponseFilterScope(BaseModel): entity: Literal["response"] - question: Optional[QuestionName] - property: Optional[Literal["status"]] + question: Optional[QuestionName] = None + property: Optional[Literal["status"]] = None class 
SubmittedResponseUpdate(BaseModel): @@ -115,12 +119,12 @@ class SubmittedResponseUpdate(BaseModel): class DiscardedResponseUpdate(BaseModel): - values: Optional[ResponseValuesUpdate] + values: Optional[ResponseValuesUpdate] = None status: Literal[ResponseStatus.discarded] class DraftResponseUpdate(BaseModel): - values: Optional[ResponseValuesUpdate] + values: Optional[ResponseValuesUpdate] = None status: Literal[ResponseStatus.draft] @@ -137,13 +141,13 @@ class SubmittedResponseUpsert(BaseModel): class DiscardedResponseUpsert(BaseModel): - values: Optional[ResponseValuesUpdate] + values: Optional[ResponseValuesUpdate] = None status: Literal[ResponseStatus.discarded] record_id: UUID class DraftResponseUpsert(BaseModel): - values: Optional[ResponseValuesUpdate] + values: Optional[ResponseValuesUpdate] = None status: Literal[ResponseStatus.draft] record_id: UUID @@ -157,8 +161,8 @@ class DraftResponseUpsert(BaseModel): class ResponsesBulkCreate(BaseModel): items: List[ResponseUpsert] = Field( ..., - min_items=RESPONSES_BULK_CREATE_MIN_ITEMS, - max_items=RESPONSES_BULK_CREATE_MAX_ITEMS, + min_length=RESPONSES_BULK_CREATE_MIN_ITEMS, + max_length=RESPONSES_BULK_CREATE_MAX_ITEMS, ) @@ -167,8 +171,8 @@ class ResponseBulkError(BaseModel): class ResponseBulk(BaseModel): - item: Optional[Response] - error: Optional[ResponseBulkError] + item: Optional[Response] = None + error: Optional[ResponseBulkError] = None class ResponsesBulk(BaseModel): @@ -183,7 +187,7 @@ class UserDraftResponseCreate(BaseModel): class UserDiscardedResponseCreate(BaseModel): user_id: UUID - values: Optional[ResponseValuesCreate] + values: Optional[ResponseValuesCreate] = None status: Literal[ResponseStatus.discarded] diff --git a/argilla-server/src/argilla_server/api/schemas/v1/settings.py b/argilla-server/src/argilla_server/api/schemas/v1/settings.py index a8d00562ca..a427386135 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/settings.py +++ 
b/argilla-server/src/argilla_server/api/schemas/v1/settings.py @@ -14,14 +14,25 @@ from typing import Optional -from argilla_server.integrations.huggingface.spaces import HuggingfaceSettings -from argilla_server.pydantic_v1 import BaseModel +from pydantic import BaseModel, ConfigDict + + +class HuggingfaceSettings(BaseModel): + space_id: Optional[str] + space_title: Optional[str] + space_subdomain: Optional[str] + space_host: Optional[str] + space_repo_name: Optional[str] + space_author_name: Optional[str] + space_persistent_storage_enabled: bool + + model_config = ConfigDict(from_attributes=True) class ArgillaSettings(BaseModel): - show_huggingface_space_persistent_storage_warning: Optional[bool] + show_huggingface_space_persistent_storage_warning: Optional[bool] = None class Settings(BaseModel): argilla: ArgillaSettings - huggingface: Optional[HuggingfaceSettings] + huggingface: Optional[HuggingfaceSettings] = None diff --git a/argilla-server/src/argilla_server/api/schemas/v1/suggestions.py b/argilla-server/src/argilla_server/api/schemas/v1/suggestions.py index 7d3726579d..274dc64b8b 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/suggestions.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/suggestions.py @@ -16,6 +16,8 @@ from typing import Any, List, Literal, Optional, Union from uuid import UUID +from pydantic import BaseModel, Field, ConfigDict, field_validator + from argilla_server.api.schemas.v1.questions import QuestionName from argilla_server.api.schemas.v1.responses import ( MultiLabelSelectionQuestionResponseValue, @@ -25,9 +27,8 @@ TextAndLabelSelectionQuestionResponseValue, ) from argilla_server.enums import SuggestionType -from argilla_server.pydantic_v1 import BaseModel, Field -AGENT_REGEX = r"^(?=.*[a-zA-Z0-9])[a-zA-Z0-9-_:\.\/\s]+$" +AGENT_REGEX = r"^[a-zA-Z0-9-_:\.\/\s]*[a-zA-Z0-9][a-zA-Z0-9-_:\.\/\s]*$" AGENT_MIN_LENGTH = 1 AGENT_MAX_LENGTH = 200 @@ -58,10 +59,10 @@ class SearchSuggestionsOptions(BaseModel): class 
BaseSuggestion(BaseModel): question_id: UUID - type: Optional[SuggestionType] + type: Optional[SuggestionType] = None value: Any - agent: Optional[str] - score: Optional[Union[float, List[float]]] + agent: Optional[str] = None + score: Optional[Union[float, List[float]]] = None class Suggestion(BaseSuggestion): @@ -69,8 +70,7 @@ class Suggestion(BaseSuggestion): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class Suggestions(BaseModel): @@ -87,15 +87,28 @@ class SuggestionCreate(BaseSuggestion): ] agent: Optional[str] = Field( None, - regex=AGENT_REGEX, + pattern=AGENT_REGEX, min_length=AGENT_MIN_LENGTH, max_length=AGENT_MAX_LENGTH, description="Agent used to generate the suggestion", ) - score: Optional[Union[float, List[float]]] = Field( - None, - min_items=SCORE_MIN_ITEMS, - ge=SCORE_GREATER_THAN_OR_EQUAL, - le=SCORE_LESS_THAN_OR_EQUAL, - description="The score assigned to the suggestion", - ) + score: Optional[Union[float, List[float]]] = Field(None, description="The score assigned to the suggestion") + + @field_validator("score") + @classmethod + def validate_score(cls, v): + if v is None: + return v + + if isinstance(v, list): + if len(v) < SCORE_MIN_ITEMS: + raise ValueError("score must have at least one item") + scores = v + else: + scores = [v] + + for score in scores: + if not (SCORE_GREATER_THAN_OR_EQUAL <= score <= SCORE_LESS_THAN_OR_EQUAL): + raise ValueError("score must be between 0 and 1") + + return v diff --git a/argilla-server/src/argilla_server/api/schemas/v1/users.py b/argilla-server/src/argilla_server/api/schemas/v1/users.py index 1480001552..f45dbda70c 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/users.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/users.py @@ -16,8 +16,9 @@ from typing import List, Optional from uuid import UUID +from pydantic import BaseModel, Field, constr, ConfigDict + from argilla_server.enums import UserRole -from 
argilla_server.pydantic_v1 import BaseModel, Field, constr USER_PASSWORD_MIN_LENGTH = 8 USER_PASSWORD_MAX_LENGTH = 100 @@ -26,7 +27,7 @@ class User(BaseModel): id: UUID first_name: str - last_name: Optional[str] + last_name: Optional[str] = None username: str role: UserRole # TODO: We need to move `api_key` outside of this schema and think about a more @@ -35,16 +36,15 @@ class User(BaseModel): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class UserCreate(BaseModel): - first_name: constr(min_length=1, strip_whitespace=True) - last_name: Optional[constr(min_length=1, strip_whitespace=True)] username: str = Field(..., min_length=1) - role: Optional[UserRole] password: str = Field(min_length=USER_PASSWORD_MIN_LENGTH, max_length=USER_PASSWORD_MAX_LENGTH) + first_name: constr(min_length=1, strip_whitespace=True) + last_name: Optional[constr(min_length=1, strip_whitespace=True)] = None + role: Optional[UserRole] = None class Users(BaseModel): diff --git a/argilla-server/src/argilla_server/api/schemas/v1/vector_settings.py b/argilla-server/src/argilla_server/api/schemas/v1/vector_settings.py index 4715f2b2ab..d7fab06f56 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/vector_settings.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/vector_settings.py @@ -18,7 +18,7 @@ from argilla_server.api.schemas.v1.commons import UpdateSchema from argilla_server.errors.future import UnprocessableEntityError -from argilla_server.pydantic_v1 import BaseModel, Field, PositiveInt, constr +from pydantic import BaseModel, Field, PositiveInt, constr, ConfigDict VECTOR_SETTINGS_CREATE_NAME_MIN_LENGTH = 1 VECTOR_SETTINGS_CREATE_NAME_MAX_LENGTH = 200 @@ -45,8 +45,7 @@ class VectorSettings(BaseModel): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) def check_vector(self, value: List[float]) -> None: num_elements = 
len(value) diff --git a/argilla-server/src/argilla_server/api/schemas/v1/vectors.py b/argilla-server/src/argilla_server/api/schemas/v1/vectors.py index 34569742c3..2c2d866efd 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/vectors.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/vectors.py @@ -15,7 +15,7 @@ from typing import List from uuid import UUID -from argilla_server.pydantic_v1 import BaseModel +from pydantic import BaseModel class Vector(BaseModel): diff --git a/argilla-server/src/argilla_server/api/schemas/v1/webhooks.py b/argilla-server/src/argilla_server/api/schemas/v1/webhooks.py index a093b7f26a..cc9171ef73 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/webhooks.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/webhooks.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +from uuid import UUID from datetime import datetime from typing import List, Optional -from uuid import UUID + +from pydantic import BaseModel, Field, HttpUrl, ConfigDict, field_validator, field_serializer from argilla_server.webhooks.v1.enums import WebhookEvent from argilla_server.api.schemas.v1.commons import UpdateSchema -from argilla_server.pydantic_v1 import BaseModel, Field, HttpUrl + WEBHOOK_EVENTS_MIN_ITEMS = 1 WEBHOOK_DESCRIPTION_MIN_LENGTH = 1 @@ -31,12 +33,11 @@ class Webhook(BaseModel): secret: str events: List[WebhookEvent] enabled: bool - description: Optional[str] + description: Optional[str] = None inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class Webhooks(BaseModel): @@ -45,26 +46,52 @@ class Webhooks(BaseModel): class WebhookCreate(BaseModel): url: HttpUrl - events: List[WebhookEvent] = Field( - min_items=WEBHOOK_EVENTS_MIN_ITEMS, - unique_items=True, - ) + events: List[WebhookEvent] = Field(min_length=WEBHOOK_EVENTS_MIN_ITEMS) description: Optional[str] = Field( + 
None, min_length=WEBHOOK_DESCRIPTION_MIN_LENGTH, max_length=WEBHOOK_DESCRIPTION_MAX_LENGTH, ) + @field_validator("events") + @classmethod + def events_must_be_unique(cls, events: List[WebhookEvent]): + if len(set(events)) != len(events): + raise ValueError("Events must be unique") + + return events + + @field_serializer("url") + def serialize_url(self, url: HttpUrl): + return str(url) + class WebhookUpdate(UpdateSchema): - url: Optional[HttpUrl] - events: Optional[List[WebhookEvent]] = Field( - min_items=WEBHOOK_EVENTS_MIN_ITEMS, - unique_items=True, - ) - enabled: Optional[bool] + url: Optional[HttpUrl] = None + events: Optional[List[WebhookEvent]] = Field(None, min_length=WEBHOOK_EVENTS_MIN_ITEMS) + enabled: Optional[bool] = None description: Optional[str] = Field( + None, min_length=WEBHOOK_DESCRIPTION_MIN_LENGTH, max_length=WEBHOOK_DESCRIPTION_MAX_LENGTH, ) __non_nullable_fields__ = {"url", "events", "enabled"} + + @field_validator("events") + @classmethod + def events_must_be_unique(cls, events: Optional[List[WebhookEvent]]): + if events is None: + return + + if len(set(events)) != len(events): + raise ValueError("Events must be unique") + + return events + + @field_serializer("url") + def serialize_url(self, url: Optional[HttpUrl]): + if url is None: + return + + return str(url) diff --git a/argilla-server/src/argilla_server/api/schemas/v1/workspaces.py b/argilla-server/src/argilla_server/api/schemas/v1/workspaces.py index 1b6215123c..5fba689f95 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/workspaces.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/workspaces.py @@ -16,7 +16,7 @@ from typing import List from uuid import UUID -from argilla_server.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict class Workspace(BaseModel): @@ -25,8 +25,7 @@ class Workspace(BaseModel): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class 
WorkspaceCreate(BaseModel): diff --git a/argilla-server/src/argilla_server/bulk/records_bulk.py b/argilla-server/src/argilla_server/bulk/records_bulk.py index 328f7c53c7..535e5be05b 100644 --- a/argilla-server/src/argilla_server/bulk/records_bulk.py +++ b/argilla-server/src/argilla_server/bulk/records_bulk.py @@ -93,7 +93,7 @@ async def _upsert_records_suggestions( upsert_many_suggestions = [] for idx, (record, suggestions) in enumerate(records_and_suggestions): for suggestion_create in suggestions or []: - upsert_many_suggestions.append(dict(**suggestion_create.dict(), record_id=record.id)) + upsert_many_suggestions.append(dict(**suggestion_create.model_dump(), record_id=record.id)) if not upsert_many_suggestions: return [] @@ -111,7 +111,7 @@ async def _upsert_records_responses( upsert_many_responses = [] for idx, (record, responses) in enumerate(records_and_responses): for response_create in responses or []: - upsert_many_responses.append(dict(**response_create.dict(), record_id=record.id)) + upsert_many_responses.append(dict(**response_create.model_dump(), record_id=record.id)) if not upsert_many_responses: return [] @@ -146,7 +146,7 @@ async def _upsert_records_vectors( @classmethod def _metadata_is_set(cls, record_create: RecordCreate) -> bool: - return "metadata" in record_create.__fields_set__ + return "metadata" in record_create.model_fields_set class UpsertRecordsBulk(CreateRecordsBulk): diff --git a/argilla-server/src/argilla_server/cli/database/users/create.py b/argilla-server/src/argilla_server/cli/database/users/create.py index 705da276e6..2757416143 100644 --- a/argilla-server/src/argilla_server/cli/database/users/create.py +++ b/argilla-server/src/argilla_server/cli/database/users/create.py @@ -21,7 +21,7 @@ from argilla_server.contexts import accounts from argilla_server.database import AsyncSessionLocal from argilla_server.models import User, UserRole -from argilla_server.pydantic_v1 import constr +from pydantic import constr from .utils import 
get_or_new_workspace diff --git a/argilla-server/src/argilla_server/cli/database/users/create_default.py b/argilla-server/src/argilla_server/cli/database/users/create_default.py index 37321775f9..27786a042e 100644 --- a/argilla-server/src/argilla_server/cli/database/users/create_default.py +++ b/argilla-server/src/argilla_server/cli/database/users/create_default.py @@ -18,7 +18,7 @@ from argilla_server.constants import DEFAULT_API_KEY, DEFAULT_PASSWORD, DEFAULT_USERNAME from argilla_server.contexts import accounts from argilla_server.database import AsyncSessionLocal -from argilla_server.models import User, UserRole, Workspace +from argilla_server.models import User, UserRole from .utils import get_or_new_workspace diff --git a/argilla-server/src/argilla_server/cli/database/users/migrate.py b/argilla-server/src/argilla_server/cli/database/users/migrate.py index beb6c32d84..3702d8ebb0 100644 --- a/argilla-server/src/argilla_server/cli/database/users/migrate.py +++ b/argilla-server/src/argilla_server/cli/database/users/migrate.py @@ -20,7 +20,7 @@ from argilla_server.database import AsyncSessionLocal from argilla_server.models import User, UserRole -from argilla_server.pydantic_v1 import BaseModel, Field, constr +from pydantic import BaseModel, Field, constr from .utils import get_or_new_workspace diff --git a/argilla-server/src/argilla_server/contexts/datasets.py b/argilla-server/src/argilla_server/contexts/datasets.py index af59d0736a..d79a49fc37 100644 --- a/argilla-server/src/argilla_server/contexts/datasets.py +++ b/argilla-server/src/argilla_server/contexts/datasets.py @@ -224,7 +224,7 @@ async def create_field(db: AsyncSession, dataset: Dataset, field_create: FieldCr name=field_create.name, title=field_create.title, required=field_create.required, - settings=field_create.settings.dict(), + settings=field_create.settings.model_dump(), dataset_id=dataset.id, ) @@ -235,7 +235,7 @@ async def update_field(db: AsyncSession, field: Field, field_update: "FieldUpdat 
f"Field type cannot be changed. Expected '{field.settings['type']}' but got '{field_update.settings.type}'" ) - params = field_update.dict(exclude_unset=True) + params = field_update.model_dump(exclude_unset=True) return await field.update(db, **params) @@ -266,7 +266,7 @@ async def create_metadata_property( db, name=metadata_property_create.name, title=metadata_property_create.title, - settings=metadata_property_create.settings.dict(), + settings=metadata_property_create.settings.model_dump(), allowed_roles=_allowed_roles_for_metadata_property_create(metadata_property_create), dataset_id=dataset.id, ) @@ -296,7 +296,7 @@ async def count_vectors_settings_by_dataset_id(db: AsyncSession, dataset_id: UUI async def update_vector_settings( db: AsyncSession, vector_settings: VectorSettings, vector_settings_update: "VectorSettingsUpdate" ) -> VectorSettings: - params = vector_settings_update.dict(exclude_unset=True) + params = vector_settings_update.model_dump(exclude_unset=True) return await vector_settings.update(db, **params) @@ -518,7 +518,7 @@ async def _validate_vector( f"vector with name={str(vector_name)} does not exist for dataset_id={str(dataset_id)}" ) - vector_settings = VectorSettingsSchema.from_orm(vector_settings) + vector_settings = VectorSettingsSchema.model_validate(vector_settings) vectors_settings[vector_name] = vector_settings vector_settings.check_vector(vector_value) @@ -685,7 +685,7 @@ async def _build_record_update( "vector_settings": {}, } - params = record_update.dict(exclude_unset=True) + params = record_update.model_dump(exclude_unset=True) needs_search_engine_update = False suggestions = None vectors = [] @@ -773,7 +773,7 @@ async def update_record( db: AsyncSession, search_engine: "SearchEngine", record: Record, record_update: "RecordUpdate" ) -> Record: params, suggestions, vectors, needs_search_engine_update, _ = await _build_record_update( - db, record, RecordUpdateWithId(id=record.id, **record_update.dict(by_alias=True, 
exclude_unset=True)) + db, record, RecordUpdateWithId(id=record.id, **record_update.model_dump(by_alias=True, exclude_unset=True)) ) # Remove existing suggestions @@ -960,7 +960,7 @@ async def upsert_suggestion( suggestion = await Suggestion.upsert( db, - schema=SuggestionCreateWithRecordId(record_id=record.id, **suggestion_create.dict()), + schema=SuggestionCreateWithRecordId(record_id=record.id, **suggestion_create.model_dump()), constraints=[Suggestion.record_id, Suggestion.question_id], ) diff --git a/argilla-server/src/argilla_server/contexts/hub.py b/argilla-server/src/argilla_server/contexts/hub.py index 96faa799bd..2085f1621e 100644 --- a/argilla-server/src/argilla_server/contexts/hub.py +++ b/argilla-server/src/argilla_server/contexts/hub.py @@ -15,7 +15,7 @@ import io import base64 -from typing import Union, Any +from typing import Any from typing_extensions import Self from PIL import Image diff --git a/argilla-server/src/argilla_server/contexts/questions.py b/argilla-server/src/argilla_server/contexts/questions.py index 917f5d47a4..db627c7390 100644 --- a/argilla-server/src/argilla_server/contexts/questions.py +++ b/argilla-server/src/argilla_server/contexts/questions.py @@ -42,7 +42,7 @@ async def create_question(db: AsyncSession, dataset: Dataset, question_create: Q title=question_create.title, description=question_create.description, required=question_create.required, - settings=question_create.settings.dict(), + settings=question_create.settings.model_dump(), dataset_id=dataset.id, ) @@ -50,7 +50,7 @@ async def create_question(db: AsyncSession, dataset: Dataset, question_create: Q async def update_question(db: AsyncSession, question: Question, question_update: QuestionUpdate) -> Question: QuestionUpdateValidator.validate(question_update, question) - params = question_update.dict(exclude_unset=True) + params = question_update.model_dump(exclude_unset=True) return await question.update(db, **params) diff --git 
a/argilla-server/src/argilla_server/contexts/settings.py b/argilla-server/src/argilla_server/contexts/settings.py index c7ca9e4dec..c7bfb542a7 100644 --- a/argilla-server/src/argilla_server/contexts/settings.py +++ b/argilla-server/src/argilla_server/contexts/settings.py @@ -14,8 +14,8 @@ from typing import Union -from argilla_server.api.schemas.v1.settings import ArgillaSettings, Settings -from argilla_server.integrations.huggingface.spaces import HUGGINGFACE_SETTINGS, HuggingfaceSettings +from argilla_server.api.schemas.v1.settings import ArgillaSettings, Settings, HuggingfaceSettings +from argilla_server.integrations.huggingface.spaces import HUGGINGFACE_SETTINGS from argilla_server.settings import settings @@ -39,4 +39,4 @@ def _get_argilla_settings() -> ArgillaSettings: def _get_huggingface_settings() -> Union[HuggingfaceSettings, None]: if HUGGINGFACE_SETTINGS.is_running_on_huggingface: - return HUGGINGFACE_SETTINGS + return HuggingfaceSettings.model_validate(HUGGINGFACE_SETTINGS) diff --git a/argilla-server/src/argilla_server/database.py b/argilla-server/src/argilla_server/database.py index a2dceb166e..d8ac4c3475 100644 --- a/argilla-server/src/argilla_server/database.py +++ b/argilla-server/src/argilla_server/database.py @@ -13,8 +13,7 @@ # limitations under the License. 
import os from collections import OrderedDict -from sqlite3 import Connection as SQLite3Connection -from typing import TYPE_CHECKING, AsyncGenerator, Optional +from typing import AsyncGenerator, Optional from sqlalchemy import event, make_url from sqlalchemy.engine import Engine diff --git a/argilla-server/src/argilla_server/errors/base_errors.py b/argilla-server/src/argilla_server/errors/base_errors.py index 082371ee31..f5567ea742 100644 --- a/argilla-server/src/argilla_server/errors/base_errors.py +++ b/argilla-server/src/argilla_server/errors/base_errors.py @@ -14,11 +14,10 @@ from typing import Any, Optional, Type, Union +import pydantic from fastapi.exceptions import RequestValidationError from starlette import status -import argilla_server.pydantic_v1 as pydantic - class ServerError(Exception): HTTP_STATUS: int = status.HTTP_500_INTERNAL_SERVER_ERROR @@ -62,7 +61,19 @@ class ValidationError(ServerError): HTTP_STATUS = status.HTTP_422_UNPROCESSABLE_ENTITY def __init__(self, error: Union[pydantic.ValidationError, RequestValidationError]): - self.errors = error.errors() + # Removing ctx and input from errors since they are new values. 
+ errors = [ + { + "type": error_dict["type"], + "loc": error_dict["loc"], + "msg": error_dict["msg"], + # "ctx": error_dict.get("ctx"), + # "input": error_dict.get("input"), + } + for error_dict in error.errors() + ] + + self.errors = errors class GenericServerError(ServerError): diff --git a/argilla-server/src/argilla_server/errors/error_handler.py b/argilla-server/src/argilla_server/errors/error_handler.py index a72807d4c8..6541b7493c 100644 --- a/argilla-server/src/argilla_server/errors/error_handler.py +++ b/argilla-server/src/argilla_server/errors/error_handler.py @@ -17,6 +17,7 @@ from fastapi import FastAPI, HTTPException, Request from fastapi.exception_handlers import http_exception_handler from fastapi.exceptions import RequestValidationError +from pydantic import BaseModel, field_serializer from argilla_server.api.errors.v1.exception_handlers import set_request_error from argilla_server.errors.base_errors import ( @@ -33,20 +34,39 @@ ValidationError, WrongTaskError, ) -from argilla_server.pydantic_v1 import BaseModel class ErrorDetail(BaseModel): code: str params: Dict[str, Any] + # TODO: Newer version does not serialize some exceptions such as ValueError + @field_serializer("params") + def serialize_params(self, value): + return self._parse_to_serializable(value) + + @classmethod + def _parse_to_serializable(cls, value: Any) -> Any: + if isinstance(value, ValueError): + return str(value) + + if isinstance(value, dict): + for k in value: + value[k] = cls._parse_to_serializable(value[k]) + return value + + if isinstance(value, list): + return [cls._parse_to_serializable(item) for item in value] + + return value + # TODO(@frascuchon): Review class Naming class ServerHTTPException(HTTPException): def __init__(self, error: ServerError): super().__init__( status_code=error.HTTP_STATUS, - detail=ErrorDetail(code=error.code, params=error.arguments).dict(), + detail=ErrorDetail(code=error.code, params=error.arguments).model_dump(), ) diff --git 
a/argilla-server/src/argilla_server/integrations/huggingface/spaces.py b/argilla-server/src/argilla_server/integrations/huggingface/spaces.py index 04140b1438..8cb4a0d3a0 100644 --- a/argilla-server/src/argilla_server/integrations/huggingface/spaces.py +++ b/argilla-server/src/argilla_server/integrations/huggingface/spaces.py @@ -11,20 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional -from argilla_server.pydantic_v1 import BaseSettings, Field +from pydantic import Field +from pydantic_settings import BaseSettings class HuggingfaceSettings(BaseSettings): - space_id: str = Field(None, env="SPACE_ID") - space_title: str = Field(None, env="SPACE_TITLE") - space_subdomain: str = Field(None, env="SPACE_SUBDOMAIN") - space_host: str = Field(None, env="SPACE_HOST") - space_repo_name: str = Field(None, env="SPACE_REPO_NAME") - space_author_name: str = Field(None, env="SPACE_AUTHOR_NAME") + space_id: Optional[str] = Field(None, alias="SPACE_ID") + space_title: Optional[str] = Field(None, alias="SPACE_TITLE") + space_subdomain: Optional[str] = Field(None, alias="SPACE_SUBDOMAIN") + space_host: Optional[str] = Field(None, alias="SPACE_HOST") + space_repo_name: Optional[str] = Field(None, alias="SPACE_REPO_NAME") + space_author_name: Optional[str] = Field(None, alias="SPACE_AUTHOR_NAME") # NOTE: Hugging Face has a typo in their environment variable name, # using PERSISTANT instead of PERSISTENT. We will use the correct spelling in our code. 
- space_persistent_storage_enabled: bool = Field(False, env="PERSISTANT_STORAGE_ENABLED") + space_persistent_storage_enabled: bool = Field(False, alias="PERSISTANT_STORAGE_ENABLED") @property def is_running_on_huggingface(self) -> bool: diff --git a/argilla-server/src/argilla_server/jobs/dataset_jobs.py b/argilla-server/src/argilla_server/jobs/dataset_jobs.py index a34c92e8ae..bbf6b622db 100644 --- a/argilla-server/src/argilla_server/jobs/dataset_jobs.py +++ b/argilla-server/src/argilla_server/jobs/dataset_jobs.py @@ -17,7 +17,7 @@ from rq import Retry from rq.decorators import job -from sqlalchemy import func, select +from sqlalchemy import select from argilla_server.models import Record, Response from argilla_server.database import AsyncSessionLocal diff --git a/argilla-server/src/argilla_server/jobs/hub_jobs.py b/argilla-server/src/argilla_server/jobs/hub_jobs.py index 3c3611cfdd..bfc74b503a 100644 --- a/argilla-server/src/argilla_server/jobs/hub_jobs.py +++ b/argilla-server/src/argilla_server/jobs/hub_jobs.py @@ -43,7 +43,7 @@ async def import_dataset_from_hub_job(name: str, subset: str, split: str, datase ) async with SearchEngine.get_by_name(settings.search_engine) as search_engine: - parsed_mapping = HubDatasetMapping.parse_obj(mapping) + parsed_mapping = HubDatasetMapping.model_validate(mapping) await ( HubDataset(name, subset, split, parsed_mapping) diff --git a/argilla-server/src/argilla_server/models/database.py b/argilla-server/src/argilla_server/models/database.py index 5b298ae1e6..cf7e4117ae 100644 --- a/argilla-server/src/argilla_server/models/database.py +++ b/argilla-server/src/argilla_server/models/database.py @@ -25,7 +25,6 @@ String, Text, UniqueConstraint, - and_, sql, ) from sqlalchemy.engine.default import DefaultExecutionContext @@ -47,7 +46,7 @@ from argilla_server.models.base import DatabaseModel from argilla_server.models.metadata_properties import MetadataPropertySettings from argilla_server.models.mixins import inserted_at_current_value 
-from argilla_server.pydantic_v1 import parse_obj_as +from pydantic import TypeAdapter # Include here the data model ref to be accessible for automatic alembic migration scripts __all__ = [ @@ -288,7 +287,7 @@ class Question(DatabaseModel): @property def parsed_settings(self) -> QuestionSettings: - return parse_obj_as(QuestionSettings, self.settings) + return TypeAdapter(QuestionSettings).validate_python(self.settings) @property def is_text(self) -> bool: @@ -337,7 +336,7 @@ def type(self) -> MetadataPropertyType: @property def parsed_settings(self) -> MetadataPropertySettings: - return parse_obj_as(MetadataPropertySettings, self.settings) + return TypeAdapter(MetadataPropertySettings).validate_python(self.settings) @property def visible_for_annotators(self) -> bool: diff --git a/argilla-server/src/argilla_server/models/metadata_properties.py b/argilla-server/src/argilla_server/models/metadata_properties.py index 550e00a0b6..5d6fed62f0 100644 --- a/argilla-server/src/argilla_server/models/metadata_properties.py +++ b/argilla-server/src/argilla_server/models/metadata_properties.py @@ -17,8 +17,7 @@ from argilla_server.enums import MetadataPropertyType from argilla_server.errors.future import UnprocessableEntityError -from argilla_server.pydantic_v1 import BaseModel, Field -from argilla_server.pydantic_v1.generics import GenericModel +from pydantic import BaseModel, Field __all__ = [ "MetadataPropertySettings", @@ -59,7 +58,7 @@ def check_metadata(self, value: Any) -> None: NT = TypeVar("NT", int, float) -class NumericMetadataPropertySettings(BaseMetadataPropertySettings, GenericModel, Generic[NT]): +class NumericMetadataPropertySettings(BaseMetadataPropertySettings, BaseModel, Generic[NT]): min: Optional[NT] = None max: Optional[NT] = None diff --git a/argilla-server/src/argilla_server/models/mixins.py b/argilla-server/src/argilla_server/models/mixins.py index a187e2b42a..3f0bde27f4 100644 --- a/argilla-server/src/argilla_server/models/mixins.py +++ 
b/argilla-server/src/argilla_server/models/mixins.py @@ -27,7 +27,7 @@ from typing_extensions import Self from argilla_server.errors.future import NotFoundError -from argilla_server.pydantic_v1 import BaseModel +from pydantic import BaseModel if TYPE_CHECKING: from sqlalchemy.orm import InstrumentedAttribute @@ -45,7 +45,7 @@ def _schema_or_kwargs(schema: Union[Schema, None], values: Dict[str, Any]) -> Dict[str, Any]: if schema: - return schema.dict() + return schema.model_dump() return values @@ -142,7 +142,7 @@ async def upsert_many( if len(objects) == 0: raise ValueError("Cannot upsert empty list of objects") - values = [obj if isinstance(obj, dict) else obj.dict() for obj in objects] + values = [obj if isinstance(obj, dict) else obj.model_dump() for obj in objects] # Try to insert all objects insert_stmt = _INSERT_FUNC[db.bind.dialect.name](cls).values(values) diff --git a/argilla-server/src/argilla_server/pydantic_v1/__init__.py b/argilla-server/src/argilla_server/pydantic_v1/__init__.py deleted file mode 100644 index d295b7d39c..0000000000 --- a/argilla-server/src/argilla_server/pydantic_v1/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2021-present, the Recognai S.L. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from importlib import metadata - -try: - from pydantic.v1 import * # noqa: F403 -except ImportError: - from pydantic import * # noqa: F403 - -try: - PYDANTIC_MAJOR_VERSION: int = int(metadata.version("pydantic").split(".")[0]) -except metadata.PackageNotFoundError: - PYDANTIC_MAJOR_VERSION = 0 diff --git a/argilla-server/src/argilla_server/pydantic_v1/errors.py b/argilla-server/src/argilla_server/pydantic_v1/errors.py deleted file mode 100644 index 08d10a155b..0000000000 --- a/argilla-server/src/argilla_server/pydantic_v1/errors.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2021-present, the Recognai S.L. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -try: - from pydantic.v1.errors import * # noqa: F403 -except ImportError: - from pydantic.errors import * # noqa: F403 diff --git a/argilla-server/src/argilla_server/pydantic_v1/generics.py b/argilla-server/src/argilla_server/pydantic_v1/generics.py deleted file mode 100644 index f8e3043c1f..0000000000 --- a/argilla-server/src/argilla_server/pydantic_v1/generics.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2021-present, the Recognai S.L. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -try: - from pydantic.v1.generics import * # noqa: F403 -except ImportError: - from pydantic.generics import * # noqa: F403 diff --git a/argilla-server/src/argilla_server/pydantic_v1/utils.py b/argilla-server/src/argilla_server/pydantic_v1/utils.py deleted file mode 100644 index a821b7e5f5..0000000000 --- a/argilla-server/src/argilla_server/pydantic_v1/utils.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2021-present, the Recognai S.L. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -try: - from pydantic.v1.utils import * # noqa: F403 -except ImportError: - from pydantic.utils import * # noqa: F403 diff --git a/argilla-server/src/argilla_server/search_engine/base.py b/argilla-server/src/argilla_server/search_engine/base.py index 4914cce3a3..5ced12fb7b 100644 --- a/argilla-server/src/argilla_server/search_engine/base.py +++ b/argilla-server/src/argilla_server/search_engine/base.py @@ -34,9 +34,8 @@ SimilarityOrder, SortOrder, ) -from argilla_server.models import Dataset, MetadataProperty, Record, Response, Suggestion, User, Vector, VectorSettings -from argilla_server.pydantic_v1 import BaseModel, Field -from argilla_server.pydantic_v1.generics import GenericModel +from argilla_server.models import Dataset, MetadataProperty, Record, Response, Suggestion, User, VectorSettings +from pydantic import BaseModel, Field, ConfigDict __all__ = [ "SearchEngine", @@ -119,13 +118,14 @@ class TextQuery(BaseModel): q: str field: Optional[str] = None + model_config = ConfigDict(coerce_numbers_to_str=True) + class UserResponseStatusFilter(BaseModel): statuses: List[ResponseStatusFilter] user: Optional[User] = None - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) @property def response_statuses(self) -> List[ResponseStatus]: @@ -134,7 +134,7 @@ def response_statuses(self) -> List[ResponseStatus]: class SearchResponseItem(BaseModel): record_id: UUID - score: Optional[float] + score: Optional[float] = None class SearchResponses(BaseModel): @@ -163,9 +163,9 @@ class TermCount(BaseModel): NT = TypeVar("NT", int, float) -class NumericMetadataMetrics(GenericModel, Generic[NT]): - min: Optional[NT] - max: Optional[NT] +class NumericMetadataMetrics(BaseModel, Generic[NT]): + min: Optional[NT] = None + max: Optional[NT] = None class IntegerMetadataMetrics(NumericMetadataMetrics[int]): diff --git a/argilla-server/src/argilla_server/security/settings.py b/argilla-server/src/argilla_server/security/settings.py 
index 2715b9b3b5..3aab0eed8a 100644 --- a/argilla-server/src/argilla_server/security/settings.py +++ b/argilla-server/src/argilla_server/security/settings.py @@ -16,7 +16,8 @@ from typing import TYPE_CHECKING, Optional from uuid import uuid4 -from argilla_server.pydantic_v1 import BaseSettings, PrivateAttr +from pydantic import PrivateAttr +from pydantic_settings import BaseSettings if TYPE_CHECKING: from argilla_server.security.authentication.oauth2 import OAuth2Settings diff --git a/argilla-server/src/argilla_server/settings.py b/argilla-server/src/argilla_server/settings.py index a734ca433b..52415cd6f3 100644 --- a/argilla-server/src/argilla_server/settings.py +++ b/argilla-server/src/argilla_server/settings.py @@ -24,6 +24,10 @@ from pathlib import Path from typing import Dict, List, Optional +from pydantic import Field, field_validator, model_validator +from pydantic_core.core_schema import ValidationInfo +from pydantic_settings import BaseSettings + from argilla_server.constants import ( DATABASE_POSTGRESQL, DATABASE_SQLITE, @@ -35,7 +39,6 @@ SEARCH_ENGINE_ELASTICSEARCH, SEARCH_ENGINE_OPENSEARCH, ) -from argilla_server.pydantic_v1 import BaseSettings, Field, root_validator, validator class Settings(BaseSettings): @@ -73,10 +76,22 @@ class Settings(BaseSettings): __DATASETS_INDEX_NAME__ = "ar.datasets" __DATASETS_RECORDS_INDEX_NAME__ = "ar.dataset.{}" - home_path: Optional[str] = Field(description="The home path where argilla related files will be stored") - base_url: Optional[str] = Field(description="The default base url where server will be deployed") + home_path: Optional[str] = Field( + None, + validate_default=True, + description="The home path where argilla related files will be stored", + ) + base_url: Optional[str] = Field( + None, + validate_default=True, + description="The default base url where server will be deployed", + ) - database_url: Optional[str] = Field(description="The database url that argilla will use as data store") + database_url: 
Optional[str] = Field( + None, + validate_default=True, + description="The database url that argilla will use as data store", + ) # https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.create_engine.params.pool_size database_postgresql_pool_size: Optional[int] = Field( default=DEFAULT_DATABASE_POSTGRESQL_POOL_SIZE, @@ -129,11 +144,14 @@ class Settings(BaseSettings): # Hugging Face telemetry enable_telemetry: bool = Field( - default=True, description="The telemetry configuration for Hugging Face hub telemetry. " + default=True, + validate_default=True, + description="The telemetry configuration for Hugging Face hub telemetry. ", ) # See also the telemetry.py module - @validator("enable_telemetry", pre=True, always=True) + @field_validator("enable_telemetry", mode="before") + @classmethod def set_enable_telemetry(cls, enable_telemetry: bool) -> bool: if os.getenv("HF_HUB_DISABLE_TELEMETRY") == "1" or os.getenv("HF_HUB_OFFLINE") == "1": enable_telemetry = False @@ -146,11 +164,13 @@ def set_enable_telemetry(cls, enable_telemetry: bool) -> bool: return enable_telemetry - @validator("home_path", always=True) + @field_validator("home_path", mode="before") + @classmethod def set_home_path_default(cls, home_path: str): return home_path or os.path.join(Path.home(), ".argilla") - @validator("base_url", always=True) + @field_validator("base_url") + @classmethod def normalize_base_url(cls, base_url: str): if not base_url: base_url = "/" @@ -161,10 +181,11 @@ def normalize_base_url(cls, base_url: str): return base_url - @validator("database_url", pre=True, always=True) - def set_database_url(cls, database_url: str, values: dict) -> str: + @field_validator("database_url", mode="before") + @classmethod + def set_database_url(cls, database_url: str, info: ValidationInfo) -> str: if not database_url: - home_path = values.get("home_path") + home_path = info.data.get("home_path") sqlite_file = os.path.join(home_path, "argilla.db") return 
f"sqlite+aiosqlite:///{sqlite_file}?check_same_thread=False" @@ -190,11 +211,12 @@ def set_database_url(cls, database_url: str, values: dict) -> str: return database_url - @root_validator(skip_on_failure=True) - def create_home_path(cls, values): - Path(values["home_path"]).mkdir(parents=True, exist_ok=True) + @model_validator(mode="after") + @classmethod + def create_home_path(cls, instance: "Settings") -> "Settings": + Path(instance.home_path).mkdir(parents=True, exist_ok=True) - return values + return instance @property def database_engine_args(self) -> Dict: diff --git a/argilla-server/src/argilla_server/use_cases/responses/upsert_responses_in_bulk.py b/argilla-server/src/argilla_server/use_cases/responses/upsert_responses_in_bulk.py index 84f513d5b7..5cf25afac7 100644 --- a/argilla-server/src/argilla_server/use_cases/responses/upsert_responses_in_bulk.py +++ b/argilla-server/src/argilla_server/use_cases/responses/upsert_responses_in_bulk.py @@ -49,7 +49,7 @@ async def execute(self, responses: List[ResponseUpsert], user: User) -> List[Res except Exception as err: responses_bulk_items.append(ResponseBulk(item=None, error=ResponseBulkError(detail=str(err)))) else: - responses_bulk_items.append(ResponseBulk(item=Response.from_orm(response), error=None)) + responses_bulk_items.append(ResponseBulk(item=Response.model_validate(response), error=None)) return responses_bulk_items diff --git a/argilla-server/src/argilla_server/utils.py b/argilla-server/src/argilla_server/utils.py index 7b806817b4..432835e173 100644 --- a/argilla-server/src/argilla_server/utils.py +++ b/argilla-server/src/argilla_server/utils.py @@ -19,7 +19,7 @@ from fastapi import HTTPException, Query -from argilla_server.pydantic_v1 import BaseModel +from pydantic import BaseModel # TODO: remove this function at some point @@ -82,7 +82,7 @@ def parse_query_param( In addition, if a `pydantic.BaseModel` is provided, the dictionary is parsed into an instance of that model: ```python - from 
argilla_server.pydantic_v1 import BaseModel, Field + from pydantic import BaseModel, Field class Params(BaseModel): diff --git a/argilla-server/src/argilla_server/utils/params.py b/argilla-server/src/argilla_server/utils/params.py index 7b806817b4..432835e173 100644 --- a/argilla-server/src/argilla_server/utils/params.py +++ b/argilla-server/src/argilla_server/utils/params.py @@ -19,7 +19,7 @@ from fastapi import HTTPException, Query -from argilla_server.pydantic_v1 import BaseModel +from pydantic import BaseModel # TODO: remove this function at some point @@ -82,7 +82,7 @@ def parse_query_param( In addition, if a `pydantic.BaseModel` is provided, the dictionary is parsed into an instance of that model: ```python - from argilla_server.pydantic_v1 import BaseModel, Field + from pydantic import BaseModel, Field class Params(BaseModel): diff --git a/argilla-server/src/argilla_server/validators/datasets.py b/argilla-server/src/argilla_server/validators/datasets.py index a7a2f3f979..04af1f8279 100644 --- a/argilla-server/src/argilla_server/validators/datasets.py +++ b/argilla-server/src/argilla_server/validators/datasets.py @@ -20,7 +20,6 @@ from argilla_server.errors.future import ( NotUniqueError, UnprocessableEntityError, - UpdateDistributionWithExistingResponsesError, ) diff --git a/argilla-server/src/argilla_server/validators/records.py b/argilla-server/src/argilla_server/validators/records.py index c22b2fe0c2..66d4a5b129 100644 --- a/argilla-server/src/argilla_server/validators/records.py +++ b/argilla-server/src/argilla_server/validators/records.py @@ -17,19 +17,18 @@ from abc import ABC from typing import Dict, List, Union, Any, Optional from urllib.parse import urlparse, ParseResult, ParseResultBytes -from uuid import UUID +from pydantic import ValidationError from sqlalchemy.ext.asyncio import AsyncSession from argilla_server.api.schemas.v1.chat import ChatFieldValue -from argilla_server.api.schemas.v1.records import RecordCreate, RecordUpdate, RecordUpsert 
-from argilla_server.api.schemas.v1.records_bulk import RecordsBulkCreate, RecordsBulkUpsert +from argilla_server.api.schemas.v1.records import RecordCreate, RecordUpsert +from argilla_server.api.schemas.v1.records_bulk import RecordsBulkCreate from argilla_server.api.schemas.v1.responses import UserResponseCreate from argilla_server.api.schemas.v1.suggestions import SuggestionCreate from argilla_server.contexts import records from argilla_server.errors.future.base_errors import UnprocessableEntityError from argilla_server.models import Dataset, Record -from argilla_server.pydantic_v1 import ValidationError from argilla_server.validators.responses import ResponseCreateValidator from argilla_server.validators.suggestions import SuggestionCreateValidator from argilla_server.validators.vectors import VectorValidator diff --git a/argilla-server/src/argilla_server/webhooks/v1/datasets.py b/argilla-server/src/argilla_server/webhooks/v1/datasets.py index 079d352bb1..620b9cefab 100644 --- a/argilla-server/src/argilla_server/webhooks/v1/datasets.py +++ b/argilla-server/src/argilla_server/webhooks/v1/datasets.py @@ -51,5 +51,5 @@ async def build_dataset_event(db: AsyncSession, dataset_event: DatasetEvent, dat return Event( event=dataset_event, timestamp=datetime.utcnow(), - data=DatasetEventSchema.from_orm(dataset).dict(), + data=DatasetEventSchema.model_validate(dataset).model_dump(), ) diff --git a/argilla-server/src/argilla_server/webhooks/v1/records.py b/argilla-server/src/argilla_server/webhooks/v1/records.py index f03172d473..3352892e3f 100644 --- a/argilla-server/src/argilla_server/webhooks/v1/records.py +++ b/argilla-server/src/argilla_server/webhooks/v1/records.py @@ -51,5 +51,5 @@ async def build_record_event(db: AsyncSession, record_event: RecordEvent, record return Event( event=record_event, timestamp=datetime.utcnow(), - data=RecordEventSchema.from_orm(record).dict(), + data=RecordEventSchema.model_validate(record).model_dump(), ) diff --git 
a/argilla-server/src/argilla_server/webhooks/v1/responses.py b/argilla-server/src/argilla_server/webhooks/v1/responses.py index 122fae0fd9..79539977af 100644 --- a/argilla-server/src/argilla_server/webhooks/v1/responses.py +++ b/argilla-server/src/argilla_server/webhooks/v1/responses.py @@ -56,5 +56,5 @@ async def build_response_event(db: AsyncSession, response_event: ResponseEvent, return Event( event=response_event, timestamp=datetime.utcnow(), - data=ResponseEventSchema.from_orm(response).dict(), + data=ResponseEventSchema.model_validate(response).model_dump(), ) diff --git a/argilla-server/src/argilla_server/webhooks/v1/schemas.py b/argilla-server/src/argilla_server/webhooks/v1/schemas.py index 9db5aae9b1..57d98d0c58 100644 --- a/argilla-server/src/argilla_server/webhooks/v1/schemas.py +++ b/argilla-server/src/argilla_server/webhooks/v1/schemas.py @@ -16,20 +16,19 @@ from typing import Optional, List from datetime import datetime -from argilla_server.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict class UserEventSchema(BaseModel): id: UUID first_name: str - last_name: Optional[str] + last_name: Optional[str] = None username: str role: str inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class WorkspaceEventSchema(BaseModel): @@ -38,22 +37,20 @@ class WorkspaceEventSchema(BaseModel): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class DatasetQuestionEventSchema(BaseModel): id: UUID name: str title: str - description: Optional[str] + description: Optional[str] = None required: bool settings: dict inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class DatasetFieldEventSchema(BaseModel): @@ -65,8 +62,7 @@ class DatasetFieldEventSchema(BaseModel): inserted_at: datetime updated_at: datetime - 
class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class DatasetMetadataPropertyEventSchema(BaseModel): @@ -78,8 +74,7 @@ class DatasetMetadataPropertyEventSchema(BaseModel): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class DatasetVectorSettingsEventSchema(BaseModel): @@ -90,14 +85,13 @@ class DatasetVectorSettingsEventSchema(BaseModel): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class DatasetEventSchema(BaseModel): id: UUID name: str - guidelines: Optional[str] + guidelines: Optional[str] = None allow_extra_metadata: bool status: str distribution: dict @@ -110,8 +104,7 @@ class DatasetEventSchema(BaseModel): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class RecordEventSchema(BaseModel): @@ -121,7 +114,7 @@ class RecordEventSchema(BaseModel): # Or find another possible solution. fields: dict metadata: Optional[dict] = Field(None, alias="metadata_") - external_id: Optional[str] + external_id: Optional[str] = None # TODO: # responses: # - Create a new `GET /api/v1/records/{record_id}/responses` endpoint. 
@@ -142,18 +135,16 @@ class RecordEventSchema(BaseModel): inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class ResponseEventSchema(BaseModel): id: UUID - values: Optional[dict] + values: Optional[dict] = None status: str record: RecordEventSchema user: UserEventSchema inserted_at: datetime updated_at: datetime - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) diff --git a/argilla-server/tests/pydantic_v1/__init__.py b/argilla-server/tests/pydantic_v1/__init__.py deleted file mode 100644 index 56d69af76e..0000000000 --- a/argilla-server/tests/pydantic_v1/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2021-present, the Recognai S.L. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from importlib import metadata - -try: - from pydantic.v1 import * # noqa: F403 -except ImportError: - from pydantic import * # noqa: F403 - -try: - PYDANTIC_MAJOR_VERSION: int = int(metadata.version("pydantic").split(".")[0]) -except metadata.PackageNotFoundError: - PYDANTIC_MAJOR_VERSION = 0 diff --git a/argilla-server/tests/pydantic_v1/generics.py b/argilla-server/tests/pydantic_v1/generics.py deleted file mode 100644 index f8e3043c1f..0000000000 --- a/argilla-server/tests/pydantic_v1/generics.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2021-present, the Recognai S.L. team. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -try: - from pydantic.v1.generics import * # noqa: F403 -except ImportError: - from pydantic.generics import * # noqa: F403 diff --git a/argilla-server/tests/pydantic_v1/utils.py b/argilla-server/tests/pydantic_v1/utils.py deleted file mode 100644 index a821b7e5f5..0000000000 --- a/argilla-server/tests/pydantic_v1/utils.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2021-present, the Recognai S.L. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -try: - from pydantic.v1.utils import * # noqa: F403 -except ImportError: - from pydantic.utils import * # noqa: F403 diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py b/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py index 65cb298f0a..75da4ca579 100644 --- a/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py @@ -521,6 +521,57 @@ async def test_create_dataset_records_bulk_with_chat_field_with_value_exceeding_ assert response.status_code == 422 assert (await db.execute(select(func.count(Record.id)))).scalar_one() == 0 + async def test_create_dataset_records_bulk_with_chat_field_empty_values( + self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict + ): + dataset = await DatasetFactory.create(status=DatasetStatus.ready) + + await ChatFieldFactory.create(name="chat", dataset=dataset) + await LabelSelectionQuestionFactory.create(dataset=dataset) + + response = await async_client.post( + self.url(dataset.id), + headers=owner_auth_header, + json={ + "items": [ + { + "fields": {"chat": [{"role": "", "content": ""}]}, + }, + ], + }, + ) + + assert response.status_code == 422 + assert response.json() == { + "detail": { + "code": "argilla.api.errors::ValidationError", + "params": { + "errors": [ + { + "loc": ["body", "items", 0, "fields"], + "msg": "Value error, Error parsing chat " + "field 'chat': [{'type': " + "'string_too_short', 'loc': " + "('role',), 'msg': 'String should " + "have at least 1 character', " + "'input': '', 'ctx': {'min_length': " + "1}, 'url': " + "'https://errors.pydantic.dev/2.9/v/string_too_short'}, " + "{'type': 'string_too_short', 'loc': " + "('content',), 'msg': 'String should " + "have at least 1 character', " + "'input': '', 
'ctx': {'min_length': " + "1}, 'url': " + "'https://errors.pydantic.dev/2.9/v/string_too_short'}]", + "type": "value_error", + } + ] + }, + } + } + + assert (await db.execute(select(func.count(Record.id)))).scalar_one() == 0 + async def test_create_dataset_records_bulk_with_chat_field_with_non_dicts( self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict ): @@ -609,7 +660,7 @@ async def test_create_dataset_records_bulk_with_chat_field_without_content_key( "errors": [ { "loc": ["body", "items", 0, "fields"], - "msg": "Error parsing chat field 'chat': [{'loc': ('content',), 'msg': 'field required', 'type': 'value_error.missing'}]", + "msg": "Value error, Error parsing chat field 'chat': [{'type': 'missing', 'loc': ('content',), 'msg': 'Field required', 'input': {'role': 'user'}, 'url': 'https://errors.pydantic.dev/2.9/v/missing'}]", "type": "value_error", } ] diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/test_create_dataset_question.py b/argilla-server/tests/unit/api/handlers/v1/datasets/test_create_dataset_question.py index d59c2fb322..67a9e4cc47 100644 --- a/argilla-server/tests/unit/api/handlers/v1/datasets/test_create_dataset_question.py +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/test_create_dataset_question.py @@ -155,7 +155,7 @@ async def test_create_dataset_question_with_other_span_question_using_the_same_f @pytest.mark.parametrize( "visible_options,error_msg", [ - (1, "ensure this value is greater than or equal to 3"), + (1, "greater than or equal to 3"), (4, "the value for 'visible_options' must be less or equal to the number of items in 'options' (3)"), ], ) diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/test_list_current_user_datasets.py b/argilla-server/tests/unit/api/handlers/v1/datasets/test_list_current_user_datasets.py index 9f12bb7fa1..10f828ce0d 100644 --- a/argilla-server/tests/unit/api/handlers/v1/datasets/test_list_current_user_datasets.py +++ 
b/argilla-server/tests/unit/api/handlers/v1/datasets/test_list_current_user_datasets.py @@ -12,15 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import uuid import pytest - from httpx import AsyncClient -from pydantic.schema import timedelta +from datetime import timedelta from argilla_server.constants import API_KEY_HEADER_NAME from argilla_server.enums import DatasetStatus, UserRole - from tests.factories import DatasetFactory, WorkspaceUserFactory, WorkspaceFactory, UserFactory diff --git a/argilla-server/tests/unit/api/handlers/v1/responses/test_create_current_user_responses_bulk.py b/argilla-server/tests/unit/api/handlers/v1/responses/test_create_current_user_responses_bulk.py index 3cfe3fb7a4..10345bd287 100644 --- a/argilla-server/tests/unit/api/handlers/v1/responses/test_create_current_user_responses_bulk.py +++ b/argilla-server/tests/unit/api/handlers/v1/responses/test_create_current_user_responses_bulk.py @@ -440,7 +440,7 @@ async def refresh_records(records): profiler = Profiler() responses = [ - DraftResponseUpsert.parse_obj( + DraftResponseUpsert.model_validate( { "values": {"prompt-quality": {"value": 10}}, "record_id": record.id, diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index 71262e3159..e4a643dbf2 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -24,11 +24,6 @@ from argilla_server.api.handlers.v1.datasets.records import LIST_DATASET_RECORDS_LIMIT_DEFAULT from argilla_server.api.schemas.v1.datasets import DATASET_GUIDELINES_MAX_LENGTH, DATASET_NAME_MAX_LENGTH from argilla_server.api.schemas.v1.fields import FIELD_CREATE_NAME_MAX_LENGTH, FIELD_CREATE_TITLE_MAX_LENGTH -from argilla_server.api.schemas.v1.metadata_properties import ( - METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH, - 
METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH, - TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS, -) from argilla_server.api.schemas.v1.records import RECORDS_CREATE_MAX_ITEMS, RECORDS_CREATE_MIN_ITEMS from argilla_server.api.schemas.v1.vector_settings import ( VECTOR_SETTINGS_CREATE_NAME_MAX_LENGTH, @@ -48,7 +43,6 @@ from argilla_server.models import ( Dataset, Field, - MetadataProperty, Question, Record, Response, @@ -1627,7 +1621,7 @@ async def test_create_dataset_records_with_duplicated_response_for_an_user( "errors": [ { "loc": ["body", "items", 0, "responses"], - "msg": f"'responses' contains several responses for the same user_id='{str(owner.id)}'", + "msg": f"Value error, 'responses' contains several responses for the same user_id='{str(owner.id)}'", "type": "value_error", } ], @@ -1736,19 +1730,19 @@ async def test_create_dataset_records_with_wrong_value_field( "params": { "errors": [ { - "loc": ["body", "items", 0, "fields", "output"], - "msg": "str type expected", - "type": "type_error.str", + "loc": ["body", "items", 0, "fields", "output", "constrained-str"], + "msg": "Input should be a valid string", + "type": "string_type", }, { - "loc": ["body", "items", 0, "fields", "output"], - "msg": "value is not a valid list", - "type": "type_error.list", + "loc": ["body", "items", 0, "fields", "output", "list[ChatFieldValue]"], + "msg": "Input should be a valid list", + "type": "list_type", }, { - "loc": ["body", "items", 0, "fields", "output"], - "msg": "value is not a valid dict", - "type": "type_error.dict", + "loc": ["body", "items", 0, "fields", "output", "dict[constrained-str,any]"], + "msg": "Input should be a valid dictionary", + "type": "dict_type", }, ] }, @@ -1846,19 +1840,19 @@ async def test_create_dataset_records_with_wrong_optional_fields( "params": { "errors": [ { - "loc": ["body", "items", 0, "fields", "output"], - "msg": "str type expected", - "type": "type_error.str", + "loc": ["body", "items", 0, "fields", "output", "constrained-str"], + "msg": "Input 
should be a valid string", + "type": "string_type", }, { - "loc": ["body", "items", 0, "fields", "output"], - "msg": "value is not a valid list", - "type": "type_error.list", + "loc": ["body", "items", 0, "fields", "output", "list[ChatFieldValue]"], + "msg": "Input should be a valid list", + "type": "list_type", }, { - "loc": ["body", "items", 0, "fields", "output"], - "msg": "value is not a valid dict", - "type": "type_error.dict", + "loc": ["body", "items", 0, "fields", "output", "dict[constrained-str,any]"], + "msg": "Input should be a valid dictionary", + "type": "dict_type", }, ] }, diff --git a/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py b/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py index 83507ddac4..4a2bacdd16 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_metadata_properties.py @@ -16,6 +16,8 @@ from uuid import uuid4 import pytest +from sqlalchemy.ext.asyncio import AsyncSession + from argilla_server.api.schemas.v1.metadata_properties import METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH from argilla_server.constants import API_KEY_HEADER_NAME from argilla_server.enums import MetadataPropertyType, UserRole diff --git a/argilla-server/tests/unit/api/handlers/v1/webhooks/test_create_webhook.py b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_create_webhook.py index 6fb2b51717..301b284d35 100644 --- a/argilla-server/tests/unit/api/handlers/v1/webhooks/test_create_webhook.py +++ b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_create_webhook.py @@ -125,10 +125,8 @@ async def test_create_webhook_without_authentication(self, db: AsyncSession, asy [ "", "example.com", - "http:example.com", - "https:example.com", - "http://localhost/webhooks", - "http://localhost:3000/webhooks", + "http.example.com", + "https.example.com", ], ) async def test_create_webhook_with_invalid_url( diff --git 
a/argilla-server/tests/unit/api/handlers/v1/webhooks/test_update_webhook.py b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_update_webhook.py index ad2cc0bb30..236c83a983 100644 --- a/argilla-server/tests/unit/api/handlers/v1/webhooks/test_update_webhook.py +++ b/argilla-server/tests/unit/api/handlers/v1/webhooks/test_update_webhook.py @@ -256,7 +256,7 @@ async def test_update_webhook_without_authentication(self, async_client: AsyncCl assert webhook.url != "https://example.com/webhook" assert webhook.events != [WebhookEvent.response_updated] - @pytest.mark.parametrize("invalid_url", ["", "example.com", "http:example.com", "https:example.com"]) + @pytest.mark.parametrize("invalid_url", ["", "example.com", "http.example.com", "https.example.com"]) async def test_update_webhook_with_invalid_url( self, async_client: AsyncClient, owner_auth_header: dict, invalid_url: str ): diff --git a/argilla-server/tests/unit/api/schemas/v1/records/test_record_create.py b/argilla-server/tests/unit/api/schemas/v1/records/test_record_create.py index a6a1712586..22866c02a1 100644 --- a/argilla-server/tests/unit/api/schemas/v1/records/test_record_create.py +++ b/argilla-server/tests/unit/api/schemas/v1/records/test_record_create.py @@ -100,7 +100,7 @@ def test_record_create_with_chat_field(self): ], ) def test_record_create_with_wrong_chat_field(self, wrong_value: dict): - with pytest.raises(ValueError): + with pytest.raises((ValueError, TypeError)): # noqa RecordCreate(fields={"field": [wrong_value]}) def test_record_create_with_exceeded_chat_messages(self): diff --git a/argilla-server/tests/unit/contexts/search/test_search_records_query_validator.py b/argilla-server/tests/unit/contexts/search/test_search_records_query_validator.py index 27ce751277..beac4590ac 100644 --- a/argilla-server/tests/unit/contexts/search/test_search_records_query_validator.py +++ b/argilla-server/tests/unit/contexts/search/test_search_records_query_validator.py @@ -37,7 +37,7 @@ async def 
test_validate(self, db: AsyncSession): label_selection_question = await LabelSelectionQuestionFactory.create(dataset=dataset) metadata_property = await FloatMetadataPropertyFactory.create(dataset=dataset) - query = SearchRecordsQuery.parse_obj( + query = SearchRecordsQuery.model_validate( { "query": { "text": {"q": "query"}, @@ -88,7 +88,7 @@ async def test_validate(self, db: AsyncSession): await SearchRecordsQueryValidator.validate(db, dataset, query) async def test_validate_response_filter_scope_in_filters_without_question(self, db: AsyncSession): - query = SearchRecordsQuery.parse_obj( + query = SearchRecordsQuery.model_validate( { "query": { "text": {"q": "query"}, @@ -102,7 +102,7 @@ async def test_validate_response_filter_scope_in_filters_without_question(self, async def test_validate_response_filter_scope_in_filters_with_non_existent_question(self, db: AsyncSession): dataset = await DatasetFactory.create() - query = SearchRecordsQuery.parse_obj( + query = SearchRecordsQuery.model_validate( { "query": { "text": {"q": "query"}, @@ -129,7 +129,7 @@ async def test_validate_response_filter_scope_in_filters_with_non_existent_quest async def test_validate_suggestion_filter_scope_in_filters_with_non_existent_question(self, db: AsyncSession): dataset = await DatasetFactory.create() - query = SearchRecordsQuery.parse_obj( + query = SearchRecordsQuery.model_validate( { "query": { "text": {"q": "query"}, @@ -158,7 +158,7 @@ async def test_validate_metadata_filter_scope_in_filters_with_non_existent_metad ): dataset = await DatasetFactory.create() - query = SearchRecordsQuery.parse_obj( + query = SearchRecordsQuery.model_validate( { "query": { "text": {"q": "query"}, @@ -184,7 +184,7 @@ async def test_validate_metadata_filter_scope_in_filters_with_non_existent_metad ) async def test_validate_response_filter_scope_in_sort_without_question(self, db: AsyncSession): - query = SearchRecordsQuery.parse_obj( + query = SearchRecordsQuery.model_validate( { "query": { "text": {"q": 
"query"}, @@ -198,7 +198,7 @@ async def test_validate_response_filter_scope_in_sort_without_question(self, db: async def test_validate_response_filter_scope_in_sort_with_non_existent_question(self, db: AsyncSession): dataset = await DatasetFactory.create() - query = SearchRecordsQuery.parse_obj( + query = SearchRecordsQuery.model_validate( { "query": { "text": {"q": "query"}, @@ -217,7 +217,7 @@ async def test_validate_response_filter_scope_in_sort_with_non_existent_question async def test_validate_suggestion_filter_scope_in_sort_with_non_existent_question(self, db: AsyncSession): dataset = await DatasetFactory.create() - query = SearchRecordsQuery.parse_obj( + query = SearchRecordsQuery.model_validate( { "query": { "text": {"q": "query"}, @@ -236,7 +236,7 @@ async def test_validate_suggestion_filter_scope_in_sort_with_non_existent_questi async def test_validate_metadata_filter_scope_in_sort_with_non_existent_metadata_property(self, db: AsyncSession): dataset = await DatasetFactory.create() - query = SearchRecordsQuery.parse_obj( + query = SearchRecordsQuery.model_validate( { "query": { "text": {"q": "query"}, diff --git a/argilla-server/tests/unit/security/test_model.py b/argilla-server/tests/unit/security/test_model.py index fa8092a775..3105e65991 100644 --- a/argilla-server/tests/unit/security/test_model.py +++ b/argilla-server/tests/unit/security/test_model.py @@ -16,9 +16,7 @@ import pytest from argilla_server.api.schemas.v1.users import User, UserCreate -from argilla_server.api.schemas.v1.workspaces import WorkspaceCreate from tests.factories import UserFactory -from tests.pydantic_v1 import ValidationError @pytest.mark.parametrize( @@ -47,11 +45,11 @@ def test_user_create(username: str): async def test_user_first_name(): user = await UserFactory.create(first_name="first-name", workspaces=[]) - assert User.from_orm(user).first_name == "first-name" + assert User.model_validate(user).first_name == "first-name" @pytest.mark.asyncio async def 
test_user_last_name(): user = await UserFactory.create(last_name="last-name", workspaces=[]) - assert User.from_orm(user).last_name == "last-name" + assert User.model_validate(user).last_name == "last-name" diff --git a/argilla-server/tests/unit/test_utils.py b/argilla-server/tests/unit/test_utils.py index 5fff653616..7ad43d5a53 100644 --- a/argilla-server/tests/unit/test_utils.py +++ b/argilla-server/tests/unit/test_utils.py @@ -18,7 +18,7 @@ from argilla_server.utils import parse_query_param from fastapi import HTTPException -from tests.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field @pytest.mark.parametrize( From 1c8f528bcc7c8f4a5677836d8c4987e1a2d4a885 Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Wed, 20 Nov 2024 17:37:00 +0100 Subject: [PATCH 31/50] fix: Resolve failing tests after pydantic V2 merge (#5700) # Description This PR resolves failing tests after pydantic V2 upgrade: - Changes the expected error details - Catch TypeError exception inside validator to resolve as a 422 status response **Type of change** - Bug fix (non-breaking change which fixes an issue) **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --- .../argilla_server/api/schemas/v1/records.py | 2 + .../test_create_dataset_records_bulk.py | 62 ++++++++++--------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/argilla-server/src/argilla_server/api/schemas/v1/records.py b/argilla-server/src/argilla_server/api/schemas/v1/records.py index 28ca12e455..a6d1dade7b 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/records.py +++ 
b/argilla-server/src/argilla_server/api/schemas/v1/records.py @@ -140,6 +140,8 @@ def validate_chat_field_content(cls, fields: Any): fields[key] = [ item if isinstance(item, ChatFieldValue) else ChatFieldValue(**item) for item in value ] + except TypeError as e: + raise ValueError(f"Error parsing chat field '{key}': {e}") except ValidationError as e: raise ValueError(f"Error parsing chat field '{key}': {e.errors()}") diff --git a/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py b/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py index 75da4ca579..d0b61853b4 100644 --- a/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py +++ b/argilla-server/tests/unit/api/handlers/v1/datasets/records/records_bulk/test_create_dataset_records_bulk.py @@ -744,19 +744,19 @@ async def test_create_dataset_records_bulk_with_wrong_custom_field_value( "params": { "errors": [ { - "loc": ["body", "items", 0, "fields", "text-field"], - "msg": "str type expected", - "type": "type_error.str", + "loc": ["body", "items", 0, "fields", "text-field", "constrained-str"], + "msg": "Input should be a valid string", + "type": "string_type", }, { - "loc": ["body", "items", 0, "fields", "text-field"], - "msg": "value is not a valid list", - "type": "type_error.list", + "loc": ["body", "items", 0, "fields", "text-field", "list[ChatFieldValue]"], + "msg": "Input should be a valid list", + "type": "list_type", }, { - "loc": ["body", "items", 0, "fields", "text-field"], - "msg": "value is not a valid dict", - "type": "type_error.dict", + "loc": ["body", "items", 0, "fields", "text-field", "dict[constrained-str,any]"], + "msg": "Input should be a valid dictionary", + "type": "dict_type", }, ] }, @@ -771,19 +771,19 @@ async def test_create_dataset_records_bulk_with_wrong_custom_field_value( "params": { "errors": [ { - "loc": ["body", 
"items", 0, "fields", "text-field"], - "msg": "str type expected", - "type": "type_error.str", + "loc": ["body", "items", 0, "fields", "text-field", "constrained-str"], + "msg": "Input should be a valid string", + "type": "string_type", }, { - "loc": ["body", "items", 0, "fields", "text-field"], - "msg": "value is not a valid list", - "type": "type_error.list", + "loc": ["body", "items", 0, "fields", "text-field", "list[ChatFieldValue]"], + "msg": "Input should be a valid list", + "type": "list_type", }, { - "loc": ["body", "items", 0, "fields", "text-field"], - "msg": "value is not a valid dict", - "type": "type_error.dict", + "loc": ["body", "items", 0, "fields", "text-field", "dict[constrained-str,any]"], + "msg": "Input should be a valid dictionary", + "type": "dict_type", }, ] }, @@ -798,19 +798,19 @@ async def test_create_dataset_records_bulk_with_wrong_custom_field_value( "params": { "errors": [ { - "loc": ["body", "items", 0, "fields", "text-field"], - "msg": "str type expected", - "type": "type_error.str", + "loc": ["body", "items", 0, "fields", "text-field", "constrained-str"], + "msg": "Input should be a valid string", + "type": "string_type", }, { - "loc": ["body", "items", 0, "fields", "text-field"], - "msg": "value is not a valid list", - "type": "type_error.list", + "loc": ["body", "items", 0, "fields", "text-field", "list[ChatFieldValue]"], + "msg": "Input should be a valid list", + "type": "list_type", }, { - "loc": ["body", "items", 0, "fields", "text-field"], - "msg": "value is not a valid dict", - "type": "type_error.dict", + "loc": ["body", "items", 0, "fields", "text-field", "dict[constrained-str,any]"], + "msg": "Input should be a valid dictionary", + "type": "dict_type", }, ] }, @@ -826,8 +826,10 @@ async def test_create_dataset_records_bulk_with_wrong_custom_field_value( "errors": [ { "loc": ["body", "items", 0, "fields"], - "msg": "argilla_server.api.schemas.v1.chat.ChatFieldValue() argument after ** must be a mapping, not str", - "type": 
"type_error", + "msg": "Value error, Error parsing chat field 'text-field': " + "argilla_server.api.schemas.v1.chat.ChatFieldValue() " + "argument after ** must be a mapping, not str", + "type": "value_error", } ] }, @@ -871,7 +873,7 @@ async def test_create_dataset_records_bulk_with_wrong_text_field_value( }, ) - assert response.status_code == 422 + assert response.status_code == 422, response.json() assert response.json() == expected_error assert (await db.execute(select(func.count(Record.id)))).scalar_one() == 0 From 5f6c2911c3ca7ed860f8898a54be1abbad2c7796 Mon Sep 17 00:00:00 2001 From: Sara Han <127759186+sdiazlor@users.noreply.github.com> Date: Thu, 21 Nov 2024 09:50:10 +0100 Subject: [PATCH 32/50] [DOCS] Deploy on spaces review (#5704) # Description Fix some typos and the API reference visualization. Closes # **Type of change** - Bug fix (non-breaking change which fixes an issue) - New feature (non-breaking change which adds functionality) - Breaking change (fix or feature that would cause existing functionality to not work as expected) - Refactor (change restructuring the codebase without changing functionality) - Improvement (change adding some improvement to an existing functionality) - Documentation update **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --- argilla/docs/getting_started/quickstart.md | 2 +- argilla/src/argilla/_helpers/_deploy.py | 41 +++++++++++----------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/argilla/docs/getting_started/quickstart.md b/argilla/docs/getting_started/quickstart.md index 2eeb8524c2..7ecc5d1774 100644 --- 
a/argilla/docs/getting_started/quickstart.md +++ b/argilla/docs/getting_started/quickstart.md @@ -53,7 +53,7 @@ Argilla is a free, open-source, self-hosted tool. This means you need to deploy Your Argilla API key can be found in the `My Settings` page of your Argilla Space. Take a look at the [sign in to the UI section](#sign-in-to-the-argilla-ui) to learn how to retrieve it. !!! warning "Persistent storage `SMALL`" - Not setting persistent storage to `SMALL` means that **you will loose your data when the Space restarts**. Spaces get restarted due to maintainance, inactivity, and every time you change your Spaces settings. If you want to **use the Space just for testing** you can use `FREE` temporarily. + Not setting persistent storage to `SMALL` means that **you will loose your data when the Space restarts**. Spaces get restarted due to maintenance, inactivity, and every time you change your Spaces settings. If you want to **use the Space just for testing** you can use `FREE` temporarily. If you want to deploy Argilla within a Hugging Face organization, setup a more stable Space, or understand the settings, [check out the HF Spaces settings guide](how-to-configure-argilla-on-huggingface.md). diff --git a/argilla/src/argilla/_helpers/_deploy.py b/argilla/src/argilla/_helpers/_deploy.py index afdb952410..c676bc5901 100644 --- a/argilla/src/argilla/_helpers/_deploy.py +++ b/argilla/src/argilla/_helpers/_deploy.py @@ -44,26 +44,25 @@ def deploy_on_spaces( private: Optional[Union[bool, None]] = False, ) -> "Argilla": """ - Deploys Argilla on Hugging Face Spaces. - - Args: - api_key (str): The Argilla API key to be defined for the owner user and creator of the Space. - repo_name (Optional[str]): The ID of the repository where Argilla will be deployed. Defaults to "argilla". - org_name (Optional[str]): The name of the organization where Argilla will be deployed. Defaults to None. - hf_token (Optional[Union[str, None]]): The Hugging Face authentication token. 
Defaults to None. - space_storage (Optional[Union[str, SpaceStorage]]): The persistant storage size for the space. Defaults to None without persistant storage. - space_hardware (Optional[Union[str, SpaceHardware]]): The hardware configuration for the space. Defaults to "cpu-basic" with downtime after 48 hours of inactivity. - private (Optional[Union[bool, None]]): Whether the space should be private. Defaults to False. - - Returns: - Argilla: The Argilla client. - - Example: - ```Python - import argilla as rg - api - client = rg.Argilla.deploy_on_spaces(api_key="12345678") - ``` + Deploys Argilla on Hugging Face Spaces. + + Args: + api_key (str): The Argilla API key to be defined for the owner user and creator of the Space. + repo_name (Optional[str]): The ID of the repository where Argilla will be deployed. Defaults to "argilla". + org_name (Optional[str]): The name of the organization where Argilla will be deployed. Defaults to None. + hf_token (Optional[Union[str, None]]): The Hugging Face authentication token. Defaults to None. + space_storage (Optional[Union[str, SpaceStorage]]): The persistent storage size for the space. Defaults to None without persistent storage. + space_hardware (Optional[Union[str, SpaceHardware]]): The hardware configuration for the space. Defaults to "cpu-basic" with downtime after 48 hours of inactivity. + private (Optional[Union[bool, None]]): Whether the space should be private. Defaults to False. + + Returns: + Argilla: The Argilla client. + + Example: + ```Python + import argilla as rg + client = rg.Argilla.deploy_on_spaces(api_key="12345678") + ``` """ hf_token = cls._acquire_hf_token(ht_token=hf_token) hf_api = HfApi(token=hf_token) @@ -126,7 +125,7 @@ def deploy_on_spaces( @staticmethod def _space_storage_warning() -> None: warnings.warn( - "No storage provided. The space will not have persistant storage so every 48 hours your data will be reset." + "No storage provided. 
The space will not have persistent storage so every 48 hours your data will be reset." ) @classmethod From d6bc6f860e5bae20599dfa6e8c54f6c171641a89 Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Thu, 21 Nov 2024 14:55:22 +0100 Subject: [PATCH 33/50] [REFACTOR] `argilla`: Align questions to `Resource` API (#5680) # Description Closes https://github.com/argilla-io/argilla/issues/4931 **Type of change** - Refactor (change restructuring the codebase without changing functionality) **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --------- Co-authored-by: burtenshaw --- argilla/src/argilla/_api/_questions.py | 64 +--- argilla/src/argilla/_models/__init__.py | 25 +- .../argilla/_models/_settings/_questions.py | 164 ++++++++++ .../_models/_settings/_questions/__init__.py | 35 --- .../_models/_settings/_questions/_base.py | 50 ---- .../_settings/_questions/_label_selection.py | 53 ---- .../_questions/_multi_label_selection.py | 33 -- .../_models/_settings/_questions/_ranking.py | 40 --- .../_models/_settings/_questions/_rating.py | 40 --- .../_models/_settings/_questions/_span.py | 56 ---- .../_models/_settings/_questions/_text.py | 25 -- .../src/argilla/records/_mapping/_mapper.py | 6 +- argilla/src/argilla/settings/_common.py | 4 +- argilla/src/argilla/settings/_question.py | 283 +++++++++--------- argilla/src/argilla/settings/_resource.py | 42 +-- argilla/tests/integration/conftest.py | 4 + argilla/tests/integration/test_add_records.py | 4 +- .../tests/integration/test_export_dataset.py | 5 +- .../tests/integration/test_export_records.py | 5 +- .../tests/integration/test_import_features.py | 5 +- 
...st_metadata.py => test_manage_metadata.py} | 20 +- .../integration/test_publish_datasets.py | 7 +- .../test_update_dataset_settings.py | 9 + .../tests/integration/test_update_records.py | 5 +- .../unit/test_resources/test_questions.py | 74 +---- 25 files changed, 389 insertions(+), 669 deletions(-) create mode 100644 argilla/src/argilla/_models/_settings/_questions.py delete mode 100644 argilla/src/argilla/_models/_settings/_questions/__init__.py delete mode 100644 argilla/src/argilla/_models/_settings/_questions/_base.py delete mode 100644 argilla/src/argilla/_models/_settings/_questions/_label_selection.py delete mode 100644 argilla/src/argilla/_models/_settings/_questions/_multi_label_selection.py delete mode 100644 argilla/src/argilla/_models/_settings/_questions/_ranking.py delete mode 100644 argilla/src/argilla/_models/_settings/_questions/_rating.py delete mode 100644 argilla/src/argilla/_models/_settings/_questions/_span.py delete mode 100644 argilla/src/argilla/_models/_settings/_questions/_text.py rename argilla/tests/integration/{test_metadata.py => test_manage_metadata.py} (88%) diff --git a/argilla/src/argilla/_api/_questions.py b/argilla/src/argilla/_api/_questions.py index 5b112bc76f..98d67eb6da 100644 --- a/argilla/src/argilla/_api/_questions.py +++ b/argilla/src/argilla/_api/_questions.py @@ -18,34 +18,16 @@ import httpx from argilla._api._base import ResourceAPI from argilla._exceptions import api_error_handler -from argilla._models import ( - TextQuestionModel, - LabelQuestionModel, - MultiLabelQuestionModel, - RankingQuestionModel, - RatingQuestionModel, - SpanQuestionModel, - QuestionBaseModel, - QuestionModel, -) +from argilla._models import QuestionModel __all__ = ["QuestionsAPI"] -class QuestionsAPI(ResourceAPI[QuestionBaseModel]): +class QuestionsAPI(ResourceAPI[QuestionModel]): """Manage datasets via the API""" http_client: httpx.Client - _TYPE_TO_MODEL_CLASS = { - "text": TextQuestionModel, - "label_selection": LabelQuestionModel, - 
"multi_label_selection": MultiLabelQuestionModel, - "ranking": RankingQuestionModel, - "rating": RatingQuestionModel, - "span": SpanQuestionModel, - } - ################ # CRUD methods # ################ @@ -53,15 +35,14 @@ class QuestionsAPI(ResourceAPI[QuestionBaseModel]): @api_error_handler def create( self, - dataset_id: UUID, question: QuestionModel, ) -> QuestionModel: - url = f"/api/v1/datasets/{dataset_id}/questions" + url = f"/api/v1/datasets/{question.dataset_id}/questions" response = self.http_client.post(url=url, json=question.model_dump()) response.raise_for_status() response_json = response.json() question_model = self._model_from_json(response_json=response_json) - self._log_message(message=f"Created question {question_model.name} in dataset {dataset_id}") + self._log_message(message=f"Created question {question_model.name} in dataset {question.dataset_id}") return question_model @api_error_handler @@ -69,25 +50,24 @@ def update( self, question: QuestionModel, ) -> QuestionModel: - # TODO: Implement update method for fields with server side ID - raise NotImplementedError + url = f"/api/v1/questions/{question.id}" + response = self.http_client.patch(url, json=question.model_dump()) + response.raise_for_status() + response_json = response.json() + updated_question = self._model_from_json(response_json) + self._log_message(message=f"Update question {updated_question.name} with id {question.id}") + return updated_question @api_error_handler def delete(self, question_id: UUID) -> None: - # TODO: Implement delete method for fields with server side ID - raise NotImplementedError + url = f"/api/v1/questions/{question_id}" + self.http_client.delete(url).raise_for_status() + self._log_message(message=f"Deleted question with id {question_id}") #################### # Utility methods # #################### - def create_many(self, dataset_id: UUID, questions: List[QuestionModel]) -> List[QuestionModel]: - response_models = [] - for question in questions: - 
response_model = self.create(dataset_id=dataset_id, question=question) - response_models.append(response_model) - return response_models - @api_error_handler def list(self, dataset_id: UUID) -> List[QuestionModel]: response = self.http_client.get(f"/api/v1/datasets/{dataset_id}/questions") @@ -103,21 +83,7 @@ def list(self, dataset_id: UUID) -> List[QuestionModel]: def _model_from_json(self, response_json: Dict) -> QuestionModel: response_json["inserted_at"] = self._date_from_iso_format(date=response_json["inserted_at"]) response_json["updated_at"] = self._date_from_iso_format(date=response_json["updated_at"]) - return self._get_model_from_response(response_json=response_json) + return QuestionModel(**response_json) def _model_from_jsons(self, response_jsons: List[Dict]) -> List[QuestionModel]: return list(map(self._model_from_json, response_jsons)) - - def _get_model_from_response(self, response_json: Dict) -> QuestionModel: - """Get the model from the response""" - try: - question_type = response_json.get("settings", {}).get("type") - except Exception as e: - raise ValueError("Invalid field type: missing 'settings.type' in response") from e - - question_class = self._TYPE_TO_MODEL_CLASS.get(question_type) - if question_class is None: - self._log_message(message=f"Unknown question type: {question_type}") - question_class = QuestionBaseModel - - return question_class(**response_json, check_fields=False) diff --git a/argilla/src/argilla/_models/__init__.py b/argilla/src/argilla/_models/__init__.py index 553296d6dd..4f69b93024 100644 --- a/argilla/src/argilla/_models/__init__.py +++ b/argilla/src/argilla/_models/__init__.py @@ -39,18 +39,14 @@ FieldSettings, ) from argilla._models._settings._questions import ( - LabelQuestionModel, - LabelQuestionSettings, - MultiLabelQuestionModel, - QuestionBaseModel, QuestionModel, QuestionSettings, - RankingQuestionModel, - RatingQuestionModel, - SpanQuestionModel, SpanQuestionSettings, - TextQuestionModel, TextQuestionSettings, 
+ LabelQuestionSettings, + RatingQuestionSettings, + MultiLabelQuestionSettings, + RankingQuestionSettings, ) from argilla._models._settings._metadata import ( MetadataFieldModel, @@ -61,5 +57,18 @@ FloatMetadataPropertySettings, IntegerMetadataPropertySettings, ) +from argilla._models._settings._questions import ( + QuestionModel, + QuestionSettings, + LabelQuestionSettings, + RatingQuestionSettings, + TextQuestionSettings, + MultiLabelQuestionSettings, + RankingQuestionSettings, + SpanQuestionSettings, +) from argilla._models._settings._vectors import VectorFieldModel + +from argilla._models._user import UserModel, Role +from argilla._models._workspace import WorkspaceModel from argilla._models._webhook import WebhookModel, EventType diff --git a/argilla/src/argilla/_models/_settings/_questions.py b/argilla/src/argilla/_models/_settings/_questions.py new file mode 100644 index 0000000000..558b351f23 --- /dev/null +++ b/argilla/src/argilla/_models/_settings/_questions.py @@ -0,0 +1,164 @@ +# Copyright 2024-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Annotated, Union, Optional, ClassVar, List, Dict, Literal +from uuid import UUID + +from pydantic import ConfigDict, field_validator, Field, BaseModel, model_validator, field_serializer +from pydantic_core.core_schema import ValidationInfo + +from argilla._models import ResourceModel + +try: + from typing import Self +except ImportError: + from typing_extensions import Self + + +class LabelQuestionSettings(BaseModel): + type: Literal["label_selection"] = "label_selection" + + _MIN_VISIBLE_OPTIONS: ClassVar[int] = 3 + + options: List[Dict[str, Optional[str]]] = Field(default_factory=list, validate_default=True) + visible_options: Optional[int] = Field(None, validate_default=True, ge=_MIN_VISIBLE_OPTIONS) + + @field_validator("options", mode="before") + @classmethod + def __labels_are_unique(cls, options: List[Dict[str, Optional[str]]]) -> List[Dict[str, Optional[str]]]: + """Ensure that labels are unique""" + + unique_labels = list(set([option["value"] for option in options])) + if len(unique_labels) != len(options): + raise ValueError("All labels must be unique") + return options + + @model_validator(mode="after") + def __validate_visible_options(self) -> "Self": + if self.visible_options is None and self.options and len(self.options) >= self._MIN_VISIBLE_OPTIONS: + self.visible_options = len(self.options) + return self + + +class MultiLabelQuestionSettings(LabelQuestionSettings): + type: Literal["multi_label_selection"] = "multi_label_selection" + options_order: Literal["natural", "suggestion"] = Field("natural", description="The order of the labels in the UI.") + + +class RankingQuestionSettings(BaseModel): + type: Literal["ranking"] = "ranking" + + options: List[Dict[str, Optional[str]]] = Field(default_factory=list, validate_default=True) + + @field_validator("options", mode="before") + @classmethod + def __values_are_unique(cls, options: List[Dict[str, Optional[str]]]) -> List[Dict[str, Optional[str]]]: + """Ensure that values are 
unique""" + + unique_values = list(set([option["value"] for option in options])) + if len(unique_values) != len(options): + raise ValueError("All values must be unique") + + return options + + +class RatingQuestionSettings(BaseModel): + type: Literal["rating"] = "rating" + + options: List[dict] = Field(..., validate_default=True) + + @field_validator("options", mode="before") + @classmethod + def __values_are_unique(cls, options: List[dict]) -> List[dict]: + """Ensure that values are unique""" + + unique_values = list(set([option["value"] for option in options])) + if len(unique_values) != len(options): + raise ValueError("All values must be unique") + + return options + + +class SpanQuestionSettings(BaseModel): + type: Literal["span"] = "span" + + _MIN_VISIBLE_OPTIONS: ClassVar[int] = 3 + + allow_overlapping: bool = False + field: Optional[str] = None + options: List[Dict[str, Optional[str]]] = Field(default_factory=list, validate_default=True) + visible_options: Optional[int] = Field(None, validate_default=True, ge=_MIN_VISIBLE_OPTIONS) + + @field_validator("options", mode="before") + @classmethod + def __values_are_unique(cls, options: List[Dict[str, Optional[str]]]) -> List[Dict[str, Optional[str]]]: + """Ensure that values are unique""" + + unique_values = list(set([option["value"] for option in options])) + if len(unique_values) != len(options): + raise ValueError("All values must be unique") + + return options + + @model_validator(mode="after") + def __validate_visible_options(self) -> "Self": + if self.visible_options is None and self.options and len(self.options) >= self._MIN_VISIBLE_OPTIONS: + self.visible_options = len(self.options) + return self + + +class TextQuestionSettings(BaseModel): + type: Literal["text"] = "text" + + use_markdown: bool = False + + +QuestionSettings = Annotated[ + Union[ + LabelQuestionSettings, + MultiLabelQuestionSettings, + RankingQuestionSettings, + RatingQuestionSettings, + SpanQuestionSettings, + TextQuestionSettings, + ], 
+ Field(..., discriminator="type"), +] + + +class QuestionModel(ResourceModel): + name: str + settings: QuestionSettings + + title: str = Field(None, validate_default=True) + description: Optional[str] = None + required: bool = True + + dataset_id: Optional[UUID] = None + + @field_validator("title", mode="before") + @classmethod + def _title_default(cls, title, info: ValidationInfo): + validated_title = title or info.data["name"] + return validated_title + + @property + def type(self) -> str: + return self.settings.type + + @field_serializer("id", "dataset_id", when_used="unless-none") + def serialize_id(self, value: UUID) -> str: + return str(value) + + model_config = ConfigDict(validate_assignment=True) diff --git a/argilla/src/argilla/_models/_settings/_questions/__init__.py b/argilla/src/argilla/_models/_settings/_questions/__init__.py deleted file mode 100644 index 403774c032..0000000000 --- a/argilla/src/argilla/_models/_settings/_questions/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2024-present, Argilla, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# flake8: noqa -from typing import Union - -from argilla._models._settings._questions._label_selection import LabelQuestionModel, LabelQuestionSettings -from argilla._models._settings._questions._multi_label_selection import ( - MultiLabelQuestionModel, - MultiLabelQuestionSettings, -) -from argilla._models._settings._questions._rating import RatingQuestionModel, RatingQuestionSettings -from argilla._models._settings._questions._ranking import RankingQuestionModel, RankingQuestionSettings -from argilla._models._settings._questions._text import TextQuestionModel, TextQuestionSettings -from argilla._models._settings._questions._base import QuestionBaseModel, QuestionSettings -from argilla._models._settings._questions._span import SpanQuestionModel, SpanQuestionSettings - -QuestionModel = Union[ - LabelQuestionModel, - RatingQuestionModel, - TextQuestionModel, - MultiLabelQuestionModel, - RankingQuestionModel, - QuestionBaseModel, -] diff --git a/argilla/src/argilla/_models/_settings/_questions/_base.py b/argilla/src/argilla/_models/_settings/_questions/_base.py deleted file mode 100644 index e661689507..0000000000 --- a/argilla/src/argilla/_models/_settings/_questions/_base.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2024-present, Argilla, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from datetime import datetime -from typing import Optional -from uuid import UUID - -from pydantic import BaseModel, field_serializer, field_validator, Field -from pydantic_core.core_schema import ValidationInfo - - -class QuestionSettings(BaseModel, validate_assignment=True): - type: str - - -class QuestionBaseModel(BaseModel, validate_assignment=True): - id: Optional[UUID] = None - name: str - settings: QuestionSettings - - title: str = Field(None, validate_default=True) - description: Optional[str] = None - required: bool = True - inserted_at: Optional[datetime] = None - updated_at: Optional[datetime] = None - - @field_validator("title", mode="before") - @classmethod - def __title_default(cls, title, info: ValidationInfo): - validated_title = title or info.data["name"] - return validated_title - - @field_serializer("inserted_at", "updated_at", when_used="unless-none") - def serialize_datetime(self, value: datetime) -> str: - return value.isoformat() - - @field_serializer("id", when_used="unless-none") - def serialize_id(self, value: UUID) -> str: - return str(value) diff --git a/argilla/src/argilla/_models/_settings/_questions/_label_selection.py b/argilla/src/argilla/_models/_settings/_questions/_label_selection.py deleted file mode 100644 index 358bf441e7..0000000000 --- a/argilla/src/argilla/_models/_settings/_questions/_label_selection.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2024-present, Argilla, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Dict, List, Optional, ClassVar - -from pydantic import field_validator, Field, model_validator - -from argilla._models._settings._questions._base import QuestionSettings, QuestionBaseModel - -try: - from typing import Self -except ImportError: - from typing_extensions import Self - - -class LabelQuestionSettings(QuestionSettings): - type: str = "label_selection" - - _MIN_VISIBLE_OPTIONS: ClassVar[int] = 3 - - options: List[Dict[str, Optional[str]]] = Field(default_factory=list, validate_default=True) - visible_options: Optional[int] = Field(None, validate_default=True, ge=_MIN_VISIBLE_OPTIONS) - - @field_validator("options", mode="before") - @classmethod - def __labels_are_unique(cls, options: List[Dict[str, Optional[str]]]) -> List[Dict[str, Optional[str]]]: - """Ensure that labels are unique""" - - unique_labels = list(set([option["value"] for option in options])) - if len(unique_labels) != len(options): - raise ValueError("All labels must be unique") - return options - - @model_validator(mode="after") - def __validate_visible_options(self) -> "Self": - if self.visible_options is None and self.options and len(self.options) >= self._MIN_VISIBLE_OPTIONS: - self.visible_options = len(self.options) - return self - - -class LabelQuestionModel(QuestionBaseModel): - settings: LabelQuestionSettings diff --git a/argilla/src/argilla/_models/_settings/_questions/_multi_label_selection.py b/argilla/src/argilla/_models/_settings/_questions/_multi_label_selection.py deleted file mode 100644 index 8eeeb7f121..0000000000 --- a/argilla/src/argilla/_models/_settings/_questions/_multi_label_selection.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2024-present, Argilla, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from enum import Enum - -from pydantic import Field - -from argilla._models._settings._questions._label_selection import LabelQuestionSettings, LabelQuestionModel - - -class OptionsOrder(str, Enum): - natural = "natural" - suggestion = "suggestion" - - -class MultiLabelQuestionSettings(LabelQuestionSettings): - type: str = "multi_label_selection" - options_order: OptionsOrder = Field(OptionsOrder.natural, description="The order of the labels in the UI.") - - -class MultiLabelQuestionModel(LabelQuestionModel): - settings: MultiLabelQuestionSettings diff --git a/argilla/src/argilla/_models/_settings/_questions/_ranking.py b/argilla/src/argilla/_models/_settings/_questions/_ranking.py deleted file mode 100644 index 6adb9aebac..0000000000 --- a/argilla/src/argilla/_models/_settings/_questions/_ranking.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2024-present, Argilla, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Dict, List, Optional - -from pydantic import field_validator, Field - -from argilla._models._settings._questions._base import QuestionSettings, QuestionBaseModel - - -class RankingQuestionSettings(QuestionSettings): - type: str = "ranking" - - options: List[Dict[str, Optional[str]]] = Field(default_factory=list, validate_default=True) - - @field_validator("options", mode="before") - @classmethod - def __values_are_unique(cls, options: List[Dict[str, Optional[str]]]) -> List[Dict[str, Optional[str]]]: - """Ensure that values are unique""" - - unique_values = list(set([option["value"] for option in options])) - if len(unique_values) != len(options): - raise ValueError("All values must be unique") - - return options - - -class RankingQuestionModel(QuestionBaseModel): - settings: RankingQuestionSettings diff --git a/argilla/src/argilla/_models/_settings/_questions/_rating.py b/argilla/src/argilla/_models/_settings/_questions/_rating.py deleted file mode 100644 index 9248bf3ca8..0000000000 --- a/argilla/src/argilla/_models/_settings/_questions/_rating.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2024-present, Argilla, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import List - -from pydantic import field_validator, Field - -from argilla._models._settings._questions._base import QuestionSettings, QuestionBaseModel - - -class RatingQuestionSettings(QuestionSettings): - type: str = "rating" - - options: List[dict] = Field(..., validate_default=True) - - @field_validator("options", mode="before") - @classmethod - def __values_are_unique(cls, options: List[dict]) -> List[dict]: - """Ensure that values are unique""" - - unique_values = list(set([option["value"] for option in options])) - if len(unique_values) != len(options): - raise ValueError("All values must be unique") - - return options - - -class RatingQuestionModel(QuestionBaseModel): - settings: RatingQuestionSettings diff --git a/argilla/src/argilla/_models/_settings/_questions/_span.py b/argilla/src/argilla/_models/_settings/_questions/_span.py deleted file mode 100644 index a24b9e1059..0000000000 --- a/argilla/src/argilla/_models/_settings/_questions/_span.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2024-present, Argilla, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Dict, List, Optional, ClassVar - -from pydantic import field_validator, Field, model_validator - -from argilla._models._settings._questions._base import QuestionSettings, QuestionBaseModel - -try: - from typing import Self -except ImportError: - from typing_extensions import Self - - -class SpanQuestionSettings(QuestionSettings): - type: str = "span" - - _MIN_VISIBLE_OPTIONS: ClassVar[int] = 3 - - allow_overlapping: bool = False - field: Optional[str] = None - options: List[Dict[str, Optional[str]]] = Field(default_factory=list, validate_default=True) - visible_options: Optional[int] = Field(None, validate_default=True, ge=_MIN_VISIBLE_OPTIONS) - - @field_validator("options", mode="before") - @classmethod - def __values_are_unique(cls, options: List[Dict[str, Optional[str]]]) -> List[Dict[str, Optional[str]]]: - """Ensure that values are unique""" - - unique_values = list(set([option["value"] for option in options])) - if len(unique_values) != len(options): - raise ValueError("All values must be unique") - - return options - - @model_validator(mode="after") - def __validate_visible_options(self) -> "Self": - if self.visible_options is None and self.options and len(self.options) >= self._MIN_VISIBLE_OPTIONS: - self.visible_options = len(self.options) - return self - - -class SpanQuestionModel(QuestionBaseModel): - settings: SpanQuestionSettings diff --git a/argilla/src/argilla/_models/_settings/_questions/_text.py b/argilla/src/argilla/_models/_settings/_questions/_text.py deleted file mode 100644 index 86d4a43f12..0000000000 --- a/argilla/src/argilla/_models/_settings/_questions/_text.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2024-present, Argilla, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from argilla._models._settings._questions._base import QuestionSettings, QuestionBaseModel - - -class TextQuestionSettings(QuestionSettings): - type: str = "text" - - use_markdown: bool = False - - -class TextQuestionModel(QuestionBaseModel): - settings: TextQuestionSettings diff --git a/argilla/src/argilla/records/_mapping/_mapper.py b/argilla/src/argilla/records/_mapping/_mapper.py index a4c4a398a8..be65717ab0 100644 --- a/argilla/src/argilla/records/_mapping/_mapper.py +++ b/argilla/src/argilla/records/_mapping/_mapper.py @@ -22,7 +22,7 @@ from argilla.responses import Response from argilla.settings import FieldBase, VectorField from argilla.settings._metadata import MetadataPropertyBase -from argilla.settings._question import QuestionPropertyBase +from argilla.settings._question import QuestionBase from argilla.suggestions import Suggestion from argilla.records._mapping._routes import ( AttributeRoute, @@ -177,12 +177,12 @@ def _select_attribute_type(self, attribute_route: AttributeRoute) -> AttributeRo If the attribute type is not provided, it will be inferred based on the schema item. """ schema_item = self._schema.get(attribute_route.name) - if isinstance(schema_item, QuestionPropertyBase) and ( + if isinstance(schema_item, QuestionBase) and ( attribute_route.type is None or attribute_route.type == AttributeType.SUGGESTION ): # Suggestions are the default destination for questions. 
attribute_route.type = AttributeType.SUGGESTION - elif isinstance(schema_item, QuestionPropertyBase) and attribute_route.type == AttributeType.RESPONSE: + elif isinstance(schema_item, QuestionBase) and attribute_route.type == AttributeType.RESPONSE: attribute_route.type = AttributeType.RESPONSE elif isinstance(schema_item, FieldBase): attribute_route.type = AttributeType.FIELD diff --git a/argilla/src/argilla/settings/_common.py b/argilla/src/argilla/settings/_common.py index b5760d1f78..be3f943c0e 100644 --- a/argilla/src/argilla/settings/_common.py +++ b/argilla/src/argilla/settings/_common.py @@ -14,7 +14,7 @@ from typing import Any, Optional, Union -from argilla._models import FieldModel, QuestionBaseModel +from argilla._models import FieldModel, QuestionModel from argilla._resource import Resource __all__ = ["SettingsPropertyBase"] @@ -23,7 +23,7 @@ class SettingsPropertyBase(Resource): """Base class for dataset fields or questions in Settings class""" - _model: Union[FieldModel, QuestionBaseModel] + _model: Union[FieldModel, QuestionModel] def __repr__(self) -> str: return ( diff --git a/argilla/src/argilla/settings/_question.py b/argilla/src/argilla/settings/_question.py index 262dddf1c8..63fb19f208 100644 --- a/argilla/src/argilla/settings/_question.py +++ b/argilla/src/argilla/settings/_question.py @@ -12,26 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict, List, Literal, Optional, Union +from typing import Dict, List, Literal, Optional, Union, TYPE_CHECKING from argilla import Argilla +from argilla._api import QuestionsAPI from argilla._models._settings._questions import ( - LabelQuestionModel, + QuestionModel, + QuestionSettings, LabelQuestionSettings, - MultiLabelQuestionModel, MultiLabelQuestionSettings, - QuestionModel, - RankingQuestionModel, - RankingQuestionSettings, - RatingQuestionModel, + TextQuestionSettings, RatingQuestionSettings, - SpanQuestionModel, + RankingQuestionSettings, SpanQuestionSettings, - TextQuestionModel, - TextQuestionSettings, ) from argilla.settings._common import SettingsPropertyBase +if TYPE_CHECKING: + from argilla.datasets import Dataset + try: from typing import Self except ImportError: @@ -48,7 +47,62 @@ ] -class QuestionPropertyBase(SettingsPropertyBase): +class QuestionBase(SettingsPropertyBase): + _model: QuestionModel + _api: QuestionsAPI + _dataset: Optional["Dataset"] + + def __init__( + self, + name: str, + settings: QuestionSettings, + title: Optional[str] = None, + required: Optional[bool] = True, + description: Optional[str] = None, + _client: Optional[Argilla] = None, + ): + client = _client or Argilla._get_default() + + super().__init__(api=client.api.questions, client=client) + + self._dataset = None + self._model = QuestionModel( + name=name, + settings=settings, + title=title, + required=required, + description=description, + ) + + @classmethod + def from_model(cls, model: QuestionModel) -> "Self": + instance = cls(name=model.name) # noqa + instance._model = model + + return instance + + @classmethod + def from_dict(cls, data: dict) -> "Self": + model = QuestionModel(**data) + return cls.from_model(model) + + @property + def dataset(self) -> "Dataset": + return self._dataset + + @dataset.setter + def dataset(self, value: "Dataset") -> None: + self._dataset = value + self._model.dataset_id = self._dataset.id + 
self._with_client(self._dataset._client) + + def _with_client(self, client: "Argilla") -> "Self": + # TODO: Review and simplify. Maybe only one of them is required + self._client = client + self._api = self._client.api.questions + + return self + @staticmethod def _render_values_as_options(values: Union[List[str], List[int], Dict[str, str]]) -> List[Dict[str, str]]: """Render values as options for the question so that the model conforms to the API schema""" @@ -79,16 +133,8 @@ def _render_options_as_labels(cls, options: List[Dict[str, str]]) -> List[str]: """Render values as labels for the question so that they can be returned as a list of strings""" return list(cls._render_options_as_values(options=options).keys()) - def _with_client(self, client: "Argilla") -> "Self": - self._client = client - self._api = client.api.questions - - return self - - -class LabelQuestion(QuestionPropertyBase): - _model: LabelQuestionModel +class LabelQuestion(QuestionBase): def __init__( self, name: str, @@ -97,6 +143,7 @@ def __init__( description: Optional[str] = None, required: bool = True, visible_labels: Optional[int] = None, + client: Optional[Argilla] = None, ) -> None: """ Define a new label question for `Settings` of a `Dataset`. A label \ question is a question where the user can select one label from \ @@ -112,27 +159,19 @@ def __init__( visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \ Setting it to None show all options. 
""" - self._model = LabelQuestionModel( + + super().__init__( name=name, title=title, - description=description, required=required, + description=description, settings=LabelQuestionSettings( - options=self._render_values_as_options(labels), visible_options=visible_labels + options=self._render_values_as_options(labels), + visible_options=visible_labels, ), + _client=client, ) - @classmethod - def from_model(cls, model: LabelQuestionModel) -> "LabelQuestion": - instance = cls(name=model.name, labels=cls._render_options_as_values(model.settings.options)) - instance._model = model - return instance - - @classmethod - def from_dict(cls, data: dict) -> "LabelQuestion": - model = LabelQuestionModel(**data) - return cls.from_model(model=model) - ############################## # Public properties ############################## @@ -153,14 +192,15 @@ def visible_labels(self) -> Optional[int]: def visible_labels(self, visible_labels: Optional[int]) -> None: self._model.settings.visible_options = visible_labels - ############################## - # Private methods - ############################## + @classmethod + def from_model(cls, model: QuestionModel) -> "Self": + instance = cls(name=model.name, labels=cls._render_options_as_labels(model.settings.options)) # noqa + instance._model = model + return instance -class MultiLabelQuestion(LabelQuestion): - _model: MultiLabelQuestionModel +class MultiLabelQuestion(LabelQuestion): def __init__( self, name: str, @@ -170,6 +210,7 @@ def __init__( title: Optional[str] = None, description: Optional[str] = None, required: bool = True, + client: Optional[Argilla] = None, ) -> None: """Create a new multi-label question for `Settings` of a `Dataset`. A \ multi-label question is a question where the user can select multiple \ @@ -188,38 +229,29 @@ def __init__( description (Optional[str]): The description of the question to be shown in the UI. required (bool): If the question is required for a record to be valid. 
At least one question must be required. """ - self._model = MultiLabelQuestionModel( + QuestionBase.__init__( + self, name=name, title=title, - description=description, required=required, + description=description, settings=MultiLabelQuestionSettings( options=self._render_values_as_options(labels), visible_options=visible_labels, options_order=labels_order, ), + _client=client, ) @classmethod - def from_model(cls, model: MultiLabelQuestionModel) -> "MultiLabelQuestion": - instance = cls( - name=model.name, - labels=cls._render_options_as_values(model.settings.options), - labels_order=model.settings.options_order, - ) + def from_model(cls, model: QuestionModel) -> "Self": + instance = cls(name=model.name, labels=cls._render_options_as_labels(model.settings.options)) # noqa instance._model = model return instance - @classmethod - def from_dict(cls, data: dict) -> "MultiLabelQuestion": - model = MultiLabelQuestionModel(**data) - return cls.from_model(model=model) - - -class TextQuestion(QuestionPropertyBase): - _model: TextQuestionModel +class TextQuestion(QuestionBase): def __init__( self, name: str, @@ -227,6 +259,7 @@ def __init__( description: Optional[str] = None, required: bool = True, use_markdown: bool = False, + client: Optional[Argilla] = None, ) -> None: """Create a new text question for `Settings` of a `Dataset`. A text question \ is a question where the user can input text. @@ -239,26 +272,15 @@ def __init__( use_markdown (Optional[bool]): Whether to render the markdown in the UI. When True, you will be able \ to use all the Markdown features for text formatting, including LaTex formulas and embedding multimedia content and PDFs. 
""" - self._model = TextQuestionModel( + super().__init__( name=name, title=title, - description=description, required=required, + description=description, settings=TextQuestionSettings(use_markdown=use_markdown), + _client=client, ) - @classmethod - def from_model(cls, model: TextQuestionModel) -> "TextQuestion": - instance = cls(name=model.name) - instance._model = model - - return instance - - @classmethod - def from_dict(cls, data: dict) -> "TextQuestion": - model = TextQuestionModel(**data) - return cls.from_model(model=model) - @property def use_markdown(self) -> bool: return self._model.settings.use_markdown @@ -268,9 +290,7 @@ def use_markdown(self, use_markdown: bool) -> None: self._model.settings.use_markdown = use_markdown -class RatingQuestion(QuestionPropertyBase): - _model: RatingQuestionModel - +class RatingQuestion(QuestionBase): def __init__( self, name: str, @@ -278,6 +298,7 @@ def __init__( title: Optional[str] = None, description: Optional[str] = None, required: bool = True, + client: Optional[Argilla] = None, ) -> None: """Create a new rating question for `Settings` of a `Dataset`. A rating question \ is a question where the user can select a value from a sequential list of options. @@ -289,39 +310,33 @@ def __init__( description (Optional[str]): The description of the question to be shown in the UI. required (bool): If the question is required for a record to be valid. At least one question must be required. 
""" - self._model = RatingQuestionModel( + + super().__init__( name=name, title=title, - description=description, required=required, - values=values, + description=description, settings=RatingQuestionSettings(options=self._render_values_as_options(values)), + _client=client, ) - @classmethod - def from_model(cls, model: RatingQuestionModel) -> "RatingQuestion": - instance = cls(name=model.name, values=cls._render_options_as_values(model.settings.options)) - instance._model = model - - return instance - - @classmethod - def from_dict(cls, data: dict) -> "RatingQuestion": - model = RatingQuestionModel(**data) - return cls.from_model(model=model) - @property def values(self) -> List[int]: - return self._render_options_as_labels(self._model.settings.options) + return self._render_options_as_labels(self._model.settings.options) # noqa @values.setter def values(self, values: List[int]) -> None: self._model.values = self._render_values_as_options(values) + @classmethod + def from_model(cls, model: QuestionModel) -> "Self": + instance = cls(name=model.name, values=cls._render_options_as_labels(model.settings.options)) # noqa + instance._model = model + + return instance -class RankingQuestion(QuestionPropertyBase): - _model: RankingQuestionModel +class RankingQuestion(QuestionBase): def __init__( self, name: str, @@ -329,6 +344,7 @@ def __init__( title: Optional[str] = None, description: Optional[str] = None, required: bool = True, + client: Optional[Argilla] = None, ) -> None: """Create a new ranking question for `Settings` of a `Dataset`. A ranking question \ is a question where the user can rank a list of options. @@ -341,26 +357,15 @@ def __init__( description (Optional[str]): The description of the question to be shown in the UI. required (bool): If the question is required for a record to be valid. At least one question must be required. 
""" - self._model = RankingQuestionModel( + super().__init__( name=name, title=title, - description=description, required=required, + description=description, settings=RankingQuestionSettings(options=self._render_values_as_options(values)), + _client=client, ) - @classmethod - def from_model(cls, model: RankingQuestionModel) -> "RankingQuestion": - instance = cls(name=model.name, values=cls._render_options_as_values(model.settings.options)) - instance._model = model - - return instance - - @classmethod - def from_dict(cls, data: dict) -> "RankingQuestion": - model = RankingQuestionModel(**data) - return cls.from_model(model=model) - @property def values(self) -> List[str]: return self._render_options_as_labels(self._model.settings.options) @@ -369,10 +374,15 @@ def values(self) -> List[str]: def values(self, values: List[int]) -> None: self._model.settings.options = self._render_values_as_options(values) + @classmethod + def from_model(cls, model: QuestionModel) -> "Self": + instance = cls(name=model.name, values=cls._render_options_as_labels(model.settings.options)) # noqa + instance._model = model + + return instance -class SpanQuestion(QuestionPropertyBase): - _model: SpanQuestionModel +class SpanQuestion(QuestionBase): def __init__( self, name: str, @@ -383,6 +393,7 @@ def __init__( title: Optional[str] = None, description: Optional[str] = None, required: bool = True, + client: Optional[Argilla] = None, ): """ Create a new span question for `Settings` of a `Dataset`. A span question \ is a question where the user can select a section of text within a text field \ @@ -400,23 +411,20 @@ def __init__( description (Optional[str]): The description of the question to be shown in the UI. required (bool): If the question is required for a record to be valid. At least one question must be required. 
""" - self._model = SpanQuestionModel( + super().__init__( name=name, title=title, - description=description, required=required, + description=description, settings=SpanQuestionSettings( field=field, allow_overlapping=allow_overlapping, visible_options=visible_labels, options=self._render_values_as_options(labels), ), + _client=client, ) - @property - def name(self): - return self._model.name - @property def field(self): return self._model.settings.field @@ -450,21 +458,16 @@ def labels(self, labels: List[str]) -> None: self._model.settings.options = self._render_values_as_options(labels) @classmethod - def from_model(cls, model: SpanQuestionModel) -> "SpanQuestion": + def from_model(cls, model: QuestionModel) -> "Self": instance = cls( name=model.name, field=model.settings.field, - labels=cls._render_options_as_values(model.settings.options), - ) + labels=cls._render_options_as_labels(model.settings.options), + ) # noqa instance._model = model return instance - @classmethod - def from_dict(cls, data: dict) -> "SpanQuestion": - model = SpanQuestionModel(**data) - return cls.from_model(model=model) - QuestionType = Union[ LabelQuestion, @@ -475,25 +478,25 @@ def from_dict(cls, data: dict) -> "SpanQuestion": SpanQuestion, ] -_TYPE_TO_CLASS = { - "label_selection": LabelQuestion, - "multi_label_selection": MultiLabelQuestion, - "ranking": RankingQuestion, - "text": TextQuestion, - "rating": RatingQuestion, - "span": SpanQuestion, -} - def question_from_model(model: QuestionModel) -> QuestionType: - try: - return _TYPE_TO_CLASS[model.settings.type].from_model(model) - except KeyError: - raise ValueError(f"Unsupported question model type: {model.settings.type}") - - -def question_from_dict(data: dict) -> QuestionType: - try: - return _TYPE_TO_CLASS[data["settings"]["type"]].from_dict(data) - except KeyError: - raise ValueError(f"Unsupported question model type: {data['settings']['type']}") + question_type = model.type + + if question_type == "label_selection": + return 
LabelQuestion.from_model(model) + elif question_type == "multi_label_selection": + return MultiLabelQuestion.from_model(model) + elif question_type == "ranking": + return RankingQuestion.from_model(model) + elif question_type == "text": + return TextQuestion.from_model(model) + elif question_type == "rating": + return RatingQuestion.from_model(model) + elif question_type == "span": + return SpanQuestion.from_model(model) + else: + raise ValueError(f"Unsupported question model type: {question_type}") + + +def _question_from_dict(data: dict) -> QuestionType: + return question_from_model(QuestionModel(**data)) diff --git a/argilla/src/argilla/settings/_resource.py b/argilla/src/argilla/settings/_resource.py index 97ced197c9..6971db722f 100644 --- a/argilla/src/argilla/settings/_resource.py +++ b/argilla/src/argilla/settings/_resource.py @@ -26,7 +26,7 @@ from argilla.settings._field import Field, _field_from_dict, _field_from_model, FieldBase from argilla.settings._io import build_settings_from_repo_id from argilla.settings._metadata import MetadataType, MetadataField, MetadataPropertyBase -from argilla.settings._question import QuestionType, question_from_model, question_from_dict, QuestionPropertyBase +from argilla.settings._question import QuestionType, question_from_model, _question_from_dict, QuestionBase from argilla.settings._task_distribution import TaskDistribution from argilla.settings._templates import DefaultSettingsMixin from argilla.settings._vector import VectorField @@ -78,7 +78,7 @@ def __init__( self.__guidelines = self.__process_guidelines(guidelines) self.__allow_extra_metadata = allow_extra_metadata - self.__questions = QuestionsProperties(self, questions) + self.__questions = SettingsProperties(self, questions) self.__fields = SettingsProperties(self, fields) self.__vectors = SettingsProperties(self, vectors) self.__metadata = SettingsProperties(self, metadata) @@ -101,7 +101,7 @@ def questions(self) -> "SettingsProperties": @questions.setter def 
questions(self, questions: List[QuestionType]): - self.__questions = QuestionsProperties(self, questions) + self.__questions = SettingsProperties(self, questions) @property def vectors(self) -> "SettingsProperties": @@ -220,6 +220,7 @@ def update(self) -> "Resource": self._update_dataset_related_attributes() self.__fields._update() + self.__questions._update() self.__vectors._update() self.__metadata._update() self.__questions._update() @@ -314,7 +315,7 @@ def add( if isinstance(property, FieldBase): self.fields.add(property) - elif isinstance(property, QuestionPropertyBase): + elif isinstance(property, QuestionBase): self.questions.add(property) elif isinstance(property, VectorField): self.vectors.add(property) @@ -349,7 +350,7 @@ def _from_dict(cls, settings_dict: dict) -> "Settings": allow_extra_metadata = settings_dict.get("allow_extra_metadata") mapping = settings_dict.get("mapping") - questions = [question_from_dict(question) for question in settings_dict.get("questions", [])] + questions = [_question_from_dict(question) for question in settings_dict.get("questions", [])] fields = [_field_from_dict(field) for field in fields] vectors = [VectorField.from_dict(vector) for vector in vectors] metadata = [MetadataField.from_dict(metadata) for metadata in metadata] @@ -566,34 +567,3 @@ def __repr__(self) -> str: """Return a string representation of the object.""" return f"{repr([prop for prop in self])}" - - -class QuestionsProperties(SettingsProperties[QuestionType]): - """ - This class is used to align questions with the rest of the settings. - - Since questions are not aligned with the Resource class definition, we use this - class to work with questions as we do with fields, vectors, or metadata (specially when creating questions). - - Once issue https://github.com/argilla-io/argilla/issues/4931 is tackled, this class should be removed. 
- """ - - def _create(self): - for question in self: - try: - self._create_question(question) - except ArgillaAPIError as e: - raise SettingsError(f"Failed to create question {question.name}") from e - - def _update(self): - pass - - def _delete(self): - pass - - def _create_question(self, question: QuestionType) -> None: - question_model = self._settings._client.api.questions.create( - dataset_id=self._settings.dataset.id, - question=question.api_model(), - ) - question._model = question_model diff --git a/argilla/tests/integration/conftest.py b/argilla/tests/integration/conftest.py index 655c98f76d..7e0850ceed 100644 --- a/argilla/tests/integration/conftest.py +++ b/argilla/tests/integration/conftest.py @@ -32,6 +32,10 @@ def client() -> rg.Argilla: def _cleanup(client: rg.Argilla): + for dataset in client.datasets: + if dataset.name.startswith("test_"): + dataset.delete() + for workspace in client.workspaces: if workspace.name.startswith("test_"): for dataset in workspace.datasets: diff --git a/argilla/tests/integration/test_add_records.py b/argilla/tests/integration/test_add_records.py index a6b87ca96c..11b9652125 100644 --- a/argilla/tests/integration/test_add_records.py +++ b/argilla/tests/integration/test_add_records.py @@ -72,7 +72,7 @@ def test_add_records(client): assert dataset_records[2].fields["text"] == mock_data[2]["text"] -def test_add_dict_records(client: Argilla): +def test_add_dict_records(client: Argilla, dataset_name: str): ws_name = "new_ws" ws = client.workspaces(ws_name) or Workspace(name=ws_name).create() @@ -80,7 +80,7 @@ def test_add_dict_records(client: Argilla): if ds is not None: ds.delete() - ds = rg.Dataset(name="new_ds", workspace=ws) + ds = rg.Dataset(name=dataset_name, workspace=ws) ds.settings = rg.Settings( fields=[rg.TextField(name="text")], questions=[rg.TextQuestion(name="label")], diff --git a/argilla/tests/integration/test_export_dataset.py b/argilla/tests/integration/test_export_dataset.py index 0a226bd1f5..dc8a719daa 
100644 --- a/argilla/tests/integration/test_export_dataset.py +++ b/argilla/tests/integration/test_export_dataset.py @@ -31,8 +31,7 @@ @pytest.fixture -def dataset(client) -> rg.Dataset: - mock_dataset_name = "".join(random.choices(ascii_lowercase, k=16)) +def dataset(client, dataset_name: str) -> rg.Dataset: settings = rg.Settings( fields=[ rg.TextField(name="text"), @@ -44,7 +43,7 @@ def dataset(client) -> rg.Dataset: ], ) dataset = rg.Dataset( - name=mock_dataset_name, + name=dataset_name, settings=settings, client=client, ) diff --git a/argilla/tests/integration/test_export_records.py b/argilla/tests/integration/test_export_records.py index 0314cd8741..7b414d9f95 100644 --- a/argilla/tests/integration/test_export_records.py +++ b/argilla/tests/integration/test_export_records.py @@ -28,8 +28,7 @@ @pytest.fixture -def dataset(client) -> rg.Dataset: - mock_dataset_name = "".join(random.choices(ascii_lowercase, k=16)) +def dataset(client, dataset_name: str) -> rg.Dataset: settings = rg.Settings( fields=[ rg.TextField(name="text"), @@ -41,7 +40,7 @@ def dataset(client) -> rg.Dataset: ], ) dataset = rg.Dataset( - name=mock_dataset_name, + name=dataset_name, settings=settings, client=client, ) diff --git a/argilla/tests/integration/test_import_features.py b/argilla/tests/integration/test_import_features.py index 6c1f530661..1f85213c7e 100644 --- a/argilla/tests/integration/test_import_features.py +++ b/argilla/tests/integration/test_import_features.py @@ -30,8 +30,7 @@ @pytest.fixture -def dataset(client) -> rg.Dataset: - mock_dataset_name = "".join(random.choices(ascii_lowercase, k=16)) +def dataset(client, dataset_name: str) -> rg.Dataset: settings = rg.Settings( fields=[ rg.TextField(name="text"), @@ -42,7 +41,7 @@ def dataset(client) -> rg.Dataset: ], ) dataset = rg.Dataset( - name=mock_dataset_name, + name=dataset_name, settings=settings, client=client, ) diff --git a/argilla/tests/integration/test_metadata.py b/argilla/tests/integration/test_manage_metadata.py 
similarity index 88% rename from argilla/tests/integration/test_metadata.py rename to argilla/tests/integration/test_manage_metadata.py index 2aa9d7c2f2..1acaa65035 100644 --- a/argilla/tests/integration/test_metadata.py +++ b/argilla/tests/integration/test_manage_metadata.py @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import random -from string import ascii_lowercase - import pytest import argilla as rg @@ -22,8 +19,7 @@ @pytest.fixture -def dataset_with_metadata(client: Argilla, workspace: Workspace) -> Dataset: - name = "".join(random.choices(ascii_lowercase, k=16)) +def dataset_with_metadata(client: Argilla, workspace: Workspace, dataset_name: str) -> Dataset: settings = Settings( fields=[TextField(name="text")], questions=[LabelQuestion(name="label", labels=["positive", "negative"])], @@ -32,7 +28,7 @@ def dataset_with_metadata(client: Argilla, workspace: Workspace) -> Dataset: ], ) dataset = Dataset( - name=name, + name=dataset_name, workspace=workspace.name, settings=settings, client=client, @@ -42,8 +38,7 @@ def dataset_with_metadata(client: Argilla, workspace: Workspace) -> Dataset: return dataset -def test_create_dataset_with_metadata(client: Argilla, workspace: Workspace) -> Dataset: - name = "".join(random.choices(ascii_lowercase, k=16)) +def test_create_dataset_with_metadata(client: Argilla, workspace: Workspace, dataset_name: str) -> None: settings = Settings( fields=[TextField(name="text")], questions=[LabelQuestion(name="label", labels=["positive", "negative"])], @@ -52,7 +47,7 @@ def test_create_dataset_with_metadata(client: Argilla, workspace: Workspace) -> ], ) dataset = Dataset( - name=name, + name=dataset_name, workspace=workspace.name, settings=settings, client=client, @@ -72,8 +67,9 @@ def test_create_dataset_with_metadata(client: Argilla, workspace: Workspace) -> (None, None, rg.IntegerMetadataProperty), ], ) -def test_create_dataset_with_numerical_metadata(client: 
Argilla, workspace: Workspace, min, max, type) -> Dataset: - name = "".join(random.choices(ascii_lowercase, k=16)) +def test_create_dataset_with_numerical_metadata( + client: Argilla, workspace: Workspace, dataset_name: str, min, max, type +) -> None: settings = Settings( fields=[TextField(name="text")], questions=[LabelQuestion(name="label", labels=["positive", "negative"])], @@ -82,7 +78,7 @@ def test_create_dataset_with_numerical_metadata(client: Argilla, workspace: Work ], ) dataset = Dataset( - name=name, + name=dataset_name, workspace=workspace.name, settings=settings, client=client, diff --git a/argilla/tests/integration/test_publish_datasets.py b/argilla/tests/integration/test_publish_datasets.py index 057a08d646..9ed8245509 100644 --- a/argilla/tests/integration/test_publish_datasets.py +++ b/argilla/tests/integration/test_publish_datasets.py @@ -31,19 +31,18 @@ ) -def test_publish_dataset(client: "Argilla"): +def test_publish_dataset(client: "Argilla", dataset_name: str): ws_name = "new_ws" - ds_name = "new_ds" new_ws = client.workspaces(ws_name) or Workspace(name=ws_name).create() assert client.api.workspaces.exists(new_ws.id), "The workspace was not created" - ds = client.datasets(ds_name, workspace=new_ws) + ds = client.datasets(dataset_name, workspace=new_ws) if ds: ds.delete() assert not client.api.datasets.exists(ds.id), "The dataset was not deleted" - ds = Dataset(name=ds_name, workspace=new_ws) + ds = Dataset(name=dataset_name, workspace=new_ws) ds.settings = Settings( guidelines="This is a test dataset", diff --git a/argilla/tests/integration/test_update_dataset_settings.py b/argilla/tests/integration/test_update_dataset_settings.py index 5ec1883fba..16865f5fe8 100644 --- a/argilla/tests/integration/test_update_dataset_settings.py +++ b/argilla/tests/integration/test_update_dataset_settings.py @@ -64,6 +64,15 @@ def test_update_settings(self, client: Argilla, dataset: Dataset): dataset = client.datasets(dataset.name) assert 
dataset.settings.vectors["vector"].title == "A new title for vector" + def test_update_question_title(self, client: Argilla, dataset: Dataset): + question = dataset.settings.questions["label"] + question.title = "A new title for label question" + dataset.settings.update() + + dataset = client.datasets(dataset.name) + question = dataset.settings.questions["label"] + assert question.title == "A new title for label question" + def test_update_distribution_settings(self, client: Argilla, dataset: Dataset): dataset.settings.distribution.min_submitted = 100 dataset.update() diff --git a/argilla/tests/integration/test_update_records.py b/argilla/tests/integration/test_update_records.py index 1dc60c85fa..3690a3cd54 100644 --- a/argilla/tests/integration/test_update_records.py +++ b/argilla/tests/integration/test_update_records.py @@ -24,9 +24,8 @@ @pytest.fixture -def dataset(client: rg.Argilla) -> rg.Dataset: +def dataset(client: rg.Argilla, dataset_name: str) -> rg.Dataset: workspace = client.workspaces[0] - mock_dataset_name = "".join(random.choices(ascii_lowercase, k=16)) settings = rg.Settings( allow_extra_metadata=True, fields=[ @@ -37,7 +36,7 @@ def dataset(client: rg.Argilla) -> rg.Dataset: ], ) dataset = rg.Dataset( - name=mock_dataset_name, + name=dataset_name, workspace=workspace.name, settings=settings, client=client, diff --git a/argilla/tests/unit/test_resources/test_questions.py b/argilla/tests/unit/test_resources/test_questions.py index f4bd1ecec7..ab5cef3a25 100644 --- a/argilla/tests/unit/test_resources/test_questions.py +++ b/argilla/tests/unit/test_resources/test_questions.py @@ -19,76 +19,10 @@ from pytest_httpx import HTTPXMock import argilla as rg -from argilla._models import TextQuestionModel, LabelQuestionModel -from argilla._models._settings._questions import SpanQuestionModel +from argilla._models import QuestionModel class TestQuestionsAPI: - def test_create_many_questions(self, httpx_mock: HTTPXMock): - # TODO: Add a test for the delete method 
in client - mock_dataset_id = uuid.uuid4() - mock_return_value = { - "id": "3fa85f64-5717-4562-b3fc-2c963f66afa6", - "name": "string", - "title": "string", - "required": True, - "settings": {"type": "text", "use_markdown": False}, - "dataset_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6", - "inserted_at": datetime.now().isoformat(), - "updated_at": datetime.now().isoformat(), - } - mock_question = { - "name": "5044cv0wu5", - "title": "string", - "description": "string", - "required": True, - "settings": {"type": "text", "use_markdown": False}, - } - mock_question = TextQuestionModel(**mock_question) - httpx_mock.add_response( - json=mock_return_value, - url=f"http://test_url/api/v1/datasets/{mock_dataset_id}/questions", - method="POST", - status_code=200, - ) - with httpx.Client() as client: - client = rg.Argilla(api_url="http://test_url") - client.api.questions.create_many(dataset_id=mock_dataset_id, questions=[mock_question]) - - def test_create_many_label_questions(self, httpx_mock: HTTPXMock): - # TODO: Add a test for the delete method in client - mock_dataset_id = uuid.uuid4() - mock_return_value = { - "id": "3fa85f64-5717-4562-b3fc-2c963f66afa6", - "name": "string", - "title": "string", - "required": True, - "settings": {"type": "labels", "options": [{"text": "positive", "value": "positive"}]}, - "dataset_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6", - "inserted_at": datetime.now().isoformat(), - "updated_at": datetime.now().isoformat(), - } - mock_question = { - "name": "5044cv0wu5", - "title": "string", - "description": "string", - "required": True, - "settings": { - "type": "label", - "options": [{"text": "negative", "value": "negative"}, {"text": "positive", "value": "positive"}], - }, - } - mock_question = LabelQuestionModel(**mock_question) - httpx_mock.add_response( - json=mock_return_value, - url=f"http://test_url/api/v1/datasets/{mock_dataset_id}/questions", - method="POST", - status_code=200, - ) - with httpx.Client() as client: - client = 
rg.Argilla(api_url="http://test_url") - client.api.questions.create_many(dataset_id=mock_dataset_id, questions=[mock_question]) - def test_create_span_question(self, httpx_mock: HTTPXMock): mock_dataset_id = uuid.uuid4() mock_return_value = { @@ -96,6 +30,7 @@ def test_create_span_question(self, httpx_mock: HTTPXMock): "name": "string", "title": "string", "required": True, + "dataset_id": str(mock_dataset_id), "settings": { "type": "span", "allow_overlapping": True, @@ -119,11 +54,12 @@ def test_create_span_question(self, httpx_mock: HTTPXMock): ) with httpx.Client() as _: - question = SpanQuestionModel( + question = QuestionModel( name="5044cv0wu5", title="string", description="string", required=True, + dataset_id=mock_dataset_id, settings={ "type": "span", "allow_overlapping": True, @@ -138,5 +74,5 @@ def test_create_span_question(self, httpx_mock: HTTPXMock): ) client = rg.Argilla(api_url="http://test_url") - created_question = client.api.questions.create(dataset_id=mock_dataset_id, question=question) + created_question = client.api.questions.create(question=question) assert created_question.model_dump(exclude_unset=True) == mock_return_value From 48ba9641d3e0fd2c68d8454ed03b2fb75a7244fe Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 21 Nov 2024 15:31:35 +0100 Subject: [PATCH 34/50] chore: Set release version --- argilla-frontend/package.json | 2 +- argilla-server/src/argilla_server/_version.py | 2 +- argilla/src/argilla/_version.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/argilla-frontend/package.json b/argilla-frontend/package.json index 301f364a9a..11cd5b8336 100644 --- a/argilla-frontend/package.json +++ b/argilla-frontend/package.json @@ -1,6 +1,6 @@ { "name": "argilla", - "version": "2.5.0dev0", + "version": "2.5.0", "private": true, "scripts": { "dev": "nuxt", diff --git a/argilla-server/src/argilla_server/_version.py b/argilla-server/src/argilla_server/_version.py index 694452c171..8d368f9e35 100644 --- 
a/argilla-server/src/argilla_server/_version.py +++ b/argilla-server/src/argilla_server/_version.py @@ -15,4 +15,4 @@ # coding: utf-8 # -__version__ = "2.5.0dev0" +__version__ = "2.5.0" diff --git a/argilla/src/argilla/_version.py b/argilla/src/argilla/_version.py index 1b693f706b..6821153921 100644 --- a/argilla/src/argilla/_version.py +++ b/argilla/src/argilla/_version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.5.0dev0" +__version__ = "2.5.0" From e7e51e15a4fed0fe6f53b4517f2dccc54482f7d2 Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Fri, 22 Nov 2024 15:41:38 +0100 Subject: [PATCH 35/50] [CHORE] Review changelogs (#5707) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description **Type of change** - Bug fix (non-breaking change which fixes an issue) - New feature (non-breaking change which adds functionality) - Breaking change (fix or feature that would cause existing functionality to not work as expected) - Refactor (change restructuring the codebase without changing functionality) - Improvement (change adding some improvement to an existing functionality) - Documentation update **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --------- Co-authored-by: José Francisco Calvo --- argilla-frontend/CHANGELOG.md | 2 ++ argilla-server/CHANGELOG.md | 3 +++ argilla/CHANGELOG.md | 10 +++++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/argilla-frontend/CHANGELOG.md b/argilla-frontend/CHANGELOG.md index e9b929b2cf..0787d28837 100644 --- 
a/argilla-frontend/CHANGELOG.md +++ b/argilla-frontend/CHANGELOG.md @@ -16,6 +16,8 @@ These are the section headers that we use: ## [Unreleased]() +## [2.5.0](https://github.com/argilla-io/argilla/compare/v2.4.1...v2.5.0) + ### Added - Add a high-contrast theme & improvements for the forced-colors mode. ([#5661](https://github.com/argilla-io/argilla/pull/5661)) diff --git a/argilla-server/CHANGELOG.md b/argilla-server/CHANGELOG.md index 688e8e0e95..d576a4be76 100644 --- a/argilla-server/CHANGELOG.md +++ b/argilla-server/CHANGELOG.md @@ -16,6 +16,8 @@ These are the section headers that we use: ## [Unreleased]() +## [2.5.0](https://github.com/argilla-io/argilla/compare/v2.4.1...v2.5.0) + ### Added - Added new endpoints to create, update, ping and delete webhooks. ([#5453](https://github.com/argilla-io/argilla/pull/5453)) @@ -27,6 +29,7 @@ These are the section headers that we use: ### Changed - Changed default python version to 3.13. ([#5649](https://github.com/argilla-io/argilla/pull/5649)) +- Changed Pydantic version to v2. ([#5666](https://github.com/argilla-io/argilla/pull/5666)) ### Fixed diff --git a/argilla/CHANGELOG.md b/argilla/CHANGELOG.md index d26a7f4f1d..620ed69018 100644 --- a/argilla/CHANGELOG.md +++ b/argilla/CHANGELOG.md @@ -16,12 +16,20 @@ These are the section headers that we use: ## [Unreleased]() +## [2.5.0](https://github.com/argilla-io/argilla/compare/v2.4.0...v2.5.0) + +### Added + +- Added support to remove/override datasets settings. ([#5584](https://github.com/argilla-io/argilla/pull/5584)) +- Added support to update question attributes. ([#5680](https://github.com/argilla-io/argilla/pull/5680)) +- Added support to webhook listeners. ([#5502](https://github.com/argilla-io/argilla/pull/5502)) +- Added support to Python 3.13. 
([#5652](https://github.com/argilla-io/argilla/pull/5652)) + ## [2.4.0](https://github.com/argilla-io/argilla/compare/v2.3.0...v2.4.0) ### Added - Added `Argilla.deploy_on_spaces` to deploy the Argilla server on Hugging Face Spaces. ([#5547](https://github.com/argilla-io/argilla/pull/5547)) -- Add support to webhooks. ([#5467](https://github.com/argilla-io/argilla/pull/5467)) ### Changed From 4e4d897ad60218059907db79a343f5b77e72a1f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dami=C3=A1n=20Pumar?= Date: Fri, 22 Nov 2024 09:38:23 +0100 Subject: [PATCH 36/50] =?UTF-8?q?=F0=9F=94=A5=20Fix=20highlight=20on=20bul?= =?UTF-8?q?k=20(#5698)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../annotation/container/fields/RecordFields.vue | 6 +++--- .../container/fields/chat-field/ChatField.vue | 9 +++++---- .../fields/chat-field/useChatFieldViewModel.ts | 4 ++-- .../span-annotation/SpanAnnotationTextField.vue | 11 +++-------- .../useSpanAnnotationTextFieldViewModel.ts | 5 ++--- .../container/fields/text-field/TextField.vue | 8 ++++---- .../fields/text-field/useTextFieldViewModel.ts | 4 ++-- 7 files changed, 21 insertions(+), 26 deletions(-) diff --git a/argilla-frontend/components/features/annotation/container/fields/RecordFields.vue b/argilla-frontend/components/features/annotation/container/fields/RecordFields.vue index ed8f523649..d9cdc15aef 100644 --- a/argilla-frontend/components/features/annotation/container/fields/RecordFields.vue +++ b/argilla-frontend/components/features/annotation/container/fields/RecordFields.vue @@ -17,7 +17,7 @@ >
@@ -32,8 +32,8 @@ > @@ -29,7 +36,7 @@ export default { data() { return { - errors: [], + errors: {}, isDirty: false, }; }, @@ -38,20 +45,33 @@ export default { type: Object, required: true, }, + textFields: { + type: Array, + required: true, + }, placeholder: { type: String, default: "", }, }, + watch: { + "question.settings.field": { + handler() { + this.validateOptions(); + }, + immediate: true, + }, + textFields: { + handler() { + this.validateOptions(); + }, + immediate: true, + }, + }, computed: { optionsJoinedByCommas() { return this.question.options.map((item) => item.text).join(","); }, - translatedValidations() { - return this.errors.map((validation) => { - return this.$t(validation); - }); - }, }, methods: { validateOptions() { diff --git a/argilla-frontend/translation/de.js b/argilla-frontend/translation/de.js index f263610c5e..7a66cdf6db 100644 --- a/argilla-frontend/translation/de.js +++ b/argilla-frontend/translation/de.js @@ -230,6 +230,7 @@ export default { filters: "Filter", filterBy: "Filter nach...", fields: "Felder", + field: "Feld", questions: "Fragen", general: "Übersicht", metadata: "Metadaten", @@ -284,6 +285,12 @@ export default { optionsWithoutLabel: "Optionen ohne Label sind nicht erlaubt", optionsSeparatedByComma: "Optionen müssen durch Kommas getrennt sein", }, + rating: { + atLeastTwoOptions: "Mindestens zwei Optionen müssen vorhanden sein", + }, + span: { + fieldRelated: "One text field is required", + }, }, atLeastOneQuestion: "Mindestens eine Frage wird benötigt", atLeastOneRequired: "Mindestens eine erforderliche Frage wird benötigt", @@ -302,7 +309,8 @@ export default { requiredField: "Pflichtfeld", requiredQuestion: "Pflichtfrage", select: "Auswählen", - mapToColumn: "Einer Spalte zuordnen", + mapToColumn: "Annotate spans on:", + applyToaAField: "Gelten für:", subset: "Teilmenge", selectSubset: "Sie können einen Datensatz nur aus einer Teilmenge erstellen.", diff --git a/argilla-frontend/translation/en.js 
b/argilla-frontend/translation/en.js index b0acb0ce3b..c0f7e5d47e 100644 --- a/argilla-frontend/translation/en.js +++ b/argilla-frontend/translation/en.js @@ -225,6 +225,7 @@ export default { filters: "Filters", filterBy: "Filter by...", fields: "Fields", + field: "Field", questions: "Questions", general: "General", metadata: "Metadata", @@ -286,6 +287,9 @@ export default { rating: { atLeastTwoOptions: "At least two options are required", }, + span: { + fieldRelated: "One text field is required", + }, }, atLeastOneQuestion: "At least one question is required.", atLeastOneRequired: "At least one required question is needed.", @@ -305,6 +309,7 @@ export default { requiredQuestion: "Required question", select: "Select", mapToColumn: "Map to column", + applyToaAField: "Annotate spans on:", subset: "Subset", selectSubset: "Your can create a dataset from only one subset.", preview: "Preview", diff --git a/argilla-frontend/translation/es.js b/argilla-frontend/translation/es.js index 494846e38f..927249e148 100644 --- a/argilla-frontend/translation/es.js +++ b/argilla-frontend/translation/es.js @@ -1,21 +1,21 @@ export default { - multi_label_selection: "Multi-Etiqueta", + multi_label_selection: "Selección de múltiples etiquetas", ranking: "Ranking", - label_selection: "Etiqueta", - span: "Selección", + label_selection: "Selección de etiqueta", + span: "Span", text: "Texto", rating: "Calificación", minimize: "Minimizar", select: "Seleccionar", search: "Buscar", searchPlaceholder: "Introduce una consulta", - searchDatasets: "Buscar dataset", + searchDatasets: "Buscar datasets", expand: "Expandir", copied: "Copiado", copyLink: "Copiar enlace", copyRecord: "Copiar registro", refresh: "Refrescar", - typeYourText: "Escriba su texto", + typeYourText: "Escribe tu texto", all: "Todas", value: "Valor", title: "Título", @@ -25,19 +25,19 @@ export default { useMarkdown: "Usar Markdown", suggestionFirst: "Mostrar sugerencias primero", visibleForAnnotators: "Visible para los 
anotadores", - recordInfo: "Información de registro", + recordInfo: "Información del registro", viewMetadata: "Ver metadatos", allowExtraMetadata: "Permitir metadatos adicionales", extraMetadata: "Metadatos adicionales", dimension: "Dimensión", visibleLabels: "Etiquetas visibles", - annotationGuidelines: "Guía de anotación", - guidelines: "Guía", - taskDistribution: "Distribución de la tarea", - minimumSubmittedResponses: "Respuestas mínimas entregadas", + annotationGuidelines: "Directrices de anotación", + guidelines: "Directrices", + taskDistribution: "Distribución de tareas", + minimumSubmittedResponses: "Respuestas mínimas enviadas", taskDistributionTooltip: - "Una tarea se completa cuando todos los \nregistros tienen el número mínimo \nde respuestas entregadas", - noAnnotationGuidelines: "Este dataset no tiene guía de anotación", + "Una tarea se completa cuando todos los registros tienen el número mínimo de respuestas enviadas", + noAnnotationGuidelines: "Este dataset no tiene directrices de anotación", required: "Requerido", optional: "Opcional", template: "Plantilla", @@ -47,37 +47,37 @@ export default { "El dataset está vacío. Pide a un administrador que suba registros y vuelve pronto.", datasetEmptyForAdmin: "El dataset está vacío. 
Puedes agregar registros usando el SDK de Python, consulta la documentación sobre cómo agregar registros.", - taskDistributionCompleted: "🎉 ¡La tarea está completada!", - noSubmittedRecords: "Aún no has entregado ningún registro", + taskDistributionCompleted: "¡La tarea está completada!", + noSubmittedRecords: "Aún no has enviado ningún registro", noRecordsFound: "No tienes registros {status} que coincidan con tu búsqueda", noRecords: "No tienes registros {status}", - noPendingRecordsToAnnotate: "🎉 No tienes registros pendientes para anotar", + noPendingRecordsToAnnotate: "No tienes registros pendientes para anotar", noDraftRecordsToReview: "No tienes ningún borrador para revisar", }, breadcrumbs: { home: "Inicio", - datasetSettings: "Configuración", - userSettings: "Mi configuración", + datasetSettings: "Configuración del dataset", + userSettings: "Configuración de usuario", }, datasets: { left: "pendiente", - completed: "Completada", + completed: "Completado", pending: "Pendiente", }, recordStatus: { pending: "pendiente | pendientes", draft: "borrador | borradores", discarded: "descartado | descartados", - submitted: "entregado | entregados", + submitted: "enviado | enviados", validated: "validado | validados", completedTooltip: - "El registro está completo, tiene el número \nmínimo de respuestas", + "El registro está completo, tiene el número mínimo de respuestas", }, userSettings: { - title: "Mi configuración", + title: "Configuración de usuario", fields: { - userName: "Usuario", + userName: "Nombre de usuario", firstName: "Nombre", lastName: "Apellido", workspaces: "Espacios de trabajo", @@ -90,23 +90,23 @@ export default { copyKey: "Copiar clave", }, userAvatarTooltip: { - settings: "Mi configuración", + settings: "Configuración de usuario", docs: "Documentación", logout: "Cerrar sesión", }, settings: { title: "Configuración del dataset", datasetInfo: "Información del dataset", - seeYourDataset: "Ver el dataset", + seeYourDataset: "Ver tu dataset", 
editFields: "Editar campos", editQuestions: "Editar preguntas", - editMetadata: "Editar propiedades de metadatos", + editMetadata: "Editar metadatos", editVectors: "Editar vectores", - deleteDataset: "Eliminar el dataset", - deleteWarning: "Ten cuidado, esta acción no es reversible", - deleteConfirmation: "Confirma la eliminación", + deleteDataset: "Eliminar dataset", + deleteWarning: "Ten cuidado, esta acción no se puede deshacer", + deleteConfirmation: "Confirmar eliminación", deleteConfirmationMessage: - "Estás a punto de eliminar: {datasetName} del espacio de trabajo {workspaceName} . Esta acción no puede deshacerse", + "Estás a punto de eliminar: {datasetName} del espacio de trabajo {workspaceName}. Esta acción no se puede deshacer", yesDelete: "Sí, eliminar", write: "Escribir", preview: "Vista previa", @@ -116,39 +116,39 @@ export default { ignore_and_continue: "Ignorar y continuar", login: "Iniciar sesión", "hf-login": "Iniciar sesión con Hugging Face", - sign_in_with_username: "Iniciar sesión con usuario", + sign_in_with_username: "Iniciar sesión con nombre de usuario", cancel: "Cancelar", continue: "Continuar", delete: "Eliminar", tooltip: { - copyToClipboard: "Copiar en el portapapeles", - copyNameToClipboard: "Copiar el nombre del dataset al portapapeles", + copyToClipboard: "Copiar al portapapeles", + copyNameToClipboard: "Copiar nombre del dataset al portapapeles", copyLinkToClipboard: "Copiar enlace del dataset al portapapeles", - goToDatasetSettings: "Configuración del dataset", + goToDatasetSettings: "Ir a la configuración del dataset", datasetSettings: "Configuración del dataset", }, }, to_submit_complete_required: - "Para entregar completa \nlas respuestas requeridas", - some_records_failed_to_annotate: "Algunos registros no fueron anotados", - changes_no_submit: "No entregó sus cambios", + "Para enviar, completa las respuestas requeridas", + some_records_failed_to_annotate: "Algunos registros no se pudieron anotar", + changes_no_submit: "No has 
enviado tus cambios", bulkAnnotation: { recordsSelected: "1 registro seleccionado | {count} registros seleccionados", - recordsViewSettings: "Tamaño de registro", - fixedHeight: "Colapsar registros", - defaultHeight: "Expandir registros", - to_annotate_record_bulk_required: "No hay registro seleccionado", + recordsViewSettings: "Configuración de vista de registros", + fixedHeight: "Altura fija", + defaultHeight: "Altura predeterminada", + to_annotate_record_bulk_required: "No hay registros seleccionados", select_to_annotate: "Seleccionar todo", pageSize: "Tamaño de página", - selectAllResults: "Selecciona todos los registros coincidentes {total}", + selectAllResults: "Seleccionar todos los registros coincidentes {total}", haveSelectedRecords: "Has seleccionado todos los registros {total}", actionConfirmation: "Confirmación de acción en bloque", actionConfirmationText: - "Esta acción afectará a {total} registros, ¿Quiere continuar?", + "Esta acción afectará a {total} registros, ¿Deseas continuar?", allRecordsAnnotated: "{total} registros han sido {action}", affectedAll: { - submitted: "entregados", + submitted: "enviados", discarded: "descartados", draft: "guardados como borrador", }, @@ -165,7 +165,7 @@ export default { clear: "Borrar", reset: "Reiniciar", discard: "Descartar", - submit: "Entregar", + submit: "Enviar", draft: "Guardar borrador", write: "Escribir", }, @@ -177,7 +177,7 @@ export default { value: "Valor de sugerencia", }, response: "Valor de respuesta", - record: "general", + record: "General", metadata: "Metadatos", }, suggestion: { @@ -203,12 +203,12 @@ export default { }, spanAnnotation: { shortcutHelper: "Presiona 'Shift' para seleccionar solo caracteres", - notSupported: "La anotación de tipo span no es compatible con su navegador", + notSupported: "La anotación de tipo span no es compatible con tu navegador", }, login: { title: "Iniciar sesión", - username: "Usuario", - usernameDescription: "Introduce tu usuario", + username: "Nombre de 
usuario", + usernameDescription: "Introduce tu nombre de usuario", password: "Contraseña", show: "Mostrar", hide: "Ocultar", @@ -223,8 +223,9 @@ export default { of: "de", status: "Estado", filters: "Filtros", - filterBy: "Filtrar por ...", + filterBy: "Filtrar por...", fields: "Campos", + field: "Campo", questions: "Preguntas", metadata: "Metadatos", vectors: "Vectores", @@ -237,13 +238,20 @@ export default { with: "con", find: "Encontrar", cancel: "Cancelar", - focus_mode: "Individual", - bulk_mode: "En bloque", + focus_mode: "Modo individual", + bulk_mode: "Modo en bloque", update: "Actualizar", youAreOnlineAgain: "Estás en línea de nuevo", - youAreOffline: "Estás fuera de línea", + youAreOffline: "Estás sin conexión", write: "Texto", preview: "Vista previa", + datasetTable: { + name: "Dataset", + workspace: "Espacio de trabajo", + createdAt: "Creado", + lastActivityAt: "Última actividad", + progress: "Progreso del equipo", + }, metrics: { total: "total", progress: { @@ -254,7 +262,7 @@ export default { }, home: { argillaDatasets: "Tus datasets", - none: "Sin datasets", + none: "Ninguno", importTitle: "Importar un dataset desde Hugging Face Hub", importText: "Comienza con un conjunto de datos del Hub simplemente pegando el nombre del repositorio", @@ -265,11 +273,12 @@ export default { "Si estás usando un Espacio privado, consulta la documentación.", exampleDatasetsTitle: "¿No sabes por dónde empezar?", exampleDatasetsText: "Explora estos datasets de ejemplo", - guidesTitle: "¿No conoce Argilla?", + guidesTitle: "¿No conoces Argilla?", guidesText: "Echa un vistazo a estas guías:", - pasteRepoIdPlaceholder: "Pega un repo id id p.ej., stanfordnlp/imdb", + pasteRepoIdPlaceholder: + "Pega el ID del repositorio, por ejemplo, stanfordnlp/imdb", demoLink: - "Entra en esta demo para probar Argilla", + "Ingresa a esta demo para probar Argilla", name: "Nombre del dataset", updatedAt: "Actualizado", createdAt: "Creado", @@ -279,23 +288,26 @@ export default { 
labelSelection: { atLeastTwoOptions: "Se requieren al menos dos opciones", optionsWithoutLabel: "No se permiten opciones vacías", - optionsSeparatedByComma: "Use comas para separar las etiquetas", + optionsSeparatedByComma: "Usa comas para separar las etiquetas", }, rating: { atLeastTwoOptions: "Se requieren al menos dos opciones", }, + span: { + fieldRelated: "Se requiere un campo de texto", + }, }, atLeastOneQuestion: "Se requiere al menos una pregunta.", atLeastOneRequired: "Se requiere al menos una pregunta obligatoria.", hasInvalidQuestions: "Algunas preguntas son inválidas", - createDataset: "Crea el dataset en Argilla", + createDataset: "Crear dataset en Argilla", datasetName: "Nombre del dataset", name: "Nombre", - assignWorkspace: "Asignar un espacio de trabajo", - selectSplit: "Seleccionar una división", + assignWorkspace: "Asignar espacio de trabajo", + selectSplit: "Seleccionar división", recordWarning: - "El conjunto de datos creado incluirá las primeras 10K filas y se pueden registrar más records a través del SDK de Python.", - button: "Crear el dataset", + "El conjunto de datos creado incluirá las primeras 10K filas y se pueden agregar más registros a través del SDK de Python.", + button: "Crear dataset", fields: "Campos", questionsTitle: "Preguntas", yourQuestions: "Tus preguntas", @@ -303,12 +315,14 @@ export default { requiredQuestion: "Pregunta obligatoria", select: "Seleccionar", mapToColumn: "Mapear a una columna", + applyToaAField: "Anotar span en:", subset: "Subconjunto", - selectSubset: "Puedes crear un dataset de un solo subconjunto.", + selectSubset: "Puedes crear un dataset con un solo subconjunto.", preview: "Vista previa", importData: "Importar datos", - addRecords: "Agregar records", - cantLoadRepository: "Dataset no encontrado o disponible en Hugging Face", + addRecords: "Agregar registros", + cantLoadRepository: + "No se pudo encontrar o acceder al dataset en Hugging Face", none: "Ninguno", noWorkspaces: "Por favor, sigue esta guía 
para crear un espacio de trabajo", @@ -323,16 +337,16 @@ export default { question: { text: "Texto", rating: "Calificación", - label_selection: "Etiqueta", + label_selection: "Selección de etiqueta", ranking: "Ranking", - multi_label_selection: "Multi-Etiqueta", + multi_label_selection: "Selección de múltiples etiquetas", span: "Span", "no mapping": "Sin mapeo", }, }, persistentStorage: { adminOrOwner: - "El almacenamiento persistente no está habilitado. Todos los datos se perderán si este espacio se reinicia. Vaya a la configuración del espacio para habilitarlo", + "El almacenamiento persistente no está habilitado. Todos los datos se perderán si este espacio se reinicia. Ve a la configuración del espacio para habilitarlo", annotator: "El almacenamiento persistente no está habilitado. Todos los datos se perderán si este espacio se reinicia", }, @@ -345,16 +359,16 @@ export default { validations: { businessLogic: { missing_vector: { - message: "Vector no encontrado para el registro seleccionado", + message: "No se encontró el vector para el registro seleccionado", }, update_distribution_with_existing_responses: { message: - "La configuración de distribución no se puede modificar para un dataset que contiene respuestas de usuarios", + "No se puede modificar la configuración de distribución para un dataset que contiene respuestas de usuarios", }, }, http: { 401: { - message: "No se pudo validar las credenciales", + message: "No se pudieron validar las credenciales", }, 404: { message: "No se encontró el recurso solicitado", diff --git a/argilla-frontend/v1/domain/entities/hub/QuestionCreation.ts b/argilla-frontend/v1/domain/entities/hub/QuestionCreation.ts index c8ff905aa0..6f9e62c686 100644 --- a/argilla-frontend/v1/domain/entities/hub/QuestionCreation.ts +++ b/argilla-frontend/v1/domain/entities/hub/QuestionCreation.ts @@ -14,6 +14,7 @@ import { QuestionPrototype, } from "../question/QuestionSetting"; import { QuestionType } from "../question/QuestionType"; +import 
{ Subset } from "./Subset"; export const availableQuestionTypes = [ QuestionType.from("label_selection"), @@ -31,6 +32,7 @@ export class QuestionCreation { public required: boolean; public readonly originalColumn: string; constructor( + private readonly subset: Subset, public readonly name: string, settings: QuestionPrototype, public column: string = "no mapping" @@ -97,31 +99,51 @@ export class QuestionCreation { } get isValid(): boolean { - return this.validate().length === 0; + const validation = this.validate(); + + return validation.field.length === 0 && validation.options.length === 0; } - validate(): string[] { - const errors = []; + validate(): Record<"options" | "field", string[]> { + const validation = { + options: [], + field: [], + }; + + if (this.isSpanType) { + if ( + !this.subset.textFields.some( + (field) => field.name === this.settings.field + ) || + !this.settings.field + ) { + validation.field.push("datasetCreation.questions.span.fieldRelated"); + } + } + if (this.isMultiLabelType || this.isSingleLabelType || this.isSpanType) { if (this.options.length < 2) { - errors.push( + validation.options.push( "datasetCreation.questions.labelSelection.atLeastTwoOptions" ); } if (this.options.some((option) => !option.id)) { - errors.push( + validation.options.push( "datasetCreation.questions.labelSelection.optionsWithoutLabel" ); } } + if (this.isRatingType) { if (this.options.length < 2) { - errors.push("datasetCreation.questions.rating.atLeastTwoOptions"); + validation.options.push( + "datasetCreation.questions.rating.atLeastTwoOptions" + ); } } - return errors; + return validation; } public setSettings(settings: QuestionPrototype) { diff --git a/argilla-frontend/v1/domain/entities/hub/Subset.ts b/argilla-frontend/v1/domain/entities/hub/Subset.ts index ba72804400..2cf15099e1 100644 --- a/argilla-frontend/v1/domain/entities/hub/Subset.ts +++ b/argilla-frontend/v1/domain/entities/hub/Subset.ts @@ -85,6 +85,10 @@ export class Subset { return ["no mapping", 
...columnNames]; } + get textFields() { + return this.fields.filter((f) => f.settings.type.isTextType); + } + private setDefaultValues() { if (this.questions.length === 1) { this.questions[0].markAsRequired(); @@ -95,6 +99,7 @@ export class Subset { if (this.isASingleLabel(structure)) { this.questions.push( new QuestionCreation( + this, structure.name, { type: "label_selection", @@ -114,6 +119,7 @@ export class Subset { if (this.isAMultiLabel(structure)) { this.questions.push( new QuestionCreation( + this, structure.name, { type: "multi_label_selection", @@ -255,7 +261,10 @@ export class Subset { { text: "event", id: "3", value: "event" }, ]; settings.allow_overlapping = true; - settings.field = "text"; + + if (this.textFields.length > 0) { + settings.field = this.textFields[0].name; + } } if (type === "text") { @@ -273,7 +282,7 @@ export class Subset { this.questions.splice( position ?? this.questions.length, 0, - new QuestionCreation(name, settings) + new QuestionCreation(this, name, settings) ); } } diff --git a/argilla/CHANGELOG.md b/argilla/CHANGELOG.md index 620ed69018..e5a0e4e64b 100644 --- a/argilla/CHANGELOG.md +++ b/argilla/CHANGELOG.md @@ -54,7 +54,7 @@ These are the section headers that we use: ### Changed -- Changed the __repr__ method for `SettingsProperties` to display the details of all the properties in `Setting` object. ([#5380](https://github.com/argilla-io/argilla/issues/5380)) +- Changed the **repr** method for `SettingsProperties` to display the details of all the properties in `Setting` object. ([#5380](https://github.com/argilla-io/argilla/issues/5380)) - Changed error messages when creating datasets with insufficient permissions. ([#5540](https://github.com/argilla-io/argilla/pull/5554)) ### Fixed @@ -89,7 +89,6 @@ These are the section headers that we use: - Added multiple error handling methods to the `rg.Dataset.records.log` method to warn, ignore, or raise errors. 
([#5466](https://github.com/argilla-io/argilla/pull/5463)) - Changed dataset import and export of `rg.LabelQuestion` to use `datasets.ClassLabel` not `datasets.Value`. ([#5474](https://github.com/argilla-io/argilla/pull/5474)) - ## [2.1.0](https://github.com/argilla-io/argilla/compare/v2.0.1...v2.1.0) ### Added From 9f03df1b6423a112a0569ad1dfe5b78fe5151a1c Mon Sep 17 00:00:00 2001 From: Leire Aguirre Date: Thu, 28 Nov 2024 09:10:51 +0100 Subject: [PATCH 44/50] fix absolute position css for transition --- .../features/home/dataset-list/DatasetListCards.vue | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/argilla-frontend/components/features/home/dataset-list/DatasetListCards.vue b/argilla-frontend/components/features/home/dataset-list/DatasetListCards.vue index 9e007226d4..280cd377db 100644 --- a/argilla-frontend/components/features/home/dataset-list/DatasetListCards.vue +++ b/argilla-frontend/components/features/home/dataset-list/DatasetListCards.vue @@ -86,17 +86,14 @@ export default { color: var(--fg-tertiary); } } -.list-move, .list-enter-active, .list-leave-active { - transition: transform 0.2s ease-in, opacity 0.1s ease; + transition: all 0.2s ease; } -.list-enter-from, + +.list-enter, .list-leave-to { opacity: 0; - transform: translateX(10px); -} -.list-leave-active { - position: absolute; + transform: scale(0.8); } From 8342ca7004dd68ea7f984c09e5b5d29e4ff48c07 Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Thu, 28 Nov 2024 10:31:11 +0100 Subject: [PATCH 45/50] [BUGFIX] `argilla`: allow change default distribution values (#5719) # Description When creating a `rg.Settings` instance with default distribution, updates on the `min_submitted` attribute do not take effect. ````python settings = rg.Settings(fields=...) 
settings.distribution.min_submitted = 3 # this does not take effect ```` ~Maybe related to https://github.com/argilla-io/argilla/issues/5718~ **Type of change** - Bug fix (non-breaking change which fixes an issue) **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --- argilla/CHANGELOG.md | 4 ++++ argilla/src/argilla/settings/_resource.py | 4 ++-- argilla/tests/unit/test_settings/test_settings.py | 8 ++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/argilla/CHANGELOG.md b/argilla/CHANGELOG.md index e5a0e4e64b..a35dded88a 100644 --- a/argilla/CHANGELOG.md +++ b/argilla/CHANGELOG.md @@ -25,6 +25,10 @@ These are the section headers that we use: - Added support to webhook listeners. ([#5502](https://github.com/argilla-io/argilla/pull/5502)) - Added support to Python 3.13. 
([#5652](https://github.com/argilla-io/argilla/pull/5652)) +### Fixed + +- Fixed error when update settings.distribution.min_submitted from defaults ([#5719](https://github.com/argilla-io/argilla/pull/5719)) + ## [2.4.0](https://github.com/argilla-io/argilla/compare/v2.3.0...v2.4.0) ### Added diff --git a/argilla/src/argilla/settings/_resource.py b/argilla/src/argilla/settings/_resource.py index 6971db722f..e750a66de1 100644 --- a/argilla/src/argilla/settings/_resource.py +++ b/argilla/src/argilla/settings/_resource.py @@ -73,7 +73,7 @@ def __init__( super().__init__(client=_dataset._client if _dataset else None) self._dataset = _dataset - self._distribution = distribution + self._distribution = distribution or TaskDistribution.default() self._mapping = mapping self.__guidelines = self.__process_guidelines(guidelines) self.__allow_extra_metadata = allow_extra_metadata @@ -137,7 +137,7 @@ def allow_extra_metadata(self, value: bool): @property def distribution(self) -> TaskDistribution: - return self._distribution or TaskDistribution.default() + return self._distribution @distribution.setter def distribution(self, value: TaskDistribution) -> None: diff --git a/argilla/tests/unit/test_settings/test_settings.py b/argilla/tests/unit/test_settings/test_settings.py index 73f69883b9..30ba3ea6a2 100644 --- a/argilla/tests/unit/test_settings/test_settings.py +++ b/argilla/tests/unit/test_settings/test_settings.py @@ -176,6 +176,14 @@ def test_settings_with_modified_default_task_distribution(self): other_settings = rg.Settings(fields=[rg.TextField(name="text", title="title")]) assert other_settings.distribution == TaskDistribution(min_submitted=1) + def test_settings_with_modified_task_distribution_value(self): + settings = rg.Settings(fields=[rg.TextField(name="text", title="title")]) + + assert settings.distribution == TaskDistribution(min_submitted=1) + settings.distribution.min_submitted = 10 + + assert settings.distribution == TaskDistribution(min_submitted=10) + def 
test_compare_equal_settings(self): settings = rg.Settings(fields=[rg.TextField(name="text", title="title")]) assert settings == settings From be7fe661586451fa94c4daf0da92670bba13f15f Mon Sep 17 00:00:00 2001 From: Leire Aguirre Date: Thu, 28 Nov 2024 14:53:47 +0100 Subject: [PATCH 46/50] update cursor --- .../components/features/user-settings/UserSettingsLanguage.vue | 1 + .../components/features/user-settings/UserSettingsTheme.vue | 1 + 2 files changed, 2 insertions(+) diff --git a/argilla-frontend/components/features/user-settings/UserSettingsLanguage.vue b/argilla-frontend/components/features/user-settings/UserSettingsLanguage.vue index 5d175e260b..c19cb316ba 100644 --- a/argilla-frontend/components/features/user-settings/UserSettingsLanguage.vue +++ b/argilla-frontend/components/features/user-settings/UserSettingsLanguage.vue @@ -53,6 +53,7 @@ label { &:hover { transition: color 0.3s ease; color: var(--fg-primary); + cursor: pointer; } } input:checked + label { diff --git a/argilla-frontend/components/features/user-settings/UserSettingsTheme.vue b/argilla-frontend/components/features/user-settings/UserSettingsTheme.vue index 0c29ffe7c8..b6a04f17ef 100644 --- a/argilla-frontend/components/features/user-settings/UserSettingsTheme.vue +++ b/argilla-frontend/components/features/user-settings/UserSettingsTheme.vue @@ -62,6 +62,7 @@ label { &:hover { transition: color 0.3s ease; color: var(--fg-primary); + cursor: pointer; .svg-icon { transition: fill 0.3s ease; fill: var(--fg-primary); From 5e0667519a72a581eea81b7ed1bbb4fcc383b56d Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Thu, 28 Nov 2024 16:06:51 +0100 Subject: [PATCH 47/50] [DOCS] review 2.5.0 docs (#5723) # Description **Type of change** - Bug fix (non-breaking change which fixes an issue) - New feature (non-breaking change which adds functionality) - Breaking change (fix or feature that would cause existing functionality to not work as expected) - Refactor (change restructuring the codebase without 
changing functionality) - Improvement (change adding some improvement to an existing functionality) - Documentation update **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --- argilla/mkdocs.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/argilla/mkdocs.yml b/argilla/mkdocs.yml index 645b72f514..b31214619b 100644 --- a/argilla/mkdocs.yml +++ b/argilla/mkdocs.yml @@ -174,7 +174,9 @@ nav: - Import and export datasets: how_to_guides/import_export.md - Advanced: - Custom fields with layout templates: how_to_guides/custom_fields.md - - Use webhooks to respond to server events: how_to_guides/webhooks.md + - Use webhooks to respond to server events: + - how_to_guides/webhooks.md + - Webhooks internals: how_to_guides/webhooks_internals.md - Use Markdown to format rich content: how_to_guides/use_markdown_to_format_rich_content.md - Migrate users, workspaces and datasets to Argilla V2: how_to_guides/migrate_from_legacy_datasets.md - Tutorials: From 6131075444602b30069ab45d07cb8470b0e0abc6 Mon Sep 17 00:00:00 2001 From: Leire Aguirre Date: Thu, 28 Nov 2024 16:20:40 +0100 Subject: [PATCH 48/50] update ui --- .../questions/form/multi-label/MultiLabel.component.vue | 2 +- .../questions/form/ranking/Ranking.component.vue | 2 +- .../container/questions/form/rating/Rating.component.vue | 2 +- .../questions/form/single-label/SingleLabel.component.vue | 2 +- .../container/questions/form/span/SpanComponent.vue | 2 +- .../questions/form/text-area/TextArea.component.vue | 2 +- .../annotation/guidelines/AnnotationGuidelines.vue | 8 +++++++- 
.../components/features/home/dataset-list/DatasetCard.vue | 2 +- 8 files changed, 14 insertions(+), 8 deletions(-) diff --git a/argilla-frontend/components/features/annotation/container/questions/form/multi-label/MultiLabel.component.vue b/argilla-frontend/components/features/annotation/container/questions/form/multi-label/MultiLabel.component.vue index 0a1d738c9c..8e3d0b85be 100644 --- a/argilla-frontend/components/features/annotation/container/questions/form/multi-label/MultiLabel.component.vue +++ b/argilla-frontend/components/features/annotation/container/questions/form/multi-label/MultiLabel.component.vue @@ -45,6 +45,6 @@ export default { .wrapper { display: flex; flex-direction: column; - gap: $base-space; + gap: $base-space * 1.5; } diff --git a/argilla-frontend/components/features/annotation/container/questions/form/ranking/Ranking.component.vue b/argilla-frontend/components/features/annotation/container/questions/form/ranking/Ranking.component.vue index e392456339..e5c0c0e1c9 100644 --- a/argilla-frontend/components/features/annotation/container/questions/form/ranking/Ranking.component.vue +++ b/argilla-frontend/components/features/annotation/container/questions/form/ranking/Ranking.component.vue @@ -52,6 +52,6 @@ export default { .wrapper { display: flex; flex-direction: column; - gap: $base-space; + gap: $base-space * 1.5; } diff --git a/argilla-frontend/components/features/annotation/container/questions/form/rating/Rating.component.vue b/argilla-frontend/components/features/annotation/container/questions/form/rating/Rating.component.vue index 6aa1b2374d..03d3079df9 100644 --- a/argilla-frontend/components/features/annotation/container/questions/form/rating/Rating.component.vue +++ b/argilla-frontend/components/features/annotation/container/questions/form/rating/Rating.component.vue @@ -43,6 +43,6 @@ export default { .wrapper { display: flex; flex-direction: column; - gap: $base-space; + gap: $base-space * 1.5; } diff --git 
a/argilla-frontend/components/features/annotation/container/questions/form/single-label/SingleLabel.component.vue b/argilla-frontend/components/features/annotation/container/questions/form/single-label/SingleLabel.component.vue index 8afd2567e0..aa6517c189 100644 --- a/argilla-frontend/components/features/annotation/container/questions/form/single-label/SingleLabel.component.vue +++ b/argilla-frontend/components/features/annotation/container/questions/form/single-label/SingleLabel.component.vue @@ -48,6 +48,6 @@ export default { .wrapper { display: flex; flex-direction: column; - gap: $base-space; + gap: $base-space * 1.5; } diff --git a/argilla-frontend/components/features/annotation/container/questions/form/span/SpanComponent.vue b/argilla-frontend/components/features/annotation/container/questions/form/span/SpanComponent.vue index 5debe06aae..f1545f61ee 100644 --- a/argilla-frontend/components/features/annotation/container/questions/form/span/SpanComponent.vue +++ b/argilla-frontend/components/features/annotation/container/questions/form/span/SpanComponent.vue @@ -76,7 +76,7 @@ export default { .wrapper { display: flex; flex-direction: column; - gap: $base-space; + gap: $base-space * 1.5; } .question { &__warning { diff --git a/argilla-frontend/components/features/annotation/container/questions/form/text-area/TextArea.component.vue b/argilla-frontend/components/features/annotation/container/questions/form/text-area/TextArea.component.vue index 47e8a03ba7..3facc03e6e 100644 --- a/argilla-frontend/components/features/annotation/container/questions/form/text-area/TextArea.component.vue +++ b/argilla-frontend/components/features/annotation/container/questions/form/text-area/TextArea.component.vue @@ -78,6 +78,6 @@ export default { .wrapper { display: flex; flex-direction: column; - gap: $base-space; + gap: $base-space * 1.5; } diff --git a/argilla-frontend/components/features/annotation/guidelines/AnnotationGuidelines.vue 
b/argilla-frontend/components/features/annotation/guidelines/AnnotationGuidelines.vue index b19c7fc7d4..cd23e94bd1 100644 --- a/argilla-frontend/components/features/annotation/guidelines/AnnotationGuidelines.vue +++ b/argilla-frontend/components/features/annotation/guidelines/AnnotationGuidelines.vue @@ -1,6 +1,6 @@ @@ -13,3 +13,9 @@ export default { }, }; + + diff --git a/argilla-frontend/components/features/home/dataset-list/DatasetCard.vue b/argilla-frontend/components/features/home/dataset-list/DatasetCard.vue index fc84fc5971..b187b04d54 100644 --- a/argilla-frontend/components/features/home/dataset-list/DatasetCard.vue +++ b/argilla-frontend/components/features/home/dataset-list/DatasetCard.vue @@ -136,7 +136,7 @@ $shadow-hover: rgba(0, 0, 0, 0.05) 0px 1px 4px 0px, font-weight: 500; min-height: $base-space * 5; @include font-size(18px); - word-break: break-all; + word-break: break-word; transition: color 0.3s ease; } &__workspace { From 37a72c7e66229276c6154c17d3dfa9ef19151371 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 28 Nov 2024 17:10:43 +0100 Subject: [PATCH 49/50] fix: Set 2.5 migration tag --- argilla-server/src/argilla_server/database.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/argilla-server/src/argilla_server/database.py b/argilla-server/src/argilla_server/database.py index c2501f5dfe..41f20d42d1 100644 --- a/argilla-server/src/argilla_server/database.py +++ b/argilla-server/src/argilla_server/database.py @@ -37,8 +37,7 @@ "1.28": "ca7293c38970", "2.0": "237f7c674d74", "2.4": "660d6c6b3360", - "2.5": "6ed1b8bf8e08", - "2.6": "580a6553186f", + "2.5": "580a6553186f", } ) From 2a58a3079cc6862126331ce0a54c7db4f0d9405c Mon Sep 17 00:00:00 2001 From: Leire Aguirre Date: Thu, 28 Nov 2024 17:17:17 +0100 Subject: [PATCH 50/50] fix lint --- .../features/annotation/guidelines/AnnotationGuidelines.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/argilla-frontend/components/features/annotation/guidelines/AnnotationGuidelines.vue b/argilla-frontend/components/features/annotation/guidelines/AnnotationGuidelines.vue index cd23e94bd1..7e471c8f10 100644 --- a/argilla-frontend/components/features/annotation/guidelines/AnnotationGuidelines.vue +++ b/argilla-frontend/components/features/annotation/guidelines/AnnotationGuidelines.vue @@ -16,6 +16,6 @@ export default {