wtsi-npg · mgcam · Oct 4, 2023 · Oct 4, 2023 · Oct 4, 2023 · Oct 4, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Changed
+
+ - Upgrade the Python pydantic dependency to v2
+
 ## [4.0.0]
 
 ### Added

diff --git a/README.md b/README.md
@@ -20,9 +20,9 @@ package implements a Python API. The attributes of objects are sequencing
 platform specific. The generator for the PacBio platform is implemented by the
 `PacBioEntity` class.
 
-Examles of generating IDs for PacBio data from Python code:
+Examples of generating IDs for PacBio data from Python code:
 
-```
+```python
 from npg_id_generation.pac_bio import PacBioEntity
 
 # from a JSON string via a class method
@@ -47,15 +47,15 @@ The npg_id_generation package also contains a script, `generate_pac_bio_id`,
 which can be called from the command line. The script outputs the generated
 ID to the STDOUT stream. Use the `--help` option to find out details.
 
-```
+```perl
 # Using the script in the Perl code:
 my $id = `npg_id_generation --run_name 'MARATHON' --well_label 'D1'`;
 ```
 
 All generators should conform to a few simple rules:
 
 1. Uniqueness of the ID should be guaranteed.
-2. The ID should be a 64 characher string.
+2. The ID should be a 64 character string.
 3. It should be possible to generate an ID from a JSON string.
 4. The value of the ID should **not** depend on the order of attributes given
    to the constructor or the order of keys used in JSON.
@@ -64,9 +64,9 @@ All generators should conform to a few simple rules:
 6. The value of the ID should **not** depend on whether the undefined values
    of attributes are explicitly set.
 
-The examples below clarity the rules. Objects `o1` - `o6` should generate the same ID.
+The examples below clarify the rules. Objects `o1` - `o6` should generate the same ID.
 
-```
+```python
 o1 = PacBioEntity(run_name="r1", well_label="l1")
 o2 = PacBioEntity(run_name="r1", well_label="l1", tags = None)
 o3 = PacBioEntity(well_label="l1", run_name="r1", )
@@ -79,7 +79,7 @@ In addition, to maintain backwards compatibility for PacBio Revio products,
 the following two objects should generate the same ID, meaning that the
 value of 1 for the plate number attribute is disregarded.
 
-```
+```python
 o1 = PacBioEntity(run_name="r1", well_label="l1")
 o2 = PacBioEntity(run_name="r1", well_label="l1", plate_number=1)
 ```

diff --git a/npg_id_generation/pac_bio.py b/npg_id_generation/pac_bio.py
@@ -21,8 +21,9 @@
 # this program. If not, see <http://www.gnu.org/licenses/>.
 
 from hashlib import sha256
+from typing import Optional
 
-from pydantic import BaseModel, Extra, Field, validator
+from pydantic import BaseModel, Field, field_validator, ConfigDict
 import re
 
 
@@ -42,7 +43,7 @@ def concatenate_tags(tags: list[str]):
         return ",".join(tags)
 
 
-class PacBioEntity(BaseModel, extra=Extra.forbid):
+class PacBioEntity(BaseModel):
     """A PacBio class for product ID generation."""
 
     """
@@ -56,10 +57,11 @@ class PacBioEntity(BaseModel, extra=Extra.forbid):
 
       Order the attributes alphabetically!
     """
+    model_config = ConfigDict(extra="forbid")
 
     run_name: str = Field(title="Pac Bio run name as in LIMS")
     well_label: str = Field(title="Pac Bio well label")
-    plate_number: int = Field(
+    plate_number: Optional[int] = Field(
         default=None,
         ge=1,
         title="Pac Bio plate number",
@@ -70,7 +72,7 @@ class PacBioEntity(BaseModel, extra=Extra.forbid):
         when the value of this attribute is 1, we reset it to undefined.
         """,
     )
-    tags: str = Field(
+    tags: Optional[str] = Field(
         default=None,
         title="A string representing tag or tags",
         description="""
@@ -81,25 +83,29 @@ class PacBioEntity(BaseModel, extra=Extra.forbid):
         """,
     )
 
-    @validator("run_name", "well_label", "tags")
+    @field_validator("run_name", "well_label", "tags")
+    @classmethod
     def attributes_are_non_empty_strings(cls, v):
         if (v is not None) and (v == ""):
             raise ValueError("Cannot be an empty string")
         return v
 
-    @validator("well_label")
+    @field_validator("well_label")
+    @classmethod
     def well_label_conforms_to_pattern(cls, v):
         if not re.match("^[A-Z][1-9][0-9]?$", v):
             raise ValueError(
                 "Well label must be an alphabetic character followed by a number between 1 and 99"
             )
         return v
 
-    @validator("plate_number")
+    @field_validator("plate_number")
+    @classmethod
     def plate_number_default(cls, v):
         return None if (v is None) or (v == 1) else v
 
-    @validator("tags")
+    @field_validator("tags")
+    @classmethod
     def tags_have_correct_characters(cls, v):
         if (v is not None) and (not re.match("^[ACGT]+(,[ACGT]+)*$", v)):
             raise ValueError(
@@ -110,6 +116,4 @@ def tags_have_correct_characters(cls, v):
     def hash_product_id(self):
         """Generate a sha256sum for the PacBio Entity"""
 
-        return sha256(
-            self.json(exclude_none=True, separators=(",", ":")).encode()
-        ).hexdigest()
+        return sha256(self.model_dump_json(exclude_none=True).encode()).hexdigest()