From dd92d793f18d8dec66f1f40ff7b28382f387959e Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 14 Jul 2021 12:31:37 +0100 Subject: [PATCH 01/46] Base files for wav2vec integration --- flash/audio/__init__.py | 11 ++ flash/audio/speech_recognition/__init__.py | 15 ++ flash/audio/speech_recognition/data.py | 183 +++++++++++++++++++++ flash/audio/speech_recognition/model.py | 63 +++++++ flash_examples/speech_recognition.py | 24 +++ 5 files changed, 296 insertions(+) create mode 100644 flash/audio/__init__.py create mode 100644 flash/audio/speech_recognition/__init__.py create mode 100644 flash/audio/speech_recognition/data.py create mode 100644 flash/audio/speech_recognition/model.py create mode 100644 flash_examples/speech_recognition.py diff --git a/flash/audio/__init__.py b/flash/audio/__init__.py new file mode 100644 index 0000000000..23786d11f3 --- /dev/null +++ b/flash/audio/__init__.py @@ -0,0 +1,11 @@ +from flash.text.classification import TextClassificationData, TextClassifier # noqa: F401 +from flash.text.seq2seq import ( # noqa: F401 + QuestionAnsweringData, + QuestionAnsweringTask, + Seq2SeqData, + Seq2SeqTask, + SummarizationData, + SummarizationTask, + TranslationData, + TranslationTask, +) diff --git a/flash/audio/speech_recognition/__init__.py b/flash/audio/speech_recognition/__init__.py new file mode 100644 index 0000000000..00f1b6fa0c --- /dev/null +++ b/flash/audio/speech_recognition/__init__.py @@ -0,0 +1,15 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from flash.audio.speech_recognition.data import SpeechRecognitionData # noqa: F401 +from flash.audio.speech_recognition.model import SpeechRecognition # noqa: F401 diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py new file mode 100644 index 0000000000..a394d18da7 --- /dev/null +++ b/flash/audio/speech_recognition/data.py @@ -0,0 +1,183 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial +from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union + +import soundfile as sf +from torch import Tensor + +from flash.core.data.auto_dataset import AutoDataset +from flash.core.data.data_module import DataModule +from flash.core.data.data_source import DataSource, DefaultDataSources +from flash.core.data.process import Deserializer, Postprocess, Preprocess +from flash.core.utilities.imports import _TEXT_AVAILABLE + +if _TEXT_AVAILABLE: + from datasets import load_dataset + from transformers import default_data_collator, Wav2Vec2CTCTokenizer, Wav2Vec2Processor + + +class AudioDeserializer(Deserializer): + + def __init__(self, backbone: str, max_length: int): + super().__init__() + self.backbone = backbone + self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(backbone) + self.max_length = max_length + + def deserialize(self, sample: Any) -> Tensor: + return self.tokenizer(sample["speech"], sampling_rate=sample["sampling_rate"][0]).input_values + + @property + def example_input(self) -> str: + return "An example input" + + def __getstate__(self): # TODO: Find out why this is being pickled + state = self.__dict__.copy() + state.pop("processor") + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(self.backbone) + + +class AudioDataSource(DataSource): + + def __init__(self, backbone: str, max_length: int = 128): + super().__init__() + + self.backbone = backbone + self.processor = Wav2Vec2Processor.from_pretrained(backbone) + self.max_length = max_length + + def prepare_dataset(self, batch): + # check that all files have the correct sampling rate + assert ( + len(set(batch["sampling_rate"])) == 1 + ), f"Make sure all inputs have the same sampling rate of {self.processor.feature_extractor.sampling_rate}." 
+ + batch["input_values"] = self.processor(batch["speech"], sampling_rate=batch["sampling_rate"][0]).input_values + + if not self.predicting: + with self.processor.as_target_processor(): + batch["labels"] = self.processor(batch["target_text"]).input_ids + return batch + + @staticmethod + def _transform_label(label_to_class_mapping: Dict[str, int], target: str, ex: Dict[str, Union[int, str]]): + ex[target] = label_to_class_mapping[ex[target]] + return ex + + def load_data( + self, + data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], + dataset: Optional[Any] = None, + columns: Union[List[str], Tuple[str]] = ("input_ids", "attention_mask", "labels"), + ) -> Union[Sequence[Mapping[str, Any]]]: + file, input, target = data + + data_files = {} + stage = self.running_stage.value + data_files[stage] = str(file) + + dataset_dict = load_dataset("timit_asr") # todo + + dataset_dict = dataset_dict.map(partial(self._tokenize_fn, input=input), batched=True) + dataset_dict.set_format("torch", columns=columns) + + return dataset_dict[stage] + + def __getstate__(self): # TODO: Find out why this is being pickled + state = self.__dict__.copy() + state.pop("tokenizer") + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self.tokenizer = Wav2Vec2Processor.from_pretrained(self.backbone) + + def predict_load_data(self, data: Any, dataset: AutoDataset): + return self.load_data(data, dataset, columns=["input_ids", "attention_mask"]) + + +class SpeechRecognitionPreprocess(Preprocess): + + def __init__( + self, + train_transform: Optional[Dict[str, Callable]] = None, + val_transform: Optional[Dict[str, Callable]] = None, + test_transform: Optional[Dict[str, Callable]] = None, + predict_transform: Optional[Dict[str, Callable]] = None, + backbone: str = "facebook/wav2vec2-base-960h", + max_length: int = 128, + ): + self.backbone = backbone + self.max_length = max_length + + super().__init__( + train_transform=train_transform, + val_transform=val_transform, + test_transform=test_transform, + predict_transform=predict_transform, + data_sources={ + "timit": AudioDataSource(self.backbone, max_length=max_length), + }, + default_data_source=DefaultDataSources.DATASET, + deserializer=AudioDeserializer(backbone, max_length), + ) + + def get_state_dict(self) -> Dict[str, Any]: + return { + **self.transforms, + "backbone": self.backbone, + "max_length": self.max_length, + } + + @classmethod + def load_state_dict(cls, state_dict: Dict[str, Any], strict: bool): + return cls(**state_dict) + + def per_batch_transform(self, batch: Any) -> Any: + speech_array, sampling_rate = sf.read(batch["file"]) + batch["speech"] = speech_array + batch["sampling_rate"] = sampling_rate + batch["target_text"] = batch["text"] + return batch + + def collate(self, samples: Any) -> Tensor: + """Override to convert a set of samples to a batch""" + # data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True) + + if isinstance(samples, dict): + samples = [samples] + return default_data_collator(samples) + + +class SpeechRecognitionPostprocess(Postprocess): + + def __init__(self, save_path: Optional[str] = None, backbone: str = "facebook/wav2vec2-base-960h",): + super().__init__(save_path=save_path) + self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(backbone) + + def per_batch_transform(self, batch: Any) -> Any: + transcription = self.tokenizer.batch_decode(batch)[0] + return transcription + + +class SpeechRecognitionData(DataModule): + """Data Module for text classification tasks""" + + 
preprocess_cls = SpeechRecognitionPreprocess + postprocess_cls = SpeechRecognitionPostprocess diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py new file mode 100644 index 0000000000..8e4ef190cd --- /dev/null +++ b/flash/audio/speech_recognition/model.py @@ -0,0 +1,63 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import warnings +from typing import Any, Callable, Dict, Mapping, Optional, Type, Union + +import torch + +from flash import Task +from flash.core.data.process import Serializer +from flash.core.utilities.imports import _TEXT_AVAILABLE + +if _TEXT_AVAILABLE: + from transformers import Wav2Vec2ForCTC + + +class SpeechRecognition(Task): + + def __init__( + self, + backbone: str = "facebook/wav2vec2-base-960h", + loss_fn: Optional[Callable] = None, + optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam, + learning_rate: float = 1e-2, + serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None, + ): + self.save_hyperparameters() + + os.environ["TOKENIZERS_PARALLELISM"] = "TRUE" + # disable HF thousand warnings + warnings.simplefilter("ignore") + # set os environ variable for multiprocesses + os.environ["PYTHONWARNINGS"] = "ignore" + + super().__init__( + model=None, + loss_fn=loss_fn, + optimizer=optimizer, + learning_rate=learning_rate, + serializer=serializer, + ) + self.model = Wav2Vec2ForCTC.from_pretrained(backbone) + + self.save_hyperparameters() + + def forward(self, batch: Dict[str, torch.Tensor]): + return self.model(input_ids=batch["input_ids"]) + + def step(self, batch, batch_idx, metrics) -> dict: + out = self(batch) + prediction = torch.argmax(out.logits, dim=-1) + return prediction diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py new file mode 100644 index 0000000000..a4890f06a8 --- /dev/null +++ b/flash_examples/speech_recognition.py @@ -0,0 +1,24 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import flash +from flash.audio.speech_recognition import SpeechRecognition, SpeechRecognitionData + +datamodule = SpeechRecognitionData.from_data_source("timit") + +# 2. Build the task +model = SpeechRecognition() + +# 3. 
Create the trainer and finetune the model +trainer = flash.Trainer(max_epochs=3) +trainer.predict(model, datamodule=datamodule) From 2a43fe7b830e358a64b45958639dada49aa9810c Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 11:32:59 +0000 Subject: [PATCH 02/46] Format code with autopep8 --- flash/audio/speech_recognition/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index a394d18da7..2e37cbfd7d 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -65,7 +65,7 @@ def __init__(self, backbone: str, max_length: int = 128): def prepare_dataset(self, batch): # check that all files have the correct sampling rate assert ( - len(set(batch["sampling_rate"])) == 1 + len(set(batch["sampling_rate"])) == 1 ), f"Make sure all inputs have the same sampling rate of {self.processor.feature_extractor.sampling_rate}." batch["input_values"] = self.processor(batch["speech"], sampling_rate=batch["sampling_rate"][0]).input_values From 6a39b34b9197e2e14adf8bbb52dbd5e72f6fb128 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 11:33:35 +0000 Subject: [PATCH 03/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flash/audio/speech_recognition/data.py | 28 ++++++++++++++----------- flash/audio/speech_recognition/model.py | 12 +++++------ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index 2e37cbfd7d..d206b3faf6 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -81,10 +81,10 @@ def _transform_label(label_to_class_mapping: Dict[str, int], target: str, ex: Di return ex def load_data( - self, - data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], - dataset: Optional[Any] = None, - columns: Union[List[str], Tuple[str]] = ("input_ids", "attention_mask", "labels"), + self, + data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], + dataset: Optional[Any] = None, + columns: Union[List[str], Tuple[str]] = ("input_ids", "attention_mask", "labels"), ) -> Union[Sequence[Mapping[str, Any]]]: file, input, target = data @@ -115,13 +115,13 @@ def predict_load_data(self, data: Any, dataset: AutoDataset): class SpeechRecognitionPreprocess(Preprocess): def __init__( - self, - train_transform: Optional[Dict[str, Callable]] = None, - val_transform: Optional[Dict[str, Callable]] = None, - test_transform: Optional[Dict[str, Callable]] = None, - predict_transform: Optional[Dict[str, Callable]] = None, - backbone: str = "facebook/wav2vec2-base-960h", - max_length: int = 128, + self, + train_transform: Optional[Dict[str, Callable]] = None, + val_transform: Optional[Dict[str, Callable]] = None, + test_transform: Optional[Dict[str, Callable]] = None, + predict_transform: Optional[Dict[str, Callable]] = None, + backbone: str = "facebook/wav2vec2-base-960h", + max_length: int = 128, ): self.backbone = backbone self.max_length = max_length @@ -167,7 +167,11 @@ def collate(self, samples: Any) -> Tensor: class SpeechRecognitionPostprocess(Postprocess): - def __init__(self, save_path: Optional[str] = None, backbone: str = "facebook/wav2vec2-base-960h",): + def __init__( + self, + save_path: Optional[str] = None, + 
backbone: str = "facebook/wav2vec2-base-960h", + ): super().__init__(save_path=save_path) self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(backbone) diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py index 8e4ef190cd..c451c7401e 100644 --- a/flash/audio/speech_recognition/model.py +++ b/flash/audio/speech_recognition/model.py @@ -28,12 +28,12 @@ class SpeechRecognition(Task): def __init__( - self, - backbone: str = "facebook/wav2vec2-base-960h", - loss_fn: Optional[Callable] = None, - optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam, - learning_rate: float = 1e-2, - serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None, + self, + backbone: str = "facebook/wav2vec2-base-960h", + loss_fn: Optional[Callable] = None, + optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam, + learning_rate: float = 1e-2, + serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None, ): self.save_hyperparameters() From 1b48bc138c839efce57344e0053edffbc80fd788 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 14 Jul 2021 17:38:14 +0100 Subject: [PATCH 04/46] Closer to working --- flash/audio/speech_recognition/data.py | 108 +++++++++++++++++++----- flash/audio/speech_recognition/model.py | 7 +- flash_examples/speech_recognition.py | 6 +- 3 files changed, 93 insertions(+), 28 deletions(-) diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index d206b3faf6..76ddd0451b 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -11,10 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from dataclasses import dataclass from functools import partial -from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Union +from typing import Callable, Mapping, Sequence, Tuple import soundfile as sf +import torch from torch import Tensor from flash.core.data.auto_dataset import AutoDataset @@ -25,7 +28,7 @@ if _TEXT_AVAILABLE: from datasets import load_dataset - from transformers import default_data_collator, Wav2Vec2CTCTokenizer, Wav2Vec2Processor + from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2Processor class AudioDeserializer(Deserializer): @@ -62,7 +65,7 @@ def __init__(self, backbone: str, max_length: int = 128): self.processor = Wav2Vec2Processor.from_pretrained(backbone) self.max_length = max_length - def prepare_dataset(self, batch): + def _prepare_dataset(self, batch): # check that all files have the correct sampling rate assert ( len(set(batch["sampling_rate"])) == 1 @@ -76,25 +79,28 @@ def prepare_dataset(self, batch): return batch @staticmethod - def _transform_label(label_to_class_mapping: Dict[str, int], target: str, ex: Dict[str, Union[int, str]]): - ex[target] = label_to_class_mapping[ex[target]] - return ex + def speech_file_to_array_fn(batch): + speech_array, sampling_rate = sf.read(batch["file"]) + batch["speech"] = speech_array + batch["sampling_rate"] = sampling_rate + batch["target_text"] = batch["text"] + return batch def load_data( self, data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], dataset: Optional[Any] = None, - columns: Union[List[str], Tuple[str]] = ("input_ids", "attention_mask", "labels"), + columns: Union[List[str], Tuple[str]] = ("input_values", "labels"), ) -> Union[Sequence[Mapping[str, Any]]]: - file, input, target = data - - data_files = {} + # file, input, target = data + # + # data_files = {} stage = self.running_stage.value - data_files[stage] = str(file) + # data_files[stage] = str(file) dataset_dict = load_dataset("timit_asr") # todo - - dataset_dict = dataset_dict.map(partial(self._tokenize_fn, input=input), batched=True) + dataset_dict = dataset_dict.map(AudioDataSource.speech_file_to_array_fn, num_proc=4) + dataset_dict = dataset_dict.map(partial(self._prepare_dataset), batched=True) dataset_dict.set_format("torch", columns=columns) return dataset_dict[stage] @@ -109,8 +115,71 @@ def __setstate__(self, state): self.tokenizer = Wav2Vec2Processor.from_pretrained(self.backbone) def predict_load_data(self, data: Any, dataset: AutoDataset): - return self.load_data(data, dataset, columns=["input_ids", "attention_mask"]) + return self.load_data(data, dataset, columns=["input_values"]) + + + +@dataclass +class DataCollatorCTCWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor (:class:`~transformers.Wav2Vec2Processor`) + The processor used for proccessing the data. + padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. 
+ * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + max_length (:obj:`int`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + max_length_labels (:obj:`int`, `optional`): + Maximum length of the ``labels`` returned list and optionally padding length (see above). + pad_to_multiple_of (:obj:`int`, `optional`): + If set will pad the sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Wav2Vec2Processor + padding: Union[bool, str] = True + max_length: Optional[int] = None + max_length_labels: Optional[int] = None + pad_to_multiple_of: Optional[int] = None + pad_to_multiple_of_labels: Optional[int] = None + + def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + batch = self.processor.pad( + input_features, + padding=self.padding, + max_length=self.max_length, + pad_to_multiple_of=self.pad_to_multiple_of, + return_tensors="pt", + ) + with self.processor.as_target_processor(): + labels_batch = self.processor.pad( + label_features, + padding=self.padding, + max_length=self.max_length_labels, + pad_to_multiple_of=self.pad_to_multiple_of_labels, + return_tensors="pt", + ) + # replace padding with -100 to ignore loss correctly + labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) + + batch["labels"] = labels + + return batch class SpeechRecognitionPreprocess(Preprocess): @@ -137,6 +206,8 @@ def __init__( default_data_source=DefaultDataSources.DATASET, deserializer=AudioDeserializer(backbone, max_length), ) + self.processor = Wav2Vec2Processor.from_pretrained(backbone) + self.collator = DataCollatorCTCWithPadding(processor=self.processor, padding=True) def get_state_dict(self) -> Dict[str, Any]: return { @@ -149,20 +220,13 @@ def get_state_dict(self) -> Dict[str, Any]: def load_state_dict(cls, state_dict: Dict[str, Any], strict: bool): return cls(**state_dict) - def per_batch_transform(self, batch: Any) -> Any: - speech_array, sampling_rate = sf.read(batch["file"]) - batch["speech"] = speech_array - batch["sampling_rate"] = sampling_rate - batch["target_text"] = batch["text"] - return batch - def collate(self, samples: Any) -> Tensor: """Override to convert a set of samples to a batch""" # data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True) if isinstance(samples, dict): samples = [samples] - return default_data_collator(samples) + return self.collator(samples) class SpeechRecognitionPostprocess(Postprocess): diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py index c451c7401e..1036d1283a 100644 --- a/flash/audio/speech_recognition/model.py +++ b/flash/audio/speech_recognition/model.py @@ -55,9 +55,10 @@ def __init__( self.save_hyperparameters() def forward(self, batch: Dict[str, torch.Tensor]): - return self.model(input_ids=batch["input_ids"]) + return self.model(batch["input_values"], labels=batch["labels"]) def step(self, batch, batch_idx, metrics) -> dict: out = self(batch) - prediction = 
torch.argmax(out.logits, dim=-1) - return prediction + out["logs"] = {'loss': out.loss} + return out + diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py index a4890f06a8..543e0b9a97 100644 --- a/flash_examples/speech_recognition.py +++ b/flash_examples/speech_recognition.py @@ -14,11 +14,11 @@ import flash from flash.audio.speech_recognition import SpeechRecognition, SpeechRecognitionData -datamodule = SpeechRecognitionData.from_data_source("timit") +datamodule = SpeechRecognitionData.from_data_source("timit", test_data='test', num_workers=0) # 2. Build the task model = SpeechRecognition() # 3. Create the trainer and finetune the model -trainer = flash.Trainer(max_epochs=3) -trainer.predict(model, datamodule=datamodule) +trainer = flash.Trainer(max_epochs=3, gpus=1) +trainer.test(model, datamodule=datamodule) From c87dcc267e49312281804bf18857fd7c07e88788 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 16:39:50 +0000 Subject: [PATCH 05/46] Format code with autopep8 --- flash/audio/speech_recognition/data.py | 2 +- flash/audio/speech_recognition/model.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index 76ddd0451b..2ce795724e 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -118,7 +118,6 @@ def predict_load_data(self, data: Any, dataset: AutoDataset): return self.load_data(data, dataset, columns=["input_values"]) - @dataclass class DataCollatorCTCWithPadding: """ @@ -181,6 +180,7 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> return batch + class SpeechRecognitionPreprocess(Preprocess): def __init__( diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py index 1036d1283a..e2fb4bfffb 100644 --- a/flash/audio/speech_recognition/model.py +++ b/flash/audio/speech_recognition/model.py @@ -61,4 +61,3 @@ def step(self, batch, batch_idx, metrics) -> dict: out = self(batch) out["logs"] = {'loss': out.loss} return out - From 091da5671b9c04759572a881b335514e1f285c8c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 16:40:26 +0000 Subject: [PATCH 06/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flash/audio/speech_recognition/data.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index 2ce795724e..ab7c9258bc 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -13,8 +13,7 @@ # limitations under the License. 
from dataclasses import dataclass from functools import partial -from typing import Any, Dict, List, Optional, Union -from typing import Callable, Mapping, Sequence, Tuple +from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union import soundfile as sf import torch From 2690e9cc6fdfea34e5266beda844d581aa6c6748 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 15 Jul 2021 11:40:02 +0100 Subject: [PATCH 07/46] Refactors --- flash/audio/speech_recognition/data.py | 71 ++++++++++++++----------- flash/audio/speech_recognition/model.py | 13 ++--- flash_examples/speech_recognition.py | 5 +- 3 files changed, 48 insertions(+), 41 deletions(-) diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index ab7c9258bc..a852684beb 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -11,10 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging from dataclasses import dataclass from functools import partial from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union +import datasets +import pandas as pd import soundfile as sf import torch from torch import Tensor @@ -26,7 +29,7 @@ from flash.core.utilities.imports import _TEXT_AVAILABLE if _TEXT_AVAILABLE: - from datasets import load_dataset + from datasets import Dataset, load_dataset from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2Processor @@ -64,27 +67,6 @@ def __init__(self, backbone: str, max_length: int = 128): self.processor = Wav2Vec2Processor.from_pretrained(backbone) self.max_length = max_length - def _prepare_dataset(self, batch): - # check that all files have the correct sampling rate - assert ( - len(set(batch["sampling_rate"])) == 1 - ), f"Make sure all inputs have the same sampling rate of {self.processor.feature_extractor.sampling_rate}." - - batch["input_values"] = self.processor(batch["speech"], sampling_rate=batch["sampling_rate"][0]).input_values - - if not self.predicting: - with self.processor.as_target_processor(): - batch["labels"] = self.processor(batch["target_text"]).input_ids - return batch - - @staticmethod - def speech_file_to_array_fn(batch): - speech_array, sampling_rate = sf.read(batch["file"]) - batch["speech"] = speech_array - batch["sampling_rate"] = sampling_rate - batch["target_text"] = batch["text"] - return batch - def load_data( self, data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], @@ -98,10 +80,6 @@ def load_data( # data_files[stage] = str(file) dataset_dict = load_dataset("timit_asr") # todo - dataset_dict = dataset_dict.map(AudioDataSource.speech_file_to_array_fn, num_proc=4) - dataset_dict = dataset_dict.map(partial(self._prepare_dataset), batched=True) - dataset_dict.set_format("torch", columns=columns) - return dataset_dict[stage] def __getstate__(self): # TODO: Find out why this is being pickled @@ -124,7 +102,8 @@ class DataCollatorCTCWithPadding: Args: processor (:class:`~transformers.Wav2Vec2Processor`) The processor used for proccessing the data. 
- padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, + `optional`, defaults to :obj:`True`): Select a strategy to pad the returned sequences (according to the model's padding side and padding index) among: * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single @@ -219,12 +198,42 @@ def get_state_dict(self) -> Dict[str, Any]: def load_state_dict(cls, state_dict: Dict[str, Any], strict: bool): return cls(**state_dict) + def _prepare_dataset(self, batch: Any) -> Any: + # check that all files have the correct sampling rate + assert ( + len(set(batch["sampling_rate"])) == 1 + ), f"Make sure all inputs have the same sampling rate of {self.processor.feature_extractor.sampling_rate}." + + batch["input_values"] = self.processor(batch["speech"], sampling_rate=batch["sampling_rate"][0]).input_values + + if not self.predicting: + with self.processor.as_target_processor(): + batch["labels"] = self.processor(batch["target_text"]).input_ids + return batch + + def _speech_file_to_array_fn(self, batch: Any) -> Any: + speech_array, sampling_rate = sf.read(batch["file"]) + batch["speech"] = speech_array + batch["sampling_rate"] = sampling_rate + if not self.predicting: + batch["target_text"] = batch["text"] + return batch + + def _convert_to_batch(self, batch: Any) -> Dataset: + self._disable_tqdm_bars() + batch = Dataset.from_pandas(pd.DataFrame(batch)) + columns = ["input_values", "labels"] + batch = batch.map(partial(self._speech_file_to_array_fn)) + batch = batch.map(partial(self._prepare_dataset), batched=True) + batch.set_format("torch", columns=columns) + return batch + + def _disable_tqdm_bars(self): + datasets.logging.get_verbosity = lambda: logging.NOTSET + def collate(self, samples: Any) -> Tensor: """Override to convert a set of samples to a batch""" - # data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True) - - if isinstance(samples, dict): - samples = [samples] + samples = self._convert_to_batch(samples) return self.collator(samples) diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py index e2fb4bfffb..cc3a3ca513 100644 --- a/flash/audio/speech_recognition/model.py +++ b/flash/audio/speech_recognition/model.py @@ -16,6 +16,7 @@ from typing import Any, Callable, Dict, Mapping, Optional, Type, Union import torch +import torch.nn as nn from flash import Task from flash.core.data.process import Serializer @@ -35,29 +36,25 @@ def __init__( learning_rate: float = 1e-2, serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None, ): - self.save_hyperparameters() - os.environ["TOKENIZERS_PARALLELISM"] = "TRUE" # disable HF thousand warnings warnings.simplefilter("ignore") # set os environ variable for multiprocesses os.environ["PYTHONWARNINGS"] = "ignore" - super().__init__( - model=None, + model=Wav2Vec2ForCTC.from_pretrained(backbone), loss_fn=loss_fn, optimizer=optimizer, learning_rate=learning_rate, serializer=serializer, ) - self.model = Wav2Vec2ForCTC.from_pretrained(backbone) self.save_hyperparameters() def forward(self, batch: Dict[str, torch.Tensor]): - return self.model(batch["input_values"], labels=batch["labels"]) + return self.model(batch["input_values"]) - def step(self, batch, batch_idx, metrics) -> dict: - out = self(batch) + def step(self, batch: Any, batch_idx: int, 
metrics: nn.ModuleDict) -> Any: + out = self.model(batch["input_values"], labels=batch["labels"]) out["logs"] = {'loss': out.loss} return out diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py index 543e0b9a97..0b16d71d9f 100644 --- a/flash_examples/speech_recognition.py +++ b/flash_examples/speech_recognition.py @@ -14,11 +14,12 @@ import flash from flash.audio.speech_recognition import SpeechRecognition, SpeechRecognitionData -datamodule = SpeechRecognitionData.from_data_source("timit", test_data='test', num_workers=0) +datamodule = SpeechRecognitionData.from_data_source("timit", test_data='test', train_data='train', num_workers=4) # 2. Build the task model = SpeechRecognition() # 3. Create the trainer and finetune the model -trainer = flash.Trainer(max_epochs=3, gpus=1) +trainer = flash.Trainer(max_epochs=1, gpus=1, profiler='simple', limit_train_batches=100) +trainer.finetune(model, strategy='no_freeze') trainer.test(model, datamodule=datamodule) From 1531560614e6c35a568877786fee654ac1aa405d Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 15 Jul 2021 11:40:48 +0100 Subject: [PATCH 08/46] Refactors --- flash/audio/speech_recognition/collate.py | 73 +++++++++++++++++++++++ flash/audio/speech_recognition/data.py | 67 +-------------------- 2 files changed, 74 insertions(+), 66 deletions(-) create mode 100644 flash/audio/speech_recognition/collate.py diff --git a/flash/audio/speech_recognition/collate.py b/flash/audio/speech_recognition/collate.py new file mode 100644 index 0000000000..6a68571ef5 --- /dev/null +++ b/flash/audio/speech_recognition/collate.py @@ -0,0 +1,73 @@ +from dataclasses import dataclass +from typing import Dict, List, Optional, Union + +import torch + +from flash.core.utilities.imports import _TEXT_AVAILABLE + +if _TEXT_AVAILABLE: + from transformers import Wav2Vec2Processor + + +@dataclass +class DataCollatorCTCWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor (:class:`~transformers.Wav2Vec2Processor`) + The processor used for proccessing the data. + padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, + `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + max_length (:obj:`int`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + max_length_labels (:obj:`int`, `optional`): + Maximum length of the ``labels`` returned list and optionally padding length (see above). + pad_to_multiple_of (:obj:`int`, `optional`): + If set will pad the sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ """ + + processor: Wav2Vec2Processor + padding: Union[bool, str] = True + max_length: Optional[int] = None + max_length_labels: Optional[int] = None + pad_to_multiple_of: Optional[int] = None + pad_to_multiple_of_labels: Optional[int] = None + + def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + batch = self.processor.pad( + input_features, + padding=self.padding, + max_length=self.max_length, + pad_to_multiple_of=self.pad_to_multiple_of, + return_tensors="pt", + ) + with self.processor.as_target_processor(): + labels_batch = self.processor.pad( + label_features, + padding=self.padding, + max_length=self.max_length_labels, + pad_to_multiple_of=self.pad_to_multiple_of_labels, + return_tensors="pt", + ) + + # replace padding with -100 to ignore loss correctly + labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) + + batch["labels"] = labels + + return batch diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index a852684beb..be1a3c6369 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -12,16 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from dataclasses import dataclass from functools import partial from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union import datasets import pandas as pd import soundfile as sf -import torch from torch import Tensor +from flash.audio.speech_recognition.collate import DataCollatorCTCWithPadding from flash.core.data.auto_dataset import AutoDataset from flash.core.data.data_module import DataModule from flash.core.data.data_source import DataSource, DefaultDataSources @@ -95,70 +94,6 @@ def predict_load_data(self, data: Any, dataset: AutoDataset): return self.load_data(data, dataset, columns=["input_values"]) -@dataclass -class DataCollatorCTCWithPadding: - """ - Data collator that will dynamically pad the inputs received. - Args: - processor (:class:`~transformers.Wav2Vec2Processor`) - The processor used for proccessing the data. - padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, - `optional`, defaults to :obj:`True`): - Select a strategy to pad the returned sequences (according to the model's padding side and padding index) - among: - * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single - sequence if provided). - * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the - maximum acceptable input length for the model if that argument is not provided. - * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of - different lengths). - max_length (:obj:`int`, `optional`): - Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). - max_length_labels (:obj:`int`, `optional`): - Maximum length of the ``labels`` returned list and optionally padding length (see above). 
- pad_to_multiple_of (:obj:`int`, `optional`): - If set will pad the sequence to a multiple of the provided value. - This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= - 7.5 (Volta). - """ - - processor: Wav2Vec2Processor - padding: Union[bool, str] = True - max_length: Optional[int] = None - max_length_labels: Optional[int] = None - pad_to_multiple_of: Optional[int] = None - pad_to_multiple_of_labels: Optional[int] = None - - def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]: - # split inputs and labels since they have to be of different lengths and need - # different padding methods - input_features = [{"input_values": feature["input_values"]} for feature in features] - label_features = [{"input_ids": feature["labels"]} for feature in features] - - batch = self.processor.pad( - input_features, - padding=self.padding, - max_length=self.max_length, - pad_to_multiple_of=self.pad_to_multiple_of, - return_tensors="pt", - ) - with self.processor.as_target_processor(): - labels_batch = self.processor.pad( - label_features, - padding=self.padding, - max_length=self.max_length_labels, - pad_to_multiple_of=self.pad_to_multiple_of_labels, - return_tensors="pt", - ) - - # replace padding with -100 to ignore loss correctly - labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) - - batch["labels"] = labels - - return batch - - class SpeechRecognitionPreprocess(Preprocess): def __init__( From e8664d6e94f10e656ebd3afaaa2c28d26589b355 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 15 Jul 2021 12:05:12 +0100 Subject: [PATCH 09/46] Cleanups --- flash/audio/speech_recognition/data.py | 25 +++++-------------------- flash_examples/speech_recognition.py | 2 +- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index be1a3c6369..dc60b2118f 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -34,11 +34,10 @@ class AudioDeserializer(Deserializer): - def __init__(self, backbone: str, max_length: int): + def __init__(self, backbone: str): super().__init__() self.backbone = backbone self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(backbone) - self.max_length = max_length def deserialize(self, sample: Any) -> Tensor: return self.tokenizer(sample["speech"], sampling_rate=sample["sampling_rate"][0]).input_values @@ -49,7 +48,7 @@ def example_input(self) -> str: def __getstate__(self): # TODO: Find out why this is being pickled state = self.__dict__.copy() - state.pop("processor") + state.pop("tokenizer") return state def __setstate__(self, state): @@ -59,12 +58,10 @@ def __setstate__(self, state): class AudioDataSource(DataSource): - def __init__(self, backbone: str, max_length: int = 128): + def __init__(self, backbone: str): super().__init__() self.backbone = backbone - self.processor = Wav2Vec2Processor.from_pretrained(backbone) - self.max_length = max_length def load_data( self, @@ -81,15 +78,6 @@ def load_data( dataset_dict = load_dataset("timit_asr") # todo return dataset_dict[stage] - def __getstate__(self): # TODO: Find out why this is being pickled - state = self.__dict__.copy() - state.pop("tokenizer") - return state - - def __setstate__(self, state): - self.__dict__.update(state) - self.tokenizer = Wav2Vec2Processor.from_pretrained(self.backbone) - def predict_load_data(self, data: Any, dataset: AutoDataset): return 
self.load_data(data, dataset, columns=["input_values"]) @@ -103,10 +91,8 @@ def __init__( test_transform: Optional[Dict[str, Callable]] = None, predict_transform: Optional[Dict[str, Callable]] = None, backbone: str = "facebook/wav2vec2-base-960h", - max_length: int = 128, ): self.backbone = backbone - self.max_length = max_length super().__init__( train_transform=train_transform, @@ -114,10 +100,10 @@ def __init__( test_transform=test_transform, predict_transform=predict_transform, data_sources={ - "timit": AudioDataSource(self.backbone, max_length=max_length), + "timit": AudioDataSource(self.backbone), }, default_data_source=DefaultDataSources.DATASET, - deserializer=AudioDeserializer(backbone, max_length), + deserializer=AudioDeserializer(backbone), ) self.processor = Wav2Vec2Processor.from_pretrained(backbone) self.collator = DataCollatorCTCWithPadding(processor=self.processor, padding=True) @@ -126,7 +112,6 @@ def get_state_dict(self) -> Dict[str, Any]: return { **self.transforms, "backbone": self.backbone, - "max_length": self.max_length, } @classmethod diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py index 0b16d71d9f..db28f5091e 100644 --- a/flash_examples/speech_recognition.py +++ b/flash_examples/speech_recognition.py @@ -21,5 +21,5 @@ # 3. Create the trainer and finetune the model trainer = flash.Trainer(max_epochs=1, gpus=1, profiler='simple', limit_train_batches=100) -trainer.finetune(model, strategy='no_freeze') +trainer.finetune(model, datamodule=datamodule, strategy='no_freeze') trainer.test(model, datamodule=datamodule) From 6d0f1c3af21d75e5d72fd3d51007bd330425c5fd Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 15 Jul 2021 12:40:29 +0100 Subject: [PATCH 10/46] Refactor to allow files --- flash/audio/speech_recognition/data.py | 108 ++++++++++++++++++++++--- flash_examples/speech_recognition.py | 2 +- 2 files changed, 100 insertions(+), 10 deletions(-) diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index dc60b2118f..01bdcb79da 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -19,9 +19,11 @@ import pandas as pd import soundfile as sf from torch import Tensor +from torch.utils.data import Sampler from flash.audio.speech_recognition.collate import DataCollatorCTCWithPadding from flash.core.data.auto_dataset import AutoDataset +from flash.core.data.callback import BaseDataFetcher from flash.core.data.data_module import DataModule from flash.core.data.data_source import DataSource, DefaultDataSources from flash.core.data.process import Deserializer, Postprocess, Preprocess @@ -58,30 +60,53 @@ def __setstate__(self, state): class AudioDataSource(DataSource): - def __init__(self, backbone: str): + def __init__(self, backbone: str, filetype: Optional[str] = None): super().__init__() - + self.filetype = filetype self.backbone = backbone def load_data( self, - data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], + file: Tuple[str, Union[str, List[str]], Union[str, List[str]]], dataset: Optional[Any] = None, columns: Union[List[str], Tuple[str]] = ("input_values", "labels"), ) -> Union[Sequence[Mapping[str, Any]]]: - # file, input, target = data - # - # data_files = {} stage = self.running_stage.value - # data_files[stage] = str(file) + dataset_dict = load_dataset(self.filetype, data_files={stage: str(file)}) + return dataset_dict[stage] + + def predict_load_data(self, data: Any, dataset: AutoDataset): + return self.load_data(data, 
dataset, columns=["input_values"]) + + +class TimitDataSource(AudioDataSource): - dataset_dict = load_dataset("timit_asr") # todo + def load_data( + self, + data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], + dataset: Optional[Any] = None, + columns: Union[List[str], Tuple[str]] = ("input_values", "labels"), + ) -> Union[Sequence[Mapping[str, Any]]]: + stage = self.running_stage.value + dataset_dict = load_dataset("timit_asr") return dataset_dict[stage] def predict_load_data(self, data: Any, dataset: AutoDataset): return self.load_data(data, dataset, columns=["input_values"]) +class AudioCSVDataSource(AudioDataSource): + + def __init__(self, backbone: str): + super().__init__(backbone, filetype='csv') + + +class AudioJSONDataSource(AudioDataSource): + + def __init__(self, backbone: str): + super().__init__(backbone, filetype='json') + + class SpeechRecognitionPreprocess(Preprocess): def __init__( @@ -100,7 +125,9 @@ def __init__( test_transform=test_transform, predict_transform=predict_transform, data_sources={ - "timit": AudioDataSource(self.backbone), + DefaultDataSources.CSV: AudioCSVDataSource(self.backbone), + DefaultDataSources.JSON: AudioJSONDataSource(self.backbone), + "timit": TimitDataSource(self.backbone), }, default_data_source=DefaultDataSources.DATASET, deserializer=AudioDeserializer(backbone), @@ -177,3 +204,66 @@ class SpeechRecognitionData(DataModule): preprocess_cls = SpeechRecognitionPreprocess postprocess_cls = SpeechRecognitionPostprocess + + @classmethod + def from_timit( + cls, + train_transform: Optional[Dict[str, Callable]] = None, + val_transform: Optional[Dict[str, Callable]] = None, + test_transform: Optional[Dict[str, Callable]] = None, + predict_transform: Optional[Dict[str, Callable]] = None, + data_fetcher: Optional[BaseDataFetcher] = None, + preprocess: Optional[Preprocess] = None, + val_split: Optional[float] = None, + batch_size: int = 4, + num_workers: Optional[int] = None, + sampler: Optional[Sampler] = None, + **preprocess_kwargs: Any, + ) -> 'DataModule': + """Creates a :class:`~flash.audio.speech_recognition.data.SpeechRecognitionData` object loading + the TIMIT labeled transcription corpus: https://catalog.ldc.upenn.edu/LDC93S1 + + Args: + train_transform: The dictionary of transforms to use during training which maps + :class:`~flash.core.data.process.Preprocess` hook names to callable transforms. + val_transform: The dictionary of transforms to use during validation which maps + :class:`~flash.core.data.process.Preprocess` hook names to callable transforms. + test_transform: The dictionary of transforms to use during testing which maps + :class:`~flash.core.data.process.Preprocess` hook names to callable transforms. + predict_transform: The dictionary of transforms to use during predicting which maps + :class:`~flash.core.data.process.Preprocess` hook names to callable transforms. + data_fetcher: The :class:`~flash.core.data.callback.BaseDataFetcher` to pass to the + :class:`~flash.core.data.data_module.DataModule`. + preprocess: The :class:`~flash.core.data.data.Preprocess` to pass to the + :class:`~flash.core.data.data_module.DataModule`. If ``None``, ``cls.preprocess_cls`` + will be constructed and used. + val_split: The ``val_split`` argument to pass to the :class:`~flash.core.data.data_module.DataModule`. + batch_size: The ``batch_size`` argument to pass to the :class:`~flash.core.data.data_module.DataModule`. + num_workers: The ``num_workers`` argument to pass to the :class:`~flash.core.data.data_module.DataModule`. 
+ sampler: The ``sampler`` argument to pass to the :class:`~flash.core.data.data_module.DataModule`. + preprocess_kwargs: Additional keyword arguments to use when constructing the preprocess. Will only be used + if ``preprocess = None``. + + Returns: + The constructed TIMIT data module. + + Examples:: + + data_module = SpeechRecognitionData.from_timit() + """ + return cls.from_data_source( + "timit", + train_data="train", + test_data="test", + train_transform=train_transform, + val_transform=val_transform, + test_transform=test_transform, + predict_transform=predict_transform, + data_fetcher=data_fetcher, + preprocess=preprocess, + val_split=val_split, + batch_size=batch_size, + num_workers=num_workers, + sampler=sampler, + **preprocess_kwargs, + ) diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py index db28f5091e..0d936401b3 100644 --- a/flash_examples/speech_recognition.py +++ b/flash_examples/speech_recognition.py @@ -14,7 +14,7 @@ import flash from flash.audio.speech_recognition import SpeechRecognition, SpeechRecognitionData -datamodule = SpeechRecognitionData.from_data_source("timit", test_data='test', train_data='train', num_workers=4) +datamodule = SpeechRecognitionData.from_timit(num_workers=4) # 2. Build the task model = SpeechRecognition() From a9735b2264c0a132f8482f7424851edca5d66cec Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 15 Jul 2021 18:14:33 +0100 Subject: [PATCH 11/46] Get predictions working --- flash/audio/speech_recognition/collate.py | 28 ++++++----- flash/audio/speech_recognition/data.py | 60 ++++++++++++++++------- flash/audio/speech_recognition/model.py | 3 ++ flash/core/utilities/imports.py | 1 + flash_examples/speech_recognition.py | 23 +++++++-- 5 files changed, 80 insertions(+), 35 deletions(-) diff --git a/flash/audio/speech_recognition/collate.py b/flash/audio/speech_recognition/collate.py index 6a68571ef5..aa046e38df 100644 --- a/flash/audio/speech_recognition/collate.py +++ b/flash/audio/speech_recognition/collate.py @@ -47,7 +47,6 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> # split inputs and labels since they have to be of different lengths and need # different padding methods input_features = [{"input_values": feature["input_values"]} for feature in features] - label_features = [{"input_ids": feature["labels"]} for feature in features] batch = self.processor.pad( input_features, @@ -56,18 +55,23 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> pad_to_multiple_of=self.pad_to_multiple_of, return_tensors="pt", ) - with self.processor.as_target_processor(): - labels_batch = self.processor.pad( - label_features, - padding=self.padding, - max_length=self.max_length_labels, - pad_to_multiple_of=self.pad_to_multiple_of_labels, - return_tensors="pt", - ) - # replace padding with -100 to ignore loss correctly - labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) + label_features = [{"input_ids": feature.get("labels")} for feature in features] + # check to ensure labels exist to collate + labels_exist = not any(x['input_ids'] is None for x in label_features) + if labels_exist: + with self.processor.as_target_processor(): + labels_batch = self.processor.pad( + label_features, + padding=self.padding, + max_length=self.max_length_labels, + pad_to_multiple_of=self.pad_to_multiple_of_labels, + return_tensors="pt", + ) - batch["labels"] = labels + # replace padding with -100 to ignore loss correctly + labels = 
labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) + + batch["labels"] = labels return batch diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index 01bdcb79da..0f98e865cd 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -18,6 +18,7 @@ import datasets import pandas as pd import soundfile as sf +import torch from torch import Tensor from torch.utils.data import Sampler @@ -27,14 +28,14 @@ from flash.core.data.data_module import DataModule from flash.core.data.data_source import DataSource, DefaultDataSources from flash.core.data.process import Deserializer, Postprocess, Preprocess -from flash.core.utilities.imports import _TEXT_AVAILABLE +from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE -if _TEXT_AVAILABLE: +if _SPEECH_RECOGNITION_AVAILABLE: from datasets import Dataset, load_dataset from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2Processor -class AudioDeserializer(Deserializer): +class SpeechRecognitionDeserializer(Deserializer): def __init__(self, backbone: str): super().__init__() @@ -58,7 +59,7 @@ def __setstate__(self, state): self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(self.backbone) -class AudioDataSource(DataSource): +class SpeechRecognitionDataSource(DataSource): def __init__(self, backbone: str, filetype: Optional[str] = None): super().__init__() @@ -67,46 +68,62 @@ def __init__(self, backbone: str, filetype: Optional[str] = None): def load_data( self, - file: Tuple[str, Union[str, List[str]], Union[str, List[str]]], + data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], dataset: Optional[Any] = None, - columns: Union[List[str], Tuple[str]] = ("input_values", "labels"), ) -> Union[Sequence[Mapping[str, Any]]]: + file, input, target = data stage = self.running_stage.value dataset_dict = load_dataset(self.filetype, data_files={stage: str(file)}) return dataset_dict[stage] def predict_load_data(self, data: Any, dataset: AutoDataset): - return self.load_data(data, dataset, columns=["input_values"]) + return self.load_data(data, dataset) -class TimitDataSource(AudioDataSource): +class TimitDataSource(SpeechRecognitionDataSource): def load_data( self, data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], dataset: Optional[Any] = None, - columns: Union[List[str], Tuple[str]] = ("input_values", "labels"), ) -> Union[Sequence[Mapping[str, Any]]]: stage = self.running_stage.value dataset_dict = load_dataset("timit_asr") return dataset_dict[stage] def predict_load_data(self, data: Any, dataset: AutoDataset): - return self.load_data(data, dataset, columns=["input_values"]) + return self.load_data(data, dataset) -class AudioCSVDataSource(AudioDataSource): +class SpeechRecognitionCSVDataSource(SpeechRecognitionDataSource): def __init__(self, backbone: str): super().__init__(backbone, filetype='csv') -class AudioJSONDataSource(AudioDataSource): +class SpeechRecognitionJSONDataSource(SpeechRecognitionDataSource): def __init__(self, backbone: str): super().__init__(backbone, filetype='json') +class SpeechRecognitionFilesSource(DataSource): + + def __init__(self, backbone: str, filetype: Optional[str] = None): + super().__init__() + self.filetype = filetype + self.backbone = backbone + + def load_data( + self, + files: Tuple[str, Union[str, List[str]], Union[str, List[str]]], + dataset: Optional[Any] = None, + ) -> Union[Sequence[Mapping[str, Any]]]: + if isinstance(files, str): + files = [files] + return 
[dict(file=file) for file in files] + + class SpeechRecognitionPreprocess(Preprocess): def __init__( @@ -125,12 +142,13 @@ def __init__( test_transform=test_transform, predict_transform=predict_transform, data_sources={ - DefaultDataSources.CSV: AudioCSVDataSource(self.backbone), - DefaultDataSources.JSON: AudioJSONDataSource(self.backbone), + DefaultDataSources.CSV: SpeechRecognitionCSVDataSource(self.backbone), + DefaultDataSources.JSON: SpeechRecognitionJSONDataSource(self.backbone), "timit": TimitDataSource(self.backbone), + DefaultDataSources.FILES: SpeechRecognitionFilesSource(self.backbone) }, - default_data_source=DefaultDataSources.DATASET, - deserializer=AudioDeserializer(backbone), + default_data_source=DefaultDataSources.FILES, + deserializer=SpeechRecognitionDeserializer(backbone), ) self.processor = Wav2Vec2Processor.from_pretrained(backbone) self.collator = DataCollatorCTCWithPadding(processor=self.processor, padding=True) @@ -169,7 +187,9 @@ def _speech_file_to_array_fn(self, batch: Any) -> Any: def _convert_to_batch(self, batch: Any) -> Dataset: self._disable_tqdm_bars() batch = Dataset.from_pandas(pd.DataFrame(batch)) - columns = ["input_values", "labels"] + columns = ["input_values"] + if not self.predicting: + columns += ["labels"] batch = batch.map(partial(self._speech_file_to_array_fn)) batch = batch.map(partial(self._prepare_dataset), batched=True) batch.set_format("torch", columns=columns) @@ -195,8 +215,10 @@ def __init__( self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(backbone) def per_batch_transform(self, batch: Any) -> Any: - transcription = self.tokenizer.batch_decode(batch)[0] - return transcription + # converts logits into greedy transcription + pred_ids = torch.argmax(batch.logits, dim=-1) + transcriptions = self.tokenizer.batch_decode(pred_ids) + return transcriptions class SpeechRecognitionData(DataModule): diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py index cc3a3ca513..bf3e47eb4e 100644 --- a/flash/audio/speech_recognition/model.py +++ b/flash/audio/speech_recognition/model.py @@ -54,6 +54,9 @@ def __init__( def forward(self, batch: Dict[str, torch.Tensor]): return self.model(batch["input_values"]) + def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> Any: + return self(batch) + def step(self, batch: Any, batch_idx: int, metrics: nn.ModuleDict) -> Any: out = self.model(batch["input_values"], labels=batch["labels"]) out["logs"] = {'loss': out.loss} diff --git a/flash/core/utilities/imports.py b/flash/core/utilities/imports.py index f5298e9d8f..11ec14564e 100644 --- a/flash/core/utilities/imports.py +++ b/flash/core/utilities/imports.py @@ -90,6 +90,7 @@ def _compare_version(package: str, op, version) -> bool: _TORCHVISION_GREATER_EQUAL_0_9 = _compare_version("torchvision", operator.ge, "0.9.0") _TEXT_AVAILABLE = _TRANSFORMERS_AVAILABLE +_SPEECH_RECOGNITION_AVAILABLE = _TRANSFORMERS_AVAILABLE _TABULAR_AVAILABLE = _TABNET_AVAILABLE and _PANDAS_AVAILABLE _VIDEO_AVAILABLE = _PYTORCHVIDEO_AVAILABLE _IMAGE_AVAILABLE = all([ diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py index 0d936401b3..cf3bdda04b 100644 --- a/flash_examples/speech_recognition.py +++ b/flash_examples/speech_recognition.py @@ -14,12 +14,27 @@ import flash from flash.audio.speech_recognition import SpeechRecognition, SpeechRecognitionData -datamodule = SpeechRecognitionData.from_timit(num_workers=4) +# # 1. 
Create the DataModule +# download_data("https://pl-flash-data.s3.amazonaws.com/timit_data.zip", "./data") +# +datamodule = SpeechRecognitionData.from_json( + input_fields="file", + target_fields="text", + train_file="data/train.json", + test_file="data/test.json", + num_workers=4, +) # 2. Build the task -model = SpeechRecognition() +model = SpeechRecognition(learning_rate=1e-5) # 3. Create the trainer and finetune the model -trainer = flash.Trainer(max_epochs=1, gpus=1, profiler='simple', limit_train_batches=100) +trainer = flash.Trainer(max_epochs=1, gpus=1) trainer.finetune(model, datamodule=datamodule, strategy='no_freeze') -trainer.test(model, datamodule=datamodule) + +# 4. Predict on audio files! +predictions = model.predict(["data/example.wav"]) +print(predictions) + +# 5. Save the model! +trainer.save_checkpoint("speech_recognition_model.pt") From 0901d120bd63feb5ccf344deef99bbe714bd67d4 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 15 Jul 2021 18:15:33 +0100 Subject: [PATCH 12/46] Add licence --- flash/audio/speech_recognition/collate.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/flash/audio/speech_recognition/collate.py b/flash/audio/speech_recognition/collate.py index aa046e38df..9e6dcd8742 100644 --- a/flash/audio/speech_recognition/collate.py +++ b/flash/audio/speech_recognition/collate.py @@ -1,3 +1,16 @@ +# Copyright 2020 The PyTorch Lightning team and The HuggingFace Team. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
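Not part of the diffs above — a minimal, self-contained sketch of the label-optional collation that [PATCH 11/46] adds to DataCollatorCTCWithPadding. It substitutes plain torch.nn.utils.rnn.pad_sequence for Wav2Vec2Processor.pad so it runs without transformers; collate_ctc and the inline sample data are illustrative names only, not the Flash API.

    # Illustrative sketch: inputs are always padded, labels are only collated
    # when every sample carries them (i.e. training/validation, not prediction).
    from typing import Any, Dict, List

    import torch
    from torch.nn.utils.rnn import pad_sequence


    def collate_ctc(features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        # Input values are zero-padded on the right to the longest clip in the batch.
        inputs = [torch.as_tensor(f["input_values"], dtype=torch.float) for f in features]
        batch = {"input_values": pad_sequence(inputs, batch_first=True)}

        # Skip label collation entirely when any sample has no labels (prediction).
        if all(f.get("labels") is not None for f in features):
            labels = [torch.as_tensor(f["labels"], dtype=torch.long) for f in features]
            # Padding positions become -100 so the loss ignores them, mirroring the
            # masked_fill(..., -100) in the patched collator above.
            batch["labels"] = pad_sequence(labels, batch_first=True, padding_value=-100)
        return batch


    if __name__ == "__main__":
        train_batch = collate_ctc([
            {"input_values": [0.1, 0.2, 0.3], "labels": [5, 7]},
            {"input_values": [0.4, 0.5], "labels": [2]},
        ])
        predict_batch = collate_ctc([{"input_values": [0.1, 0.2]}])
        print(train_batch["labels"])      # tensor([[   5,    7], [   2, -100]])
        print("labels" in predict_batch)  # False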
from dataclasses import dataclass from typing import Dict, List, Optional, Union From 1f18f055b63a40f4324e5be1a90570dbbfa38d2f Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 15 Jul 2021 18:16:47 +0100 Subject: [PATCH 13/46] Fix loads --- flash/audio/speech_recognition/collate.py | 4 ++-- flash/audio/speech_recognition/model.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flash/audio/speech_recognition/collate.py b/flash/audio/speech_recognition/collate.py index 9e6dcd8742..cdb30bea24 100644 --- a/flash/audio/speech_recognition/collate.py +++ b/flash/audio/speech_recognition/collate.py @@ -16,9 +16,9 @@ import torch -from flash.core.utilities.imports import _TEXT_AVAILABLE +from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE -if _TEXT_AVAILABLE: +if _SPEECH_RECOGNITION_AVAILABLE: from transformers import Wav2Vec2Processor diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py index bf3e47eb4e..8d1b491a76 100644 --- a/flash/audio/speech_recognition/model.py +++ b/flash/audio/speech_recognition/model.py @@ -20,9 +20,9 @@ from flash import Task from flash.core.data.process import Serializer -from flash.core.utilities.imports import _TEXT_AVAILABLE +from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE -if _TEXT_AVAILABLE: +if _SPEECH_RECOGNITION_AVAILABLE: from transformers import Wav2Vec2ForCTC From 71cb06dacfe547ababecee8ab102e39d7d5a0c05 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 15 Jul 2021 18:19:35 +0100 Subject: [PATCH 14/46] Add check --- flash/audio/speech_recognition/data.py | 2 +- flash/core/utilities/imports.py | 3 ++- requirements/datatype_audio.txt | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index 0f98e865cd..d712ac6ca0 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -17,7 +17,6 @@ import datasets import pandas as pd -import soundfile as sf import torch from torch import Tensor from torch.utils.data import Sampler @@ -31,6 +30,7 @@ from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE if _SPEECH_RECOGNITION_AVAILABLE: + import soundfile as sf from datasets import Dataset, load_dataset from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2Processor diff --git a/flash/core/utilities/imports.py b/flash/core/utilities/imports.py index 5ed01af80f..92bc5e23a9 100644 --- a/flash/core/utilities/imports.py +++ b/flash/core/utilities/imports.py @@ -86,6 +86,7 @@ def _compare_version(package: str, op, version) -> bool: _OPEN3D_AVAILABLE = _module_available("open3d") _ASTEROID_AVAILABLE = _module_available("asteroid") _SEGMENTATION_MODELS_AVAILABLE = _module_available("segmentation_models_pytorch") +_SOUNDFILE_AVAILABLE = _module_available("soundfile") _TORCH_SCATTER_AVAILABLE = _module_available("torch_scatter") _TORCH_SPARSE_AVAILABLE = _module_available("torch_sparse") _TORCH_GEOMETRIC_AVAILABLE = _module_available("torch_geometric") @@ -94,7 +95,7 @@ def _compare_version(package: str, op, version) -> bool: _TORCHVISION_GREATER_EQUAL_0_9 = _compare_version("torchvision", operator.ge, "0.9.0") _TEXT_AVAILABLE = _TRANSFORMERS_AVAILABLE -_SPEECH_RECOGNITION_AVAILABLE = _TRANSFORMERS_AVAILABLE +_SPEECH_RECOGNITION_AVAILABLE = _TRANSFORMERS_AVAILABLE and _SOUNDFILE_AVAILABLE _TABULAR_AVAILABLE = _TABNET_AVAILABLE and _PANDAS_AVAILABLE _VIDEO_AVAILABLE = _PYTORCHVIDEO_AVAILABLE _IMAGE_AVAILABLE = all([ diff 
--git a/requirements/datatype_audio.txt b/requirements/datatype_audio.txt index 03c90d99ec..dd60967ca6 100644 --- a/requirements/datatype_audio.txt +++ b/requirements/datatype_audio.txt @@ -1 +1,2 @@ asteroid>=0.5.1 +soundfile>=0.10.2 From 50642f581873921d57954b374c307b589d7c0cbf Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 15 Jul 2021 18:21:43 +0100 Subject: [PATCH 15/46] Fix imports --- flash/audio/__init__.py | 12 +----------- flash_examples/speech_recognition.py | 2 +- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/flash/audio/__init__.py b/flash/audio/__init__.py index 23786d11f3..e963de8164 100644 --- a/flash/audio/__init__.py +++ b/flash/audio/__init__.py @@ -1,11 +1 @@ -from flash.text.classification import TextClassificationData, TextClassifier # noqa: F401 -from flash.text.seq2seq import ( # noqa: F401 - QuestionAnsweringData, - QuestionAnsweringTask, - Seq2SeqData, - Seq2SeqTask, - SummarizationData, - SummarizationTask, - TranslationData, - TranslationTask, -) +from flash.audio.speech_recognition import SpeechRecognition, SpeechRecognitionData # noqa: F401 diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py index cf3bdda04b..a8d528af57 100644 --- a/flash_examples/speech_recognition.py +++ b/flash_examples/speech_recognition.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import flash -from flash.audio.speech_recognition import SpeechRecognition, SpeechRecognitionData +from flash.audio import SpeechRecognition, SpeechRecognitionData # # 1. Create the DataModule # download_data("https://pl-flash-data.s3.amazonaws.com/timit_data.zip", "./data") From d2719510462a1f943893a175e09deeb4f38b9050 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 10:34:19 +0100 Subject: [PATCH 16/46] Cleanups --- flash/audio/speech_recognition/data.py | 32 +++++++++++++++++++------- flash/core/utilities/imports.py | 1 + requirements/datatype_audio.txt | 1 - requirements/datatype_speech.txt | 3 +++ 4 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 requirements/datatype_speech.txt diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index d712ac6ca0..620602d8b7 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -25,25 +25,32 @@ from flash.core.data.auto_dataset import AutoDataset from flash.core.data.callback import BaseDataFetcher from flash.core.data.data_module import DataModule -from flash.core.data.data_source import DataSource, DefaultDataSources +from flash.core.data.data_source import DataSource, DefaultDataKeys, DefaultDataSources from flash.core.data.process import Deserializer, Postprocess, Preprocess -from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE +from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE, requires_extras if _SPEECH_RECOGNITION_AVAILABLE: import soundfile as sf from datasets import Dataset, load_dataset from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2Processor +INPUT_FIELD = "file" +TARGET_FIELD = "text" + class SpeechRecognitionDeserializer(Deserializer): + @requires_extras("speech") def __init__(self, backbone: str): super().__init__() self.backbone = backbone self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(backbone) - def deserialize(self, sample: Any) -> Tensor: - return self.tokenizer(sample["speech"], sampling_rate=sample["sampling_rate"][0]).input_values + def 
deserialize(self, sample: Any) -> Dict: + return { + DefaultDataKeys.INPUT: self.tokenizer(sample["speech"], + sampling_rate=sample["sampling_rate"][0]).input_values, + } @property def example_input(self) -> str: @@ -71,9 +78,16 @@ def load_data( data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], dataset: Optional[Any] = None, ) -> Union[Sequence[Mapping[str, Any]]]: - file, input, target = data + if self.filetype == 'json': + file, input, target, field = data + else: + file, input, target = data stage = self.running_stage.value dataset_dict = load_dataset(self.filetype, data_files={stage: str(file)}) + if input != INPUT_FIELD: + dataset_dict.rename_column_(input, INPUT_FIELD) + if target != TARGET_FIELD: + dataset_dict.rename_column_(target, TARGET_FIELD) return dataset_dict[stage] def predict_load_data(self, data: Any, dataset: AutoDataset): @@ -121,11 +135,12 @@ def load_data( ) -> Union[Sequence[Mapping[str, Any]]]: if isinstance(files, str): files = [files] - return [dict(file=file) for file in files] + return [{INPUT_FIELD: file} for file in files] class SpeechRecognitionPreprocess(Preprocess): + @requires_extras("speech") def __init__( self, train_transform: Optional[Dict[str, Callable]] = None, @@ -177,11 +192,11 @@ def _prepare_dataset(self, batch: Any) -> Any: return batch def _speech_file_to_array_fn(self, batch: Any) -> Any: - speech_array, sampling_rate = sf.read(batch["file"]) + speech_array, sampling_rate = sf.read(batch[INPUT_FIELD]) batch["speech"] = speech_array batch["sampling_rate"] = sampling_rate if not self.predicting: - batch["target_text"] = batch["text"] + batch["target_text"] = batch[TARGET_FIELD] return batch def _convert_to_batch(self, batch: Any) -> Dataset: @@ -206,6 +221,7 @@ def collate(self, samples: Any) -> Tensor: class SpeechRecognitionPostprocess(Postprocess): + @requires_extras("speech") def __init__( self, save_path: Optional[str] = None, diff --git a/flash/core/utilities/imports.py b/flash/core/utilities/imports.py index 92bc5e23a9..a02de4023f 100644 --- a/flash/core/utilities/imports.py +++ b/flash/core/utilities/imports.py @@ -120,6 +120,7 @@ def _compare_version(package: str, op, version) -> bool: 'video': _VIDEO_AVAILABLE, 'pointcloud': _POINTCLOUD_AVAILABLE, 'serve': _SERVE_AVAILABLE, + 'speech': _SPEECH_RECOGNITION_AVAILABLE, 'audio': _AUDIO_AVAILABLE, 'graph': _GRAPH_AVAILABLE, } diff --git a/requirements/datatype_audio.txt b/requirements/datatype_audio.txt index dd60967ca6..03c90d99ec 100644 --- a/requirements/datatype_audio.txt +++ b/requirements/datatype_audio.txt @@ -1,2 +1 @@ asteroid>=0.5.1 -soundfile>=0.10.2 diff --git a/requirements/datatype_speech.txt b/requirements/datatype_speech.txt new file mode 100644 index 0000000000..00b7271824 --- /dev/null +++ b/requirements/datatype_speech.txt @@ -0,0 +1,3 @@ +soundfile>=0.10.2 +transformers>=4.5 +datasets>=1.8 From 956ac8ecd662a3b15b322093b9a5d1b2a62b1a3c Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 11:41:40 +0100 Subject: [PATCH 17/46] Add backbone API --- flash/audio/speech_recognition/backbone.py | 35 ++++++++++++++++++++++ flash/audio/speech_recognition/model.py | 7 ++++- 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 flash/audio/speech_recognition/backbone.py diff --git a/flash/audio/speech_recognition/backbone.py b/flash/audio/speech_recognition/backbone.py new file mode 100644 index 0000000000..1aef8419cd --- /dev/null +++ b/flash/audio/speech_recognition/backbone.py @@ -0,0 +1,35 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial + +from flash.core.registry import FlashRegistry +from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE + +if _SPEECH_RECOGNITION_AVAILABLE: + from transformers import Wav2Vec2ForCTC + +SPEECH_RECOGNITION_BACKBONES = FlashRegistry("backbones") + +WAV2VEC_MODELS = ["facebook/wav2vec2-base-960h", "facebook/wav2vec2-large-960h-lv60"] + + +def _huggingface_from_pretrained(model_name): + return Wav2Vec2ForCTC.from_pretrained(model_name) + + +for model_name in WAV2VEC_MODELS: + SPEECH_RECOGNITION_BACKBONES( + fn=partial(_huggingface_from_pretrained, model_name=model_name), + name=model_name, + ) diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py index 8d1b491a76..a7b364c74b 100644 --- a/flash/audio/speech_recognition/model.py +++ b/flash/audio/speech_recognition/model.py @@ -19,7 +19,9 @@ import torch.nn as nn from flash import Task +from flash.audio.speech_recognition.backbone import SPEECH_RECOGNITION_BACKBONES from flash.core.data.process import Serializer +from flash.core.registry import FlashRegistry from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE if _SPEECH_RECOGNITION_AVAILABLE: @@ -27,6 +29,7 @@ class SpeechRecognition(Task): + backbones: FlashRegistry = SPEECH_RECOGNITION_BACKBONES def __init__( self, @@ -41,8 +44,10 @@ def __init__( warnings.simplefilter("ignore") # set os environ variable for multiprocesses os.environ["PYTHONWARNINGS"] = "ignore" + model = self.backbones.get(backbone + )() if backbone in self.backbones else Wav2Vec2ForCTC.from_pretrained(backbone) super().__init__( - model=Wav2Vec2ForCTC.from_pretrained(backbone), + model=model, loss_fn=loss_fn, optimizer=optimizer, learning_rate=learning_rate, From 6b132f2c83100bc09b45142baf2d53ee2e409458 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 12:24:05 +0100 Subject: [PATCH 18/46] Cleanups --- flash/assets/example.wav | Bin 0 -> 108954 bytes flash/audio/speech_recognition/data.py | 28 ++++++------------------- flash_examples/speech_recognition.py | 12 ++++++----- 3 files changed, 13 insertions(+), 27 deletions(-) create mode 100644 flash/assets/example.wav diff --git a/flash/assets/example.wav b/flash/assets/example.wav new file mode 100644 index 0000000000000000000000000000000000000000..8a1d66a36bdf3c54d256f4c61174d5fd3465808e GIT binary patch literal 108954 zcmeFZg`X5h&_CSWvwDVeIETASkPzJ63GVI^9753G4hfz>a3{D03GVLhezr2x-S4+{ zo_q5A5ASd1vv;#IJ>BJ9Rb4#|YBz1xp;+axn3$MiMT?gV>)L5(r_P;*bnDQoYfPab zrDB@Zu3x*^e^-C%Hh4&{0sV2SL`<>5MZ<;-9olVhr~Y03r+t~2E<=VC?J%TMv9K;Z zJN571t#5}e1BUe<8dIoOOtG*bo%#*z+wH$ArAm}7T`KA7;7&ukg_@NvTC`}=#eTgo zRIivqrHg-8@_(;&8bQ~-D_-n>FZJ)xdF0S;L+Enxq-J!bL-zrL`*p(TLyE@~{onD2 z^y<-}bFZPH=EX{fb?e`?L(guVx`xpQWcYud|3`uU|4<-7yP4DmV@%_tG-Cqi_J=ct zYn(Zl5jq^9Gr^q99XfNMtzaCDm8LWLueF6k;-HTT^(@fZiTeir+kMbqXXqOB%|dq= zu2J7MECd0KO&d{pC)Kw(~yJ8bj<>QG%{gk zLvGWBu(u&42xx4=pYXD=A_(aUed!8)1#_XNB>3$AUOeHzmte75F^o*((qUht(dkR= 
[... remainder of the 108954-byte git binary patch for flash/assets/example.wav omitted ...]
z-iO}jgT+wTxjyc76;HGY%;*Jg_5+_)`GvMBMX^M?B)I& zk%?`{mB)f5hQkkkLq*0UGPzgbO!Q!V-zL8^g?;{nH9n4Oy^&csKkZDyf8>WdfQ^qM zrdUi1e1+8&(Wt9jb9N+FmVhk4Fwyrt5O9b8&zo@Z74LJApgL~acPDTU$ zn84Eyhbyv`Pd)Pzr5OP1s+Rxbf2<{|Qc7Bw-`$mmZe`l|{0)03yJO%^9F*nd~{ zMYJ^t>>!5x&s=uTe=x@<@q~L=G4(lR^U{C!7T;xOKRsa7Ho*WcN(b`4xaxf62=*hB z5{_nAHDd5IzCQd;oW~4@OBOJo&u5Y;I;#p7JqKN+zN;YB5lB*htQ@ z5nMQWinCH?!qh&5_E#+!?hj!@4p3rH>$$_M6=kMXR%vx|gquMf)AFt`agB=6V&oDZ zf=3mCIh;)FV}!p-$?O$`D_31^qZ-sjRZR$MClxcBm+n=gWJVa`HcNRi<24_f{Q+ce zCe=ed=?!{SmQvEH`>1C*V#dk$qM(Z2E&g92xQ43o%^WCeiQ4L8)R+o@X{xBA-6PiQ zYgHhdkV&~Nb6IwFp@x-UwznFJM75mwOh3XO)_b*?dRNXc_oLJEM%jTSH@%aaEh4YD#DKKC-#)iJuO1;dj)XRm7hO%3C^3ZGaU-tI*9cpjMC z3t@B}Ay0(bF(aXg@i6j7P`f`9_IY-}o~62?2s$$c5kej+NM3Q?dC6&qitFf@(f5)2 z4h5&VBID^c{e~W{#31TdPPI6yckbz%B>dC1}eEYIVOq6T9D|;OqxvJ=8)d8;XvMsjkBr~46uh$pYdVSsKha`3 ziWbj9I!td>M=6bECu5Oyk55XcOg1W;gT!X-rn1JoV4hGms87(S3nrryLYH28pqTB= zF3LqMn>xzKWn33|wEE({S;?57^wfjYCgw(;+ltjcf~A!9PcXZxq53E#JF!o5g?^9V z)<=z@vbnfR2g_+h^F`Hn+AwRdv4&d4-Rf5=?CKe;e{GilL)igI4gfF;lgCVxFvbsRca(b&dF5d1b|CI4YAddh;z5OPHSS$XKi z>=%2qV#+I{pLy0wt(8>^G19TJjaoz$w2t#k_tkX@nGv&ubs2p_$;l6&SUF9uFPSw2 z)^A2s2tI=puSN;uFD%e4bC;D@jTCNb?(WFUVm>sp=ceCWqWncpD-z4+vF_0sZ?SxB zx|A}g(|rRY*k?8nDYdmqZn(*Z#9lQu^=%JLw=#+j*p+2rv!K!xZHdqFltI2zJ*E{> z#(?xcPATf0+iNa$ zhuq@tC^wQ18KG=5KO3X01L`qqV%maceiLpr+RA6F#|2{2NOi-?w?+wGcg~nMNT4)pLV6|Hrg=X1KGzLct2bC}bWhG^nwnxl1 zFBn3ZuDwuSTj%M0yH}Y_JpV}!A$R-<+@q|RM`hkdJeusVg2xb-uoZn93cw>IK4#q$68Yx{S&N^7-@%AcneRYrrWZ@eJ&`u6X7m2U>8! zWK0$**Ti6@6729)$~kltH(5I9cuQ>Kdd~G;)Bz0OBr!SXr;`106RC$`$v@!N3}H2e zuqytBTeE|_LR(IeNpSBr;sr#3Zajle@i&#Xmzkji@>}INN3tmKaz37HH0b;s`1if2 z&TS0}@``h58K-DNp0P4ahf{ctn_%zT`o~i_V|Ji9)d@eq!hc#}eIRQznsX)u2GJ!r zeEso*spUjr>?59>gC|9w32P9DKiUS4(MNpmBYbL4yzygrpfUK_$?zM8;Ym*Cs;wB$ zr!assi6B%XzRIc0(_+TcX5d!gv^)nlsu8T0K6oeZ<#VeVHB$=Qre^qSOYq{}P|H3A zjf?t>>KIOq)=8Yx$9OlXU`q^zYhG0*@+`qrvDC)vPU7hI7i%OV=l z&ovX{oq`?GS6L4J9FIrR4DWIr=W1{8c^6S&2^dk|@#!<-*=qczGoQH_b%)}_hGC3X z9z5MboWyjvgKPH>p3)g2$8bEI-zf2Vc#ivw$T5(a7<@_zZ|NFmdN}!`_Mi;QnVZ6# zc3rr4ohq`NRKXO-f3>++vCMQqeEIZzKZ2NK9J6=?p5se$nJeI^-sBE`maE+$8fl3yw23>6A@0~myl|GM>(2b6 zr^{$}r`l&byC)Rg(edo3udKujScN`h+_I5fo5;T}dD6SAlUF=>SMD|gWBwAq{s#Yg zxvs%#*~9NcS+k9}Vh`3)Gg#r5`LrW&N$mHZ$A_rH)jBY<^{^CkSZ4!?B2N$#^k2nd3)9Wf6>bb7H^=#4^SB-EDYg+ptfI ziGFIqhn&D&bYiXpxc@8c+!I)xkwiB4_^iH+RT*qrL*D6NTn^*2E@0LYu+3>;PqyN2 zd%^JggWqn!whUrb+4XgCL^hR(ygKq1jfjBkih*Euq}|8z3F8*P-RENcR%9GW_{RwYYuTwH`wW) z*iT=0zpbNQn1AgWk8D^sjpvTxw{|}iC)caU7-vZ0y2kU97pyRQ9el^SC&P<~XTAck zNO5F^?aD@*ahnNm-tOFBS2o(cxa{r__7i>Q6YY1#F^7>!l_hp33A>BN|GOSV@*Dei zG0a^8*SB@)Q?n9-K|T_(kSZ(D&GpEYVLfaN;u9>&5dPx-br^ORkVv9=yNgN!pPYl4 zvb!!7z_ujkDmFLM;1j}m4!b+e|GU&g@RW9COFHg8E3fqYork}$>w@m_tUtNav|K9- zqmYtuuzTg$U0Qzee0~_*_A>@^1-m}R?%DdCwPKHMBv^`{Yuo)Y0+_WpcH|d6GlHMH zvEp{;0lUYDn=9Cz>g+2<@#Gf2wR>mS?+Z*Cce}HPy({eAW;(yIyJDu}ssG=zB`f2d zG3jOBpWUg(?l)t9rQum@+}56#|MyICvGz1#d%NR`-I?qEJvZz=Gj{ix|95Kn%)Q&a zdF=7|$PIt6qRzX7?vbfvv8|S}DfPvn|eNPONixt+rfH{)yM1Kv`{TdY(fd7fXod7G0$JbpUvNfd|nNBagWSL z@&?Kq!PE;Th%mLKT2hUsI;cCG-dF4(6vtQ>9YO8>;xyh2$~v6&Ai;av!&sh9wbTdm z6uij`tk`O>o$B)BV;T4P!V=BkWKC6jsD;5sdZI?t4=cKWy&MT2^DyjYKUjMeqJSe< zkRaBHi*c~6t6g(xcREkbx%f9absvmz4*1N@BzSa)vg&jQ|ogKLy>-5vwT(yeu5EC^vGr?(C=WFZh=eyvm?hi52o3+6T zd%>7Xt{w!vuRnS5wgB_8;{J!IavY+t@HfI?A;yc!RAx7{RI`{|4 z2$W?6Tk;o2QEYM$oqN>+S~1O`orO2M89cQzQHdWu^#Ah*zszjR{S8?F>+^zI@U}ej zLgs8Cm2RW(%7){AA0QIBNW}4-Imtxr!c}%dN#d9JSg0FvB%FZ){)tjlipO(8k~tYN(sA>#cF+H+q%q3ZWJqxF_J^CEk7F$hCfc;|`o0BS3( zMFSL0XX$MmZylwa)tvu1gyXVSMcswIRcG_8zqPLi45nM24BkiH_r7w*RkIzc9C_3~ zwNm<7{WH}JHT8X3G;F@&L>x~zI|7I~-%w|k4r|eY9nppv3`BRnD|OV5shBwfml+0` 
z9D|lcC|T7#YD4X!7NdQ{!s=Qhb$}Q`w6qMCZ&MDyH}LN2nYZBihGM~NRJyzvOK0JE zbh2(wx6Mzi^wyjlf8s$k$Ceb}JPn~DbRT1ojyXF4uWy|^0vb?)v+X$u=L|+T0eka> zIAIYUV*qToQP{Setg>M0fl6TQ+T!^Rfj67PC4`ZoEynl-HK;-o6Ik9r5k zP&g@*^~Kr}R1E%QCv_$o`$YXfe(c6EvBQmOiS6h>l<^y}K7j`}!k^4>7jNt5Ez+Oy_A3g;vWecw*c*y1P zh@Nx0+7;Fv`TIZks)mJYNflOIY;#q3>$%A|1`{jZ21i~eC$V~m!k+WUr(nss)E3%b z^cp$fEbq$UTIf9F=%X)JyC~UZUSpCkueX5bWTNnV_f+*g@-H$+SvABRHC)@IC+a`- zW%@5INXw|!r?$8R@x&JF?=|dkB=*?W&3+AfwFox(5~GvRkDO}*bivz$+}svX>Q`;0 z{v8f@7k#s~8$7cnT5SQc4%H1M&zg)`xBGN6mM6+^|cml0lqD9r<@9 z8Q3h~k_}J|IElaUFERNUEc$y^^+PiqwQ~g-uc5rZD-|~TQ2Z%IRxlgN?pg2|=3;}l z!o)7inQYgocA^7WODx_GVzYXdIH6wF zgrhm!jy^X>fc7 zC)J5;0=0Zk_odck!^4fj=2q%Ba^g$BMv*g+vn+#p8})=Gj7Kf%m%5?d+z@`UMvNLq zmTe)Hb_WBkHz45l?N4R4!b=y_@CW}s3Q3EWO#=qu+)k2x$>Wi zYM}Fmt6V^0z_x&v?!L}kI$F%~Pns=GO+1>=G2wE8=1J@8Voa3{m2aYomR$cs_tLeX zsCHhhNhhGo!Y>-Dv*0ZkRriBM{tHqYVXXHr_C4`Vq+7{)UuxrtxfLbxBHA{6y~F3Y z=E&yA4?BLI_=D`(Xw<)6P}7qQOY%Wx#@g8RrW>f;$POc+JvMqSwMVN_lE0|c*GKB3 z@So;ut<|?+;aOz{v$gTg|Iy#kC`A2y5bt|GyBY%x%<$pOJTNNo2B%br#FHwV6G+j{No$8OW*L0l%jOzG^VcrD4?69i@`w zn{nMJMi+_(%0~5re%CqEohPtxP;B6cfKXQ#y_JYCXZRi@R!-OqZ@WUmsl?pAr$%8b zUg@gN)T--$>Yub;)ZoUeSEw7gujbb#X=C7hJJtIjqeskee-&Rc?*z|m`Y`nLRW>G} zm-R>;PN$&O&Oy#Ejwkv|t%&*qyeJ%wdkv~lM{{nirsoZp? zTCc9xn(C+ZaQ&g4Lyu4wif#(tm)u45;awxv=n6|@0I|ta@SY8vvHvL=N)&FGXB#*V zdgB|~UFa5YdKF~_eZWrT!J5s)b4s-SV4t?9O47yZ9SSetDl2{|Y?6n>2}P2!fVIgX zuB9W?F|(HW2&+@jXp0{DUKA^LJ5=}kfUQB7gQo;<4BX-Bq(4yJ7*20s!u+`Aaj)YX z2_F;t`f8ictw$o8_LY&VswbdT(oq{vzl{=lQ@xg+6MyES>K1S0ZsUm0P2n9Z(WiX%oOG&n2{OH^X4|Wnml$DvP7Tg^ifnyW3QC4&SBXq;K6hw z&Pjo1yam)|6jpHypESoL>u1b>l{C~iZ;UoCqgPp0yXW}By(sXX;PxRygVO~Tb46-l zN_AtICpvy$+`ZVWacAQ5c%J!+;*WF?LW|d0>g$QrYC6(8zUgTl(;Sl=SM(`bCUv6H zKtBFoQ)dAuRr&q@xiz!1!3G^t3QCuRh^RD3gGhs-h#)NpBGMg#DBVhmq*4+h-6bI1 z9Sdws+?fCS%>DZM{h8OovO9C{bI)_mc}{%JIp(HNtzhRsdSScB>sqhr1zy-IMRah>a`hPr`Jo* zp7A)dY-oTv$zDQlLicMqcB%oI(juLdF=`jhkH7bqep=tEkI~=Pk5R0ClMLNk z%;8yW%{MQHhlS^cbE5NKTE(2fbamUS%vLLbd4H*Ssk9!4r`TWJgP)O`yzMBYd?Q>^ zo>OrW2=g}h9e;vN`H{%&dL;Hw{K%3R=OgOZL!k-hIYf~P`{B1@4}n_8;K3X z5OMJ!0hftP9)#b<5LuP3OU22cWTk)CUVM)ane3B6{oM>|yUT-Yt73m)A7Gw|O+Tte zh7q&d|66RHxQDUp{9*5BT3(QjP0ZGze*)hIatE`9JmGEO8RlT71(n8*wj(8#rD|*a zrg7W*x$g(x2fmNJoAo*BXeu(Y*t^XA;o9M9;mP4dbGBI$4qt9lUrG+6v{GGdj<&p{ zzXmrQ)r;yCH74tjgV{(gsIvAts~eMnx`7P7Vb!va*!{tdh9bMb9M_;9)H~W|RKpa| zE>lx6gqbCSK@^_g8F#`H&+XOr8m#;eqS~(&k&xQtlmhg4Dy_UneQ`x*4uFsuQsRe^Z-KH~e#Lt!bV~unwGBD564@KcL3EC~kHr z)?zFX&1`hdNmr_AWAvWJG9!y|k6wJ$l_o@I<5}Z)DDwn6G5}5|zxB6y-fWL%wd}o) zOhN5PUM_%Ee_g3WglL=csyY=5_N@9HF{dDW0)iL4H5mzc>8NPv*?3l}V{#tMa|#fJ zc@z8J&%CwX%6IfN>_v>JE*@GLYP#}KFMSdpwE(fyPl-^rfN~AVnXiUoKTsFif!OX! 
zrvcIV#rXZ}i2N<*DQmL#ZxKP2?l+RbABm-7BgG~0Wt-wt$lTB!tmX+B&#&3l`B19^ zogzF`?k~YEt%yzkz&>c7uyyW`1{!0fvQTSf^!5JBgu@ozcE$^Bk-C~Srs6r$KR>b( z-?WKU!yFf`6&?`IO+2a!wVB=MBmEcs`THt+!MQ9~i)(AO!g@!&qCQXCNG+z|t@=dq@~cTyZ{MfZtqDC?ax^*?hQEVRd9nju=3Yp>6D=QC)4)QJ+~u0>r$QXz?-yFyXb?w^<%pF z-|!Fhsm5@%qvt-cq({zeVwdlDVj{8jx$v{#-N4I0rBGw@XZuGm1|#UI(uuh77mB5v z!J`_jKhvM<)Aftm9rd6>T$I^^lkq~Ag2%R)=rti4%P+|+tDrU19_d`9CT@v3NxP&i z(h{^*>TdXLn({9k`!=+xt<(oOncr!T?G>~tP;Z=*cve>=D--z+a;iNCi9U{QiS@b0 z*ZNVtf!>As-^QMEPA)K1J+W`rk(+4*vTvNd*lKJYv{LOx^y6L#3TYGlwyT4&Y)w8P zojmss#P!S4OW+#u!yvVj6jGYpt1g(FFM&kKcHc6X-oV(OeVE%$%EfHV_PT0*ADL>2- zf^}n2|UlYY2VtsG@jLq*S3li^SIK_yGtx~_& zRNbpD!t-lD=ez^hyv2w<vvAh*iU zi|{xSP+hA+6~r&<2_jZ|BX{r{+D0{l>EuT$>o}+B_;|){=@dlQon+d^$MD@^P95pd zRg%cYDs)i=vQw{MkG=}Jy#O}jBKU7Bn*McXJ^s)WY=h@kF}%o9bhv4UZn);r)u~!7 zBad%>%xHfZ|83u6YA^FD6(fy_Tc1M<)S>TKfBOrj1zrvG3gi!N2t7Bybym_TxQ3dD zZQqNzIGM_;+E9IzVH>B6B;z!wy!oE)NPY<>FO8(9X*ru(7^@lasS)_tWziq~i2Ttf zipbs~EWGL3O0AC;QhOo4e-qvRnZ737(ICaasN{93QkOlLiK&(F;-uGHE-a8FY>RHp z&izN-peuIbHr&$z&Yf<*YissldZui$&QPa6hkD7|%r!i!EY?OD zQ++M{$+4gKL%u@ZJ=%N9pvV*FEzYVGPPr)&pR>j+6B-n_mDx71E11(vx0|x&SoHx< zcZ?j6LL@q*^)y=IkJa?*MqaIg!X$O4vU8PlxrF0&zO|p@zZSQym>0}?Rx+JI7CGM# zZEK;VDxa&5)ep2LTB3G_UTK|~;Piw{+u!WSL45mskrGZ*eDibY+Z}crbW43~gzQ92 z4o2%}YU8g~P%X8AHi??0cG_9B3q4Vo?nq{KIJvp*@O~EO1$h(8zGR=YgLru}oZQ$j zvp|W|qXTLIWgSseK{4Dx3uK}T)?gi;hich~8n2>0tuIpw^AlJ36PVyTr)5+vqEOUz_+0_7Jb3U zO~z^8Qs3T-SYCN$v06*NY=nrY*7pw8zfz}xo!Vl*iEX}*YUIDElX=4)P5t@`^O-ru zs*J`e0|qO33KThw$LPB0~NL$e=mOLG`fqslFIHJWU<0e2#v-LzJl)@419!^CiBn$7#!~>Bja; z`x1GF&&hFU$`0a><;ZtuU=y_ior9;&_d`G$t|boDmFkNbRLnM@6U=B^w-;Ic&^v25 z^Jbv!ve0L{yqZVv@4XZ=B6fT1C4bYHQ^r(vS0oqFj_mB~Q4sgZ%;%V477JAk6wWN4 zc_-k6>w(lfqW()4o9pUq@?1%D2P&g&)L-|ec^7-%H9pmPDq|yO?E}_A>#CJN*ML{; zd{m^?vr?%0yh2>P5;2kY$iXdPcArX(+c~avPy2;#1Fx#Rkkb)Js$dBY5j`J6#-kCP zQZ`tobt4I){lro1;~3z zKd@goArBFcX$d;85Z2*pwB9MAtR2Xi-@uNMK7y^_>GH_z3w*!+oYhC^{#4dZvyQWK z7cJechiB8unNM%~4qAI-t#7~oT?^Yar)S3H+SHP8(!mI=*NsU1tO8Ug9&xPJZh%H1><#SG#$puj)TCToqfS!#d9!^DL}b_RP^j_II{ zh-?-llJ^%JG>v|f?Lq84_smmLnDPEQsDpX%+2ClNA_3{XhAq+qYc2;JhHU!<-rpMZ zbS`_FHOX2)Z>aHhE#fbIn1=qdHjVn{qy9Q^6XLeUMq=LdPEm_SMq3rk3E>0bedY~j z0&cUuFyq22gF6Ci1D?=abEPw0$*mV5PvrAe@pdPYUR>X)Ezp(`Aze&u->YD9yW3f< zeCDR`cj0E?PT>>bDP||ElBLcoo}Os7L~S$?niX`;7^rR2f|{)zp?BW|wWKmNLfw|- zm@~}fRD52y#@WeqqRViq^K?GY`hAt`L|;2;Tj|o)g+1%0E+=9jvvDecN*;(MQHKs{ zb@4~XVC!FT@(>5u4Zff~=jTgLUMdcBS#fw6*VqZ-lgP zqyFU&syIHi z?wKc7{ASc}1 z>Po%x`^>QX$|{A&_bGWYm6NusYS3%^V{N>)m>KS`YHRu2CgpE1e#fvNn%Fz2Shz-4 ziClE%8;|F(5jnm`{?$gC9mg+x0ao*GPz$y2hDLI_>_dmofU;AF;N3=&Lqup2Ijs$7 zXoK>2KwN4Fwwy+FW>yN*891KrmJb*-^nW^L(uVV``*#tlSGh)N+6`p-D zev$`mm=}*U7pKZ{^h1OW)Nk6ismA}@s$tbfzvPWr%E!jpm|XGWv%H`9C?PX8K);@; z&a-gCpg*uN^LA#dK*?b1(6`}%<~VDQodibzvFC}>O7j@Ayia`D{OkSi`RDtRjneAd zk&i7S{6jD{SU5NYN+*Sz(Gz69orhUDrs7BT`supS-I!_2CCZ#^WYLGJ^s91;+Vjnc z;q2ie;c?+)daLZU&QW1e0yO%0HK=W+M@2KEv~ido4ehik>LjI&$B679>vrG#&YXfL zc*i)n9)*kd@At800*Y3gClVG}A1LZe%FntAE=H56)9LLzf;+Q>|~33#-J zv7^)AxFJp-q;W9*-BGe5mGLra;>oVUbLhz_p4IAOK7(iCiQ~>yUwSiQ7i9@2l}}De zED-mlcb}5e_RxQ(Rr;BdmU$(k0oB!$LyfJZNEf9%S=&j*IeHCDH^$Is>XNSL$wcaolAVcn zLe#5QFuR5yg&u~wgsYq1S;d_M&q1Y<)=qB;H&*sKMDaJ1xh$iu0U4g`WVdTs`^;al z|K5XMhwKaoe3YUxSN0WV)jk2kl1hE%W~{L6Oj1v`H-ObRfvvU?3wsrrqAB20;_>z$ zgMxjZBg1V_0~n@0qNV+b^%WgUriZeIu7rNKsw=&{ zW#gJBc1Zaw>xtyk2|4^T^ofytVLN@|i)zoaKEL+7X&inM5S{eV$S&!Z-KdfO$#IRp7KHl-KLNXgtC=n@`pkMO)y+Yo;);veQ;5%Z-l z#`}prNd3!0Phw}5{f)KB{K%|oeqi=6|1ujRVXf^qK%wmLe4x(LF6(!Ve%>KozxO?( zyuO$GS++<=yMR^MT#1gXVV1F)*xj7f^b{b`}~}P;x3Kh*~|RAHijEn9b@>@a#~fP}fk7ux`eIK6}?0 
zMc2J7N@r!6l9fK@RkS@?eSI~t_!jySx&j_nCNZVyd%HNaTNEBe*QpZL9J>J4_&p_F zJFk7f8NP+?kk^!LAl>IfwQ=;Y>qs;vJDySvA|qeWY3l(m=BE)GtqO*G3YsmIyy5py zZyYg@?cktd@vXkbO5Q+xvj=gr@%Re1mBq5?Gnf+kGc?XhQqqiuvEhV&lRiy(nEX>> z&G^`uNotzeIHTS3WlzsO8T52sYQ41Uf$r81o-um0m~d=3u6Eoa|3P1-kyG!k)>M{a zj}E1W#zJB$Oii-`aB~w*k>p^{;MUL@^S=Eabz;+vGBLSgBeDBp7y5tnJ<+c+`KOgt zAlxI^FyIYb2y_ZHHMexi0?gZJxCe6JOj z^gytALqO#mAots!m{3=0wNH^ftjYN>9G;s={I@md!eMg46YcHR0WwaZP&jnlY#dpm z9rC>r=g(3;>28vlI4+@3?C-|&$huI0^p4NxKg;p#?`Mymw@Uvb_@+Hf*<`%s?;aP3 z^Tkc`hkXUTS@jT^tbCEW#LWM)Q?PFO(zo%Hb=NGxDOx$4BRnYl1-9Nfrwlp7e%{Yx zzV&bO*YFpPsqa0k-G$?tnfd8@mmC-pSQdOgoM6SHrxLMC7U*8%p59uYs4Y>?EA>G< z?joimrjH@?zbdq}FL(1)daWgeOwSMaBFQ%oR4wkjMDy5B*G57p6VpsTA#eCsAZ(P()z-N8X0bQvx*h)TO zSmX#<$t`40_l38GQiCUh^Fwc&o$UjWrs^3z*|$FCC;v`=e*Xer1EZ%pEs|iznBzh- zf~|u$gK^1yQWC=)Mu`d>irz7vZ$SWcmLBjdIDFX}%u*H~2?zS@?S=zn0H8Huh!0qolqm zx04qomX80zS4&wEzMEd+#p|gBk<7)ZsV~0^G_iJjCg_8G?_ilF_|=$yyc3O=)RWrO z*J#RWo+PYHAcDXQD?62*ZwrIf11&R~WzGnshaOwuNSb=w_$a10&4JIxZubA_i#2Q} z03Nimndx{LtwX+WWAm=n*V)8WLq8dWc&Zhdy~W(Cot|=dO@-+X+fHw%d-Rf8L>WhI zY8yMjDr#;CUkh(FgI0UzMC27HU7o&9le8f5_TtKG%+#Dn^g0R5XJ2sKOb4OoTwi?j z^5mHoqqiOsgZ!M+u`#jSpUEo-rnV6gw(3|SeVk49H7mwSHgAOv1=Y}K^KoR7J}2hG z_;BKo6o1yu$wd;I#$7kcMwSGpr7eE`TIz~tAEXX?kt3r^XsTUBNzqMXuXl`hzfln6 z!w{mdOX)r{SpUp;Vk|Uz=wsBfo^kfNa7r*|=ArZ=>D|+xW^50B1g+lCP8gPNtiN#V zCjYRQjoxv36}5zCBN(bdSn}1$l8*>th)+JZXOK?DN#8 zSLRiQ-%*{;e^^yUDkL(vEsDUA;@RKAvRzU`P#A&KQvXeR`LS(t($uYIbGt zAMtt-kP^F%w#Gs1fNA(X=eUMKFa2NDy-I&%W;nRx{oze?WUd>^67Fd>v%iWAR$eD- zHW!pY0q8v?#KP+- z%joS=8}$2p{K^?<{+yhJYmm3=oSh@_&R4))an5b_%OW;AfJo3i{QAE@dv)S0$b&`m zhFL58ZFr?M+_O{vBW8O1iNqYqDM{gkwsFgSW7H>Bhv5E<*o<-+S2BtRN^_hB%}G{@ z{T;dE0N5dBX?j*^L%mmH_Qn1l_e0#5u{~nGF#c6KI4#U4!IgpIfo;J}q4D8y<^rn^ zyLFEq&Ki-1Ms$`~tRB**dsq09Vy^r0`L-GRwcg4rk*fAHEZVa4UTh5x<8!k0ML;tw zq$7(#bn!3GP34+;TAM>1#y|7`DyO}o#w)3jqs~2ac@rYqQ@BG7qCETYv9pr-TSGp% zCV00b@@X^4+f-AoW5Yf~8)e1*dP0ZXyil+!_}&`yKb#DPTWQLo$OD{ ztD(8UrGY7dO~KV+zuk(Q{wiXs!}P9n!EA_c^O9aroxzL@)Vt~Xv@g|NRA=|GH7l!m zA^e(o8;w*NKcFeq(+lvz!(O>|VYwO8-T?V%>Ir5?N3*PfX`UpI7J_4yQ z4q99TciPh#3&l$kubs^l`;V!(P?f3J3JdVU>k`48Prm&und4dHNT+bx4zp_#t@sGs zt5jc9K{5|xi(Cbv@S&N-oNra}RMr3Vjf@+VC1;i|Vte|^dLQW3)ZFB?@^IE3z#kon z2meF(a%Oz`()8__J%Y`{pV{L*X-W(A8#P@mf{kc_)aayZ-a$Tp%w}H&y3}=LcK0JX z5^gk$TUF>|HOqQPHtHg=oJV2BJY~LSzjP`oW3-j}PkJ4_h`tbX^v}?siNwpaF<@-s z)$P>7cecA(MXV0+b%Y$k=4hN|C>r=z`U`_9AnKNfTc7m7fg{S?8XDu}r z$(|qa411&Fr5||`9Hii7w;^89ioR@xK?F`lZ zHJFC7| zOVa8SIc!daL_su=)EHeSg7^{k@f0GZ?dWNqmCB62sS~+QM(Qi_13tR_DB#UI^7VmI zoecd0D%F3+*8Lh>ibc+NKqLXneG5_T?PU6g!9^c&E;iyd0L``pTAc;OdW^{TFs%P} z=*B%{5N6Q9^g!f-8la!QgEafupM5-oR{y zG_Z;3oJr-ae0FKF5Mx1Xly$1n`LjA1)?7$iPpy`kNrhHp`kc2_+YpmqrWR8B5vxjN z=T%QT@?^)ryx#$Z`WLa!gJ_MqaLgtosV--D5;^XbbYvNXzx);wR2Ld{U?xojyK4@! zP^y5v$^d!#H&q)~s4V>w4b&3UQEs%&ZD$`^+JuudB3?3Qb_!_r19T=zCmLIvJama}CQ`9p>Jmb_rj)E9z`=AB}RIk zx~$QlU|wV8=gEyoXOBs|WCG9ka7<66W)~PBsXxEWd3Ke`nm4HCkjhAzgD3sM^7408 zvX(iBMoWjUOuho3TK)tfw30m2PEaww;7uLM2hv1i`#)Kdn7An5}R`!YGuXP$@JK>e(udp=R#z|%eD>7;bh zT4`ZAHCOdaL>i<%^8i_x91$;Ex{CGGkIbVMa0PMkyzodxo~i))axn3_H>oX~Nv2x^ zhrJt}aG40iZDit0{Iy>>4aN~q&doI+lL;G7HO~#=?o+8%xldhHL9#sks1RxeJ<6d| zd{oq*f!B&)fltA2xDQV1YrKbjQ155%`7695)y*6D%?rL?1H}W-OQw=OC(|vsjr*)N z72oDA_@L)tvjlB*7}VbeXf~c}$ZWwj@KWj4W?Fvv8SCO)cIK&S8*ERH0;{c0zE6 z#nA&**~KoA)!=_}A{(u^V^6ft2Mc{sYuN=Vteh#F)aCLqnBkhTc_rgZ*4TphP=B{<2TGJ?os}+2{ z0cl!59DO+myF5`;ok@(Y8q_O-Z1*5*+?`79yM`wG6OVN#2>GQ*@N((`S8%uGSPAR+SLT78;T=0bw%y

()EA8l9`5M}BIvCdMOl8k> z)`F1P#Y$4C(0KxutRWfW6fl_yc=tO%FkQy_xlMfc89dMc-(p_0t3x$*ye1aW4%RG{ z`j=VnXf)>pBrH4RE zDEKXu`y6akSFD3EP^~-E8o|$6L$e;#4RwL*Ugf=l3^2h#_`zXFW&btkvldHZBa$lg zTgLY?UwH~t{2Ojs0Vf@SGxzZQb?9~zIti}eCHy8B!VvVk1^u2uS3&9?hG%77@?2ii z;J)^7UrY2{H{MfQu$kt;rmVy-;xl`ul!V=RAb2U0Cne z*pGwZ4?#)B6D>GT)n`GtaTDApQ_1GC+SyTjYj-T3HqcAvZcc%6BjLpb$dup>&#|sk zXc`T@^#+n#9*tg!{rCXteFmTPrf#nRoHm2+8^ZlxgMI9dbT@~Z^QlX7Kt6US`xxO# zlQ`F}Lmko6Q_Zo3 zWQw)?Twc@Qg>&q$AT|GG2Q5Bb0UmxI>8*+_@BvafjZ;IqMtlvw4MB?gW5rAbQ(7I0 zPUF=M%-EaMLskN*oSXAP5DAx|%K@Z7W(}`^>n8KH4!ySkov{?jTLA4g;gJXj?%?YJ ze0n>o}`GN_sQ(sRn#cwz}7rQx1$`?Ft0(|j3gF1GRh0zU}v6Zb+^FPN)MhN z;JfbZOs8meuopSLU#(4T6jj8CnHsF{Zv732`YQC>_LN*JM zTj)r?uTJ3fLQrZT&oC0MXu_AD&&w>qVeIKr5UT+qcwzGO1|E7@kbn8ed%bX~Kr5NW zzJRZJ@cvyqy@u$pdT>tzsQ(?d+!3$<2eID1fi^Ow?E;*(7|IPnK1afTgP>g(tXioK zs)U@DB+mL9*V+v=cEewy%YJ}PqtUa!^Rr*kwm(8Uv5N)2EqdVyRFYQ^UcZjEkgi-; zpx^;kFK3GAGU@;8&KIo@nmK_u(|Hh=ONerGAtU-M@)MQMNkl5|c$%sel?9v(G5Fra z6tROJtEbh4(6NrPnaOGc@nEJ?Yh90XV=AjJiWi&>xjP|WA*p?3nwK+# zZg6}ro|JTd2_Vl>gK&-WbuX`VT<3de{WY1*F33l1DvT1bHiGz{YpMBf2kO?3x8EKN z!w;U1ox%1fIv2K9{{V~dL@jN+qaJoNPYG(1*DI?cJ(NLO8}(!MyE+-MMB>x)=#v!! zMb`r>>;M*TKI-rDfY|&4L}3D+?mc2%E%4n6an9VrkE_h;>97W+-iF?V;f}Rv$29nA zD=3x1%&>b#)lns-i1M71=sGB}1yJ!kv}=fuREB(r^rgx{--SQmgNx`KnIL)s37P^w zjYR9*;`+k;|Lj{lm5|2mCoL-BM%%x4>9|Lm+GlCUz81IZksuAu+lanJkuS9)n>Mr5Os zpW4=Oc!^(Vb+p}Ny&5v%PEjjjp$}!BGmyNKU>h1nMuW-PVmtOaZ0J5z1GFbwe-5fs=@hN77yw& z=kjYv{d-7*O0CR6&WKyk>p41W0;f`YPQHOrKmSd9O;9sb7G$Mk-fZ%i-N85JcNT!j zY-l%ewt2!tf-OAwsjR&kQJmS@9I7fdDRY_matl9wJJ_!?N(HRQRrLMv(QAQzT#*V) zI~$BAdMq-MPAHR^IMa{r1kxvCIym}fL}v0(_jD0{ABdbvye|cQsZVY-jPC1-E!~eA z%@lCP1Mxb4p)#Tic+kUS)n?!$q@d3Rz*}EoanO~M*fM=6ux8hz3sdMXA-w_aAX&c= zbFD{g(r8|E{? z?;QAtK6)9wu-XHA>#}DpIKu=rSs5B>Pe%QF=LP4(L+UMaf}z-Ljj$&{k2A!ZniJvp z2|V<_bnI=BNU!#$(0U5~Z!G=nEcQ>LRYSo@ zZ-E$;m)wA_RrUjGK7HtC zdpc`9wGN(i`#il}+vrb>TY8M_>YfqYOn(M6} zorkP1pVN?>?R_Rn&A0!iw(=yn>vuu;zvr~2E9En)`n&R+lj+;ILHUI1D90YGV|wNiVg(*7!Xh5&VU-6PWFGtTkjPk7^v-x}gG< z#ne^w)#<7AGFBMvwRKGS9ggSO%hQNX@6$X1`+IYnIo?*WJ9{`2?C-2?)CBwO4o)>> zJcGKygP`eJ(`~$p5`a5yv!9jd0x?Ui%^cBh;H4z+GsWrsv_fsB)<%BU;3sWHN`_#W z;w3TPGSyz_3@5IZi~g0pI350WE`hFI40j~Z!E*t)-)Y!=9f_%Zg+CxR*JQHbS;$UC zkf?E-dD3I=2D?~-OyfN#KUR1@Jd6@_FT77xdkQ*f0(@14^&Uba=mraINs1mjCRyXO(vyAx^-x=p-oJktVd1?5EKp7M{;*7rcqPWC3%sljuA zrUFg9)fvZRno`yc`#$G^*C}YfX6-QJt=B;`deO{Dkn*3%wGx-*yb$8cY&$uCfk^3*H8|nkzfW zAnpW3QSu~QEZVh@`9H+0my9v_kDzG_eP5>-Cqkf%J#T;&8g zvJf^^{YVzR! 
zIZiCLzyoBi1JhqWLCBKte7l2Sj?1?%dR!@Ora^miha$eER*E6iMH z5+{F0(5mC?f%ZfvkEa=k{u#Q^tyB*XAt`}nv5~X+8)XGEJz6+d z$e@iPX1Ngg>F5ly>(DoDr!xv3y$fI6A}>qzAlNStpY2S%cNb?}h(4!vu=Gb`1#H0v zFF}W+rQ}tEbnNk(1Hx_TAhN>K#%uYSYgO%XRyEHO-SBnx*N%J7KUrNtuIiCmDCTs` z8{Q;sY@`qK-=>%oJw>(l$_#r<_$s|L{teZ#`a3PDqO4&rHS3$(EZdntr>i)1pE_2p zrL^}XD%+T|+)w{gzfM1wo%DdtU_#(kmCAhem?vkXsGUe{%4{bC3HSnJ`BQRgqpc%$ zPpa`3QH3}W8)Xsn-obS`X)UxpOd@SdR)3OegSAn#4(bj1kQ{aLVtbaSyISu^W2dgY z)VfJ;l(5quFLV^>i05_+8xSblY`k^)CBM_r=z zq`H19(~_@Hqu)*YLpw)Tl3(;@Y9CIf1z5it9sXZY7DmEWDEw=9k$D|-*EO(0Q$SiC zHqTj8oVPtQ=v#k+Y|mI~%JwT*a9SPx6;S_$w6aV#xXk3`ndtF~N;f)glyEjs7t_VI z?S@SFxlJvfO}6Tuo$9Qlf6JHX`7NBCCGeAG6GzXaV&oQ)$D-J;-xAku#%cVV48}EP zz#gSe%0ljIf*`7Ch0I^f#>mTKre<$6TSsyk75xQce%B^r{Z3M^=~H9gPneJ;IkuD9 zjoPFEo>AIV<2`Q$eW_D4m?h&@=9BOxTZs&Cx>$N>TIQ+D!=dq1Y&BB?oG@>ITIuR> zlr7r3#tYCT$Mi;2g{;(W(IMwFn*Ajef2nBa<6!OYnycCUu3*D1TjR{H!d1d=oA28V z=x+N7QRxqf@Q#b5kUdzZJNjsSGnwkQRhdP3POGM`2J;ohPFEryU^72P!wezknA85& zdc*2u#glzC@I1dnj=v-_up8|3bSm+>(d+6LU!5u=`WlHpWV_em7^CXRVT+GkS-Z9R6>8IgL|z!t2%I-sZ8N z#{1(|d;2I|?3ZT1B1@pQ(^5U3Syo_N#*&PifqG^xES*$)r+Jy4K=sXh%*!pW6$Brj zTN|u?26D29p34}^tmsO{D7_yU$6{CllhO7q(PMccm7Tkwjr)e*4tvaY)^2+$RTlqJ zf%MW^ZdasN!WDIgc2JuM-sp3vR7Pt_|A^kY%1pbP)K?~BPp09QOrc_5MecT4?WtfX zNA*{B^!Kk=VShux-y@%(2NuDDxzseYe{EvG&G7JhlY6>IgnJP7z;w_6Q@HoX0Qy50THoaL@^-n=(*u=>5t2j(3!iPhU!Jg{k^| z?_gg&Z%sX)`eh{1u4DBEt3M1akZRR2_k@-QjnJX+C94kC`4MZs99OOzIY`y-aLq@L zkW6g@^Pih(Z)sJb;3c|`9w+)moCWQ)mb`OICSLcoKPRKR%le4A(w){q@_2K|NKVJY z8$n$A4J_P{`hYs6B)uNfqas8yMk)uX(wqtUGM3Iv{o#f@^kaC-sf|VW$bM!Qr26SM z`h|Q?{H`MLj(@)Gxmms7xo2QJ{7yQGnPaq-c3bk2OL-odm`0&OIe{k(cdy2>1Fj#zz}Vv%4)7r z&D+t~tL0F$c$$Nfok7j@Q6dN5*aNI@K%ou}=P|p0nr%%EvkF>l9Tl{jnG#)-8Nki8 zpXd{^l1i4BYG3s^-fv-S&q?^I`#GhnfaHd^uM>q^#w?gW zm?msv&!?!Ls%fg=FqYB#^$!raL$Sf%06)4N9?KS)3D!u*YE6JEW@1G(!D9Z2lf#4U zTZcL~m7Wzl#l<9ei}Uvj^Hm0X_V~?iMm8K|IqxJS}XFE}b8pPDwkd^NR^2B8B>_P2! zY9Vi`Ly1TXV0O&{qA*wSJBovJtc+##CX)wGSrO|CbnRpi3I*|CS`dG}7nzK2?I=Uk zP4pF6tIosDkEdQOgK0FS!21k=Zf&9eW@M``Xo(R>PSS$kk-*+Ue}gB)-Pt zmvqDWuYpgSNZer&yA=jmR*%kY%|N|$^?aya(R5=FbG)Z%JLx-+TidPurgvdp=2ESL z(jYS39z~UMrj=*&N~|RSFRNH5j~#N_*sbt(+1vl_I^U>BZ) zzPUo@0+}W=fT(sO@~)raM-D)*Eu%iK7M!DFuS%Egl|OA`7Rf{VmPuRi9TAs97Dl{Y<>)DmmtOvV=GIPBOx)p!Q(0 zf@O&Z+{9a2k2Y)xwlf6cX@dP3lWhw+zk%uVVK2-y<}+*cA27G~u(UqXiqVPXBna<& z)ZiAjli;~>bQu^zFS?;j4asVaH4m7xt)uq4kzc5zDyhCnr0_a?Ql4q@#X;}P)yirM zsbtBbyv;PLW<-yE0{vbb^zurs(4Rh213~0IV;ag+XDs=>eRPBSlrFAIsb<&=7QYNp z+@0zj`tF6

fzPui`IcV$GY5gU)V(*KrYqNLzfHon*xK(Kku@VcvlEvMP<0AK8g- z@ai`Zui1nA_26_J40nBst}{3tcEYu$Qx+LrfS2++Qo5el*lD7LdC88Jh2BA;|8wB! zM$j(>PCW+7Vgk?bE*4G@^Kf-M zbJa4Gw>%S>0KF%CA=E#7+Pn|v)d3&+4E@!aiH@36F0#inUcCrPX&xxKRUme@GAra$ z^&mQ{47S!??4x;L-G2w2)_||$(2XfVOoyolK7Kg5c`i|{b)L@@Kh;g2sZE$(cbUlK zY3H zVgalJiwsI`weVh^QA^X$IqAH|Y56menjGYHXJhrYfu6mH z2R$cxIZ)lEC}0|%kY)Xq48{xM^4o|#9N{^N5Ye6Cl!uG*l6`9lCC;K1cJhu-xZmrX z9kr-v7)4y@DL%!fXjK1SK0O+}W^=ag1viz2@8!IhgUn7P+q8tM9Yl_waGss#`$>u9kpM5qQ5mn~0*qbr{Hesc4-@&3DFn+yfBI*ISzRZJ)_6nQ#HY@!BH z#U%3hWr%aN1W8^8{q;T(i|>fLW+!htgZ^h1;KRS+58coM-8zok05g5DLL{SE4a87mqCDkUe_JBT7nE(vXlhfc(%r5bT~RM! zVRWT0cTLcsWwC}l*hJmA#uRM%rs%5dtn+K=nuJ7rh6f~=op-Qa8j->68toHZ9+^oa zgCiNnD?|b>p?!8rT#2=AL-$<64h|DZi$#j^LBY46L=(_6qllo*CAYL7nky~B>2!!4 zTngVT=d&_la0%ZnCkJ~M1mkP$V0m&XUNCDnxzAIo;qvm+QbYsd;O6sinV?{{aPPmN z)Nv?%hTO_O?8#9i`!e>UbZdWwIB@}Dx_+Clb!6X$z+^YpC58>=m{J#Ud-54B10{bnKCqihc3fKmMrzjfT-HXs(^ipz^ zZXVB3_<06;V;*u4i;VUnKGA_#bbz%VW3TUFZI@(S!Wq|i>a*;+OsU8gUFT!&d>m<5 z!k7m(@QZkW3 z`u{)Y2~v5VO{`aip9GQi2(Ej??>+c`_n^{ocH=y|^(qu<#ZyWyhslZDB^NaM2tN7< ztG5)Cb@@Ozzyo!PvYxtR$m+AQY~0J?r#U%^YIBV;NYYdG?l}7+sMHv^P3Cpz@CC*& z>$6ykkCn?@3@^Kq##$s_eVgw*WE2zOPMMr_C;_4()ZE=Pf`;uR>=}>iK08#a{Wa1upx3#5js9VmIZ6D8G6`cD>|b+YO|ih zV1*XJQ-_J&yai2a@$6Zn6u!Xq&$7b%aP0%|LD`U}ci@d&JjWx}ewtlS+2-;8Tamsoo;z^*%KI5L>enypscZ6^_bm7w%IQZV)`uJnp^+D&!-UlN{AsXOWnb z?4)=fE#T1V?9eTE<#*2ei%_c|>nzVIGNJW9?ByLOlMP8O1chHf*L$os4bCjeJ!DqT z3s!O)uArMH`xpm}Qlp%3hR@#Pr!wCr3w!c_|2>8JGTlXHu{_~ZXW{V^aIs9dQn7%a z!pn#GoI}2$3Q|%ITAbkI-NG|m<~{kNI}_m(XIN`0T3Ys{9#2vpEmMFyX~@YVw84I! z<`OHaiUx1XmGWU1-h$#MxoZ+s`hcAiS$&42AA#CoY=Af6z^1%vaSp|@qo-KiaVV9I z9j?PQiXcO0*~_2NypN!MC9d%SyIh33UP6oRK=WQY^yZjxBN{{TlbXb&GK4EAVX2*O{Jz5x!P2n0(c=ktdh3t1a zTqUT0to$us9ysI z^%mr6GL=MdC^w^PmQy2x6(({wnKq=s#do3oohX0G$yAWlX;AkO?|&HGtDIcvHKgMW zUaxSs5ce0o`Vbmt=XYh`lWII|Y4*7w^hji1@AG=WKBqwcg3vC+z6+w^BtOjyU8=z? z??BIFs1x8RW!7ovf8WW>ADO9Q!cPSKln^~fMa~|f6WrN89#$;#fU?0G zf{)6=Ge3;#w+sI}_vGDoShJvm18}`e2NKF<|KE2xd9u7*+l*>+hwECb$Y$5lpwA^f z_n6Q5c(>fg-Ss^1R2g{k6;>O|)e@rm<#nj=3h#(xO(L^i)=R1Z9+GDw&&KEF9Eq?i z&v-rOxfQOP9gQmUlM|1xPO7CPkRin;l`8r>JsCXZR4k1J(`E;5rk23`)Z z?;hSGJv$?uKnCBtXFwX<=ALX1xT35|<&G(QUSS4hjFKK!F1AU?D!03rfl?D8a}JS~-ospYyj=9LPB* zFIlI^hdis!`*qeRPvGV1f}hD1edS=q*|~NszgJju1Yc36Z3zvs@zgprl`}6Bo|XAo za;gd?1R3U9b4K)OWyNC6dZV;Ap^%)FX~?L|D2d}*LK~@&$Q$Jp(I#1<`zA6d)49@k z-aAnVb#1Z`G!}Ze^SyHLUXgH6l(dRpO+_&=gDR6RS>GnbZeQ;<>yMm ze{sA|_+4gf$#sR^@_r9@a9=_dB}(7qC_Th(bf5htD|pNmg=>Xpg@1*LqQk|C%HVtX ziD&^?zwlQmx{7puE|Zev3FTSjQyx|rW`$z4$WN5$$~Ar}y29m4xt9F#a4(^%d`>2( ziM?w_e`;_Skt*Q}6;2Sk$WzJxTzZOqrE*W1v!n11(E#qtgHE&9CD{w?OzlD>1+}Xz~!rFyT?En2M)D~HiHHcih&nWb=qLh)l$Sw<~gm}r5 z$l6WTB=>TuAT!zIGw#z0t!d>0^+b2e3Z3X@ghsACEx(P8u2VkaTAM;u;RX34eRN}4 zvAb^uE0AZ=xSmYg6WNrX$aFW6aW%R^*)icKp^W=`p|6}1-sl>X=z3-SB2O|wPwwYZ z5pX1TcX!97gYcKjPqI4MM?Jb;`MmsJcuubGo<}05@=S7fiqxT^$cHNrPW0Mxby=BQ z$z8kL+ubSQ!3bZn2mgI1*ORAnDJJ)ms|$7H9&)~jyvx1{w+dg%ORnkuUw$Ig6WI#! zo&1q!aQVykzjeq;gd^k%E_ce`vI^N*S*={drLXLi%U$wb*;UuWa48~p7pZlBCcG}% zNlru2JMvU=EqQv4pNTdV4lts3m7lp>EWV29wEsBnKaP^s#Beo{6S=Rf_`mxKU%Ka@ zd`j*i_Y$ddc(>44iy8oZQk3uq7US{!goR~;T_SVLIvSj`B%6>IL@_l z-MNlJL3t9{JNYBeELziwaaJ19BaaR{7)du?OyQ zWk=>O3I(`i^r}KzSJLEXLN)o6dnKWUNRl8~-Br7i>B^AUL9&Z}ek;7@=bi4Ji8RW- z3jJlB?p6QWCHdFg1=(5ot#Gx-pFFo6Y+`#!l|d{i3c=kC2_zudbTXf8ea#6mT> z=l|`F%P;a2?taR>UHOtVxXh6xo{Y8r8kNZivhpR7ya;}Xa5+~P_Ge;;TyewA~i_pivvJbL; zS3kM;mnV|Dx$hTE``^0U9hN7PKeB7CjuQElm;4bPmiGyTZ1ClpSW_r*q^R_tF_ zT6xp|o>^8YR}lRpcak-_r-S>PE>Ft)-93^u%DUzM?k7YNW$m&``LtX~{uK(!A6It^ zUF1*nM?9bVNB)(cxVtI4AX-TN_aEQ@h?r-EB@;~__lyK?j%7AdWaE1G? 
z{LJN2_r#KS${PRMZ~51?21O#|-SU2y&t(n&ahd!~WI*mL^l|AepAhMEcSEl2(qHyL zp4jCrS3mrBX3Nvc876jz+}-tc<%&WH_j7Vr(LnNYe=47n-50vaI^>=%f6DIvXGM$c z={}vj$CVTJlk&H$TcpcfudGylB75ZCTeO4gcf0hIwYdA^Qc-lL?4T>vLS^~1dlJcS LgzrSIg(CkCS(z0W literal 0 HcmV?d00001 diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index 620602d8b7..e41427e40d 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -13,6 +13,7 @@ # limitations under the License. import logging from functools import partial +from pathlib import Path from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union import datasets @@ -21,11 +22,12 @@ from torch import Tensor from torch.utils.data import Sampler +import flash from flash.audio.speech_recognition.collate import DataCollatorCTCWithPadding from flash.core.data.auto_dataset import AutoDataset from flash.core.data.callback import BaseDataFetcher from flash.core.data.data_module import DataModule -from flash.core.data.data_source import DataSource, DefaultDataKeys, DefaultDataSources +from flash.core.data.data_source import DataSource, DefaultDataSources from flash.core.data.process import Deserializer, Postprocess, Preprocess from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE, requires_extras @@ -40,30 +42,12 @@ class SpeechRecognitionDeserializer(Deserializer): - @requires_extras("speech") - def __init__(self, backbone: str): - super().__init__() - self.backbone = backbone - self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(backbone) - def deserialize(self, sample: Any) -> Dict: - return { - DefaultDataKeys.INPUT: self.tokenizer(sample["speech"], - sampling_rate=sample["sampling_rate"][0]).input_values, - } + return {INPUT_FIELD: sample} @property def example_input(self) -> str: - return "An example input" - - def __getstate__(self): # TODO: Find out why this is being pickled - state = self.__dict__.copy() - state.pop("tokenizer") - return state - - def __setstate__(self, state): - self.__dict__.update(state) - self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(self.backbone) + return str(Path(flash.ASSETS_ROOT) / "example.wav") class SpeechRecognitionDataSource(DataSource): @@ -163,7 +147,7 @@ def __init__( DefaultDataSources.FILES: SpeechRecognitionFilesSource(self.backbone) }, default_data_source=DefaultDataSources.FILES, - deserializer=SpeechRecognitionDeserializer(backbone), + deserializer=SpeechRecognitionDeserializer(), ) self.processor = Wav2Vec2Processor.from_pretrained(backbone) self.collator = DataCollatorCTCWithPadding(processor=self.processor, padding=True) diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py index a8d528af57..69b9973da5 100644 --- a/flash_examples/speech_recognition.py +++ b/flash_examples/speech_recognition.py @@ -32,9 +32,11 @@ trainer = flash.Trainer(max_epochs=1, gpus=1) trainer.finetune(model, datamodule=datamodule, strategy='no_freeze') -# 4. Predict on audio files! -predictions = model.predict(["data/example.wav"]) -print(predictions) +# # 4. Predict on audio files! +# predictions = model.predict(["data/example.wav"]) +# print(predictions) +# +# # 5. Save the model! +# trainer.save_checkpoint("speech_recognition_model.pt") -# 5. Save the model! 
-trainer.save_checkpoint("speech_recognition_model.pt") +model.serve() From 3db4dad5d3cd8bea06600a60c6f50112b2bdb076 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 12:24:35 +0100 Subject: [PATCH 19/46] Fix --- flash_examples/speech_recognition.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py index 69b9973da5..a8d528af57 100644 --- a/flash_examples/speech_recognition.py +++ b/flash_examples/speech_recognition.py @@ -32,11 +32,9 @@ trainer = flash.Trainer(max_epochs=1, gpus=1) trainer.finetune(model, datamodule=datamodule, strategy='no_freeze') -# # 4. Predict on audio files! -# predictions = model.predict(["data/example.wav"]) -# print(predictions) -# -# # 5. Save the model! -# trainer.save_checkpoint("speech_recognition_model.pt") +# 4. Predict on audio files! +predictions = model.predict(["data/example.wav"]) +print(predictions) -model.serve() +# 5. Save the model! +trainer.save_checkpoint("speech_recognition_model.pt") From c54acf141c81494d9d3823761ffa4ac3d3963891 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 15:30:26 +0100 Subject: [PATCH 20/46] Add tests --- flash_examples/speech_recognition.py | 5 +- tests/audio/__init__.py | 0 tests/audio/speech_recognition/__init__.py | 0 tests/audio/speech_recognition/test_data.py | 103 ++++++++++++++++++ .../test_data_model_integration.py | 85 +++++++++++++++ tests/audio/speech_recognition/test_model.py | 88 +++++++++++++++ tests/helpers/utils.py | 3 + 7 files changed, 282 insertions(+), 2 deletions(-) create mode 100644 tests/audio/__init__.py create mode 100644 tests/audio/speech_recognition/__init__.py create mode 100644 tests/audio/speech_recognition/test_data.py create mode 100644 tests/audio/speech_recognition/test_data_model_integration.py create mode 100644 tests/audio/speech_recognition/test_model.py diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py index a8d528af57..0bebbf3e4e 100644 --- a/flash_examples/speech_recognition.py +++ b/flash_examples/speech_recognition.py @@ -13,10 +13,11 @@ # limitations under the License. import flash from flash.audio import SpeechRecognition, SpeechRecognitionData +from flash.core.data.utils import download_data # # 1. Create the DataModule -# download_data("https://pl-flash-data.s3.amazonaws.com/timit_data.zip", "./data") -# +download_data("https://pl-flash-data.s3.amazonaws.com/timit_data.zip", "./data") + datamodule = SpeechRecognitionData.from_json( input_fields="file", target_fields="text", diff --git a/tests/audio/__init__.py b/tests/audio/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/audio/speech_recognition/__init__.py b/tests/audio/speech_recognition/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/audio/speech_recognition/test_data.py b/tests/audio/speech_recognition/test_data.py new file mode 100644 index 0000000000..918c27355d --- /dev/null +++ b/tests/audio/speech_recognition/test_data.py @@ -0,0 +1,103 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import os +from pathlib import Path + +import pytest + +import flash +from flash.audio import SpeechRecognitionData +from tests.helpers.utils import _SPEECH_TESTING + +TEST_BACKBONE = "patrickvonplaten/wav2vec2_tiny_random_robust" # super small model for testing + +path = str(Path(flash.ASSETS_ROOT) / "example.wav") +sample = {'file': path, 'text': 'example input.'} + +TEST_CSV_DATA = f"""file,text +{path},example input. +{path},example input. +{path},example input. +{path},example input. +{path},example input. +""" + + +def csv_data(tmpdir): + path = Path(tmpdir) / "data.csv" + path.write_text(TEST_CSV_DATA) + return path + + +def json_data(tmpdir, n_samples=5): + path = Path(tmpdir) / "data.json" + with path.open('w') as f: + f.write('\n'.join([json.dumps(sample) for x in range(n_samples)])) + return path + + +@pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") +@pytest.mark.skipif(not _SPEECH_TESTING, reason="speech libraries aren't installed.") +def test_from_csv(tmpdir): + csv_path = csv_data(tmpdir) + dm = SpeechRecognitionData.from_csv( + "file", "text", backbone=TEST_BACKBONE, train_file=csv_path, batch_size=1, num_workers=0 + ) + batch = next(iter(dm.train_dataloader())) + assert "labels" in batch + assert "input_values" in batch + + +@pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") +@pytest.mark.skipif(not _SPEECH_TESTING, reason="speech libraries aren't installed.") +def test_stage_test_and_valid(tmpdir): + csv_path = csv_data(tmpdir) + dm = SpeechRecognitionData.from_csv( + "file", + "text", + backbone=TEST_BACKBONE, + train_file=csv_path, + val_file=csv_path, + test_file=csv_path, + batch_size=1, + num_workers=0 + ) + batch = next(iter(dm.val_dataloader())) + assert "labels" in batch + assert "input_values" in batch + + batch = next(iter(dm.test_dataloader())) + assert "labels" in batch + assert "input_values" in batch + + +@pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") +@pytest.mark.skipif(not _SPEECH_TESTING, reason="speech libraries aren't installed.") +def test_from_json(tmpdir): + json_path = json_data(tmpdir) + dm = SpeechRecognitionData.from_json( + "file", "text", backbone=TEST_BACKBONE, train_file=json_path, batch_size=1, num_workers=0 + ) + batch = next(iter(dm.train_dataloader())) + assert "labels" in batch + assert "input_values" in batch + + +@pytest.mark.skipif(_SPEECH_TESTING, reason="speech libraries are installed.") +def test_text_module_not_found_error(): + with pytest.raises(ModuleNotFoundError, match="[text]"): + SpeechRecognitionData.from_json( + "file", "text", backbone=TEST_BACKBONE, train_file="", batch_size=1, num_workers=0 + ) diff --git a/tests/audio/speech_recognition/test_data_model_integration.py b/tests/audio/speech_recognition/test_data_model_integration.py new file mode 100644 index 0000000000..069c7a1879 --- /dev/null +++ b/tests/audio/speech_recognition/test_data_model_integration.py @@ -0,0 +1,85 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import os +from pathlib import Path + +import pytest +from pytorch_lightning import Trainer + +import flash +from flash.audio import SpeechRecognition, SpeechRecognitionData +from tests.helpers.utils import _SPEECH_TESTING + +TEST_BACKBONE = "patrickvonplaten/wav2vec2_tiny_random_robust" # super small model for testing + +path = str(Path(flash.ASSETS_ROOT) / "example.wav") +sample = {'file': path, 'text': 'example input.'} + +TEST_CSV_DATA = f"""file,text +{path},example input. +{path},example input. +{path},example input. +{path},example input. +{path},example input. +""" + + +def csv_data(tmpdir): + path = Path(tmpdir) / "data.csv" + path.write_text(TEST_CSV_DATA) + return path + + +def json_data(tmpdir, n_samples=5): + path = Path(tmpdir) / "data.json" + with path.open('w') as f: + f.write('\n'.join([json.dumps(sample) for x in range(n_samples)])) + return path + + +@pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") +@pytest.mark.skipif(not _SPEECH_TESTING, reason="audio libraries aren't installed.") +def test_classification_csv(tmpdir): + csv_path = csv_data(tmpdir) + + data = SpeechRecognitionData.from_csv( + "file", + "text", + backbone=TEST_BACKBONE, + train_file=csv_path, + num_workers=0, + batch_size=2, + ) + model = SpeechRecognition(backbone=TEST_BACKBONE) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) + trainer.fit(model, datamodule=data) + + +@pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") +@pytest.mark.skipif(not _SPEECH_TESTING, reason="audio libraries aren't installed.") +def test_classification_json(tmpdir): + json_path = json_data(tmpdir) + + data = SpeechRecognitionData.from_json( + "file", + "text", + backbone=TEST_BACKBONE, + train_file=json_path, + num_workers=0, + batch_size=2, + ) + model = SpeechRecognition(backbone=TEST_BACKBONE) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) + trainer.fit(model, datamodule=data) diff --git a/tests/audio/speech_recognition/test_model.py b/tests/audio/speech_recognition/test_model.py new file mode 100644 index 0000000000..3124defb11 --- /dev/null +++ b/tests/audio/speech_recognition/test_model.py @@ -0,0 +1,88 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import re +from unittest import mock + +import pytest +import torch + +from flash import Trainer +from flash.audio import SpeechRecognition +from flash.audio.speech_recognition.data import SpeechRecognitionPostprocess, SpeechRecognitionPreprocess +from tests.helpers.utils import _SERVE_TESTING, _SPEECH_TESTING + +# ======== Mock functions ======== + + +class DummyDataset(torch.utils.data.Dataset): + + def __getitem__(self, index): + return { + "input_values": torch.randn(size=torch.Size([86631])).float(), + "labels": torch.randn(size=(1, 77)).long(), + } + + def __len__(self) -> int: + return 100 + + +# ============================== + +TEST_BACKBONE = "patrickvonplaten/wav2vec2_tiny_random_robust" # super small model for testing + + +@pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") +@pytest.mark.skipif(not _SPEECH_TESTING, reason="speech libraries aren't installed.") +def test_init_train(tmpdir): + model = SpeechRecognition(backbone=TEST_BACKBONE) + train_dl = torch.utils.data.DataLoader(DummyDataset()) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) + trainer.fit(model, train_dl) + + +@pytest.mark.skipif(not _SPEECH_TESTING, reason="speech libraries aren't installed.") +def test_jit(tmpdir): + sample_input = {"input_values": torch.randn(size=torch.Size([1, 86631])).float()} + path = os.path.join(tmpdir, "test.pt") + + model = SpeechRecognition(backbone=TEST_BACKBONE) + model.eval() + + # Huggingface model only supports `torch.jit.trace` with `strict=False` + model = torch.jit.trace(model, sample_input, strict=False) + + torch.jit.save(model, path) + model = torch.jit.load(path) + + out = model(sample_input)["logits"] + assert isinstance(out, torch.Tensor) + assert out.shape == torch.Size([1, 95, 12]) + + +@pytest.mark.skipif(not _SERVE_TESTING, reason="serve libraries aren't installed.") +@mock.patch("flash._IS_TESTING", True) +def test_serve(): + model = SpeechRecognition(backbone=TEST_BACKBONE) + # TODO: Currently only servable once a preprocess and postprocess have been attached + model._preprocess = SpeechRecognitionPreprocess(backbone=TEST_BACKBONE) + model._postprocess = SpeechRecognitionPostprocess(backbone=TEST_BACKBONE) + model.eval() + model.serve() + + +@pytest.mark.skipif(_SPEECH_TESTING, reason="speech libraries are installed.") +def test_load_from_checkpoint_dependency_error(): + with pytest.raises(ModuleNotFoundError, match=re.escape("'lightning-flash[speech]'")): + SpeechRecognition.load_from_checkpoint("not_a_real_checkpoint.pt") diff --git a/tests/helpers/utils.py b/tests/helpers/utils.py index 5bb699b664..0625fc1ac5 100644 --- a/tests/helpers/utils.py +++ b/tests/helpers/utils.py @@ -18,6 +18,7 @@ _IMAGE_AVAILABLE, _POINTCLOUD_AVAILABLE, _SERVE_AVAILABLE, + _SPEECH_RECOGNITION_AVAILABLE, _TABULAR_AVAILABLE, _TEXT_AVAILABLE, _VIDEO_AVAILABLE, @@ -28,6 +29,7 @@ _TABULAR_TESTING = _TABULAR_AVAILABLE _TEXT_TESTING = _TEXT_AVAILABLE _SERVE_TESTING = _SERVE_AVAILABLE +_SPEECH_TESTING = _SPEECH_RECOGNITION_AVAILABLE _POINTCLOUD_TESTING = _POINTCLOUD_AVAILABLE _GRAPH_TESTING = _GRAPH_AVAILABLE @@ -38,5 +40,6 @@ _TABULAR_TESTING = topic == "tabular" _TEXT_TESTING = topic == "text" _SERVE_TESTING = topic == "serve" + _SPEECH_TESTING = topic == "speech" _POINTCLOUD_TESTING = topic == "pointcloud" _GRAPH_TESTING = topic == "graph" From 62175ae93523548cea0a3752727c2763a634d405 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 15:42:43 +0100 Subject: [PATCH 21/46] Docs, requirements --- 
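(Usage sketch for the API these docs cover — a minimal example assembled from flash_examples/speech_recognition.py and the tests added above; the data/timit/*.json paths are an assumed layout of the downloaded archive, and the tiny backbone is the test-only checkpoint, not a recommended model.)

import flash
from flash.audio import SpeechRecognition, SpeechRecognitionData
from flash.core.data.utils import download_data

# Fetch the small TIMIT subset used throughout these patches.
download_data("https://pl-flash-data.s3.amazonaws.com/timit_data.zip", "./data")

# Each JSON line holds a "file" entry (path to a .wav) and a "text" transcription.
datamodule = SpeechRecognitionData.from_json(
    input_fields="file",
    target_fields="text",
    train_file="data/timit/train.json",  # assumed path inside the downloaded data
    test_file="data/timit/test.json",    # assumed path inside the downloaded data
)

# Any Wav2Vec checkpoint can be passed; this is the tiny model the tests use.
model = SpeechRecognition(backbone="patrickvonplaten/wav2vec2_tiny_random_robust")

trainer = flash.Trainer(max_epochs=1)
trainer.finetune(model, datamodule=datamodule, strategy="no_freeze")

# Transcribe a file and keep the fine-tuned weights.
predictions = model.predict(["data/example.wav"])
print(predictions)
trainer.save_checkpoint("speech_recognition_model.pt")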
docs/source/api/audio.rst | 30 +++++++++ docs/source/index.rst | 7 ++ docs/source/reference/speech_recognition.rst | 68 ++++++++++++++++++++ flash/audio/speech_recognition/data.py | 2 +- 4 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 docs/source/api/audio.rst create mode 100644 docs/source/reference/speech_recognition.rst diff --git a/docs/source/api/audio.rst b/docs/source/api/audio.rst new file mode 100644 index 0000000000..fcff953691 --- /dev/null +++ b/docs/source/api/audio.rst @@ -0,0 +1,30 @@ +########## +flash.audio +########## + +.. contents:: + :depth: 1 + :local: + :backlinks: top + +.. currentmodule:: flash.audio + +Speech Recognition +__________________ + +.. autosummary:: + :toctree: generated/ + :nosignatures: + :template: classtemplate.rst + + ~speech_recognition.model.SpeechRecognition + ~speech_recognition.data.SpeechRecognitionData + + speech_recognition.data.SpeechRecognitionPostprocess + speech_recognition.data.SpeechRecognitionPreprocess + speech_recognition.data.SpeechRecognitionCSVDataSource + speech_recognition.data.SpeechRecognitionJSONDataSource + speech_recognition.data.SpeechRecognitionDataSource + speech_recognition.data.SpeechRecognitionFilesSource + speech_recognition.data.TimitDataSource + speech_recognition.data.SpeechRecognitionDeserializer diff --git a/docs/source/index.rst b/docs/source/index.rst index 34616e011d..3607ccec4b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -55,6 +55,12 @@ Lightning Flash reference/summarization reference/translation +.. toctree:: + :maxdepth: 1 + :caption: Audio + + reference/speech_recognition + .. toctree:: :maxdepth: 1 :caption: Point Cloud @@ -81,6 +87,7 @@ Lightning Flash api/core api/data api/serve + api/audio api/image api/pointcloud api/tabular diff --git a/docs/source/reference/speech_recognition.rst b/docs/source/reference/speech_recognition.rst new file mode 100644 index 0000000000..f9220d65ad --- /dev/null +++ b/docs/source/reference/speech_recognition.rst @@ -0,0 +1,68 @@ +.. _speech_recognition: + +################## +Speech Recognition +################## + +******** +The Task +******** + +Text classification is the task of assigning a piece of text (word, sentence or document) an appropriate class, or category. +The categories depend on the chosen data set and can range from topics. + +----- + +******* +Example +******* + +Let's train a model to classify text as expressing either positive or negative sentiment. +We will be using the IMDB data set, that contains a ``train.csv`` and ``valid.csv``. +Here's the structure: + +.. code-block:: + + review,sentiment + "Japanese indie film with humor ... ",positive + "Isaac Florentine has made some ...",negative + "After seeing the low-budget ...",negative + "I've seen the original English version ...",positive + "Hunters chase what they think is a man through ...",negative + ... + +Once we've downloaded the data using :func:`~flash.core.data.download_data`, we create the :class:`~flash.text.classification.data.TextClassificationData`. +We select a pre-trained backbone to use for our :class:`~flash.text.classification.model.TextClassifier` and finetune on the IMDB data. +The backbone can be any BERT classification model from `HuggingFace/transformers `_. + +.. note:: + + When changing the backbone, make sure you pass in the same backbone to the :class:`~flash.text.classification.model.TextClassifier` and the :class:`~flash.text.classification.data.TextClassificationData`! 
+ +Next, we use the trained :class:`~flash.text.classification.model.TextClassifier` for inference. +Finally, we save the model. +Here's the full example: + +.. literalinclude:: ../../../flash_examples/text_classification.py + :language: python + :lines: 14- + +------ + +******* +Serving +******* + +The :class:`~flash.text.classification.model.TextClassifier` is servable. +This means you can call ``.serve`` to serve your :class:`~flash.core.model.Task`. +Here's an example: + +.. literalinclude:: ../../../flash_examples/serve/text_classification/inference_server.py + :language: python + :lines: 14- + +You can now perform inference from your client like this: + +.. literalinclude:: ../../../flash_examples/serve/text_classification/client.py + :language: python + :lines: 14- diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index e41427e40d..0b61f51208 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -16,7 +16,6 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union -import datasets import pandas as pd import torch from torch import Tensor @@ -32,6 +31,7 @@ from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE, requires_extras if _SPEECH_RECOGNITION_AVAILABLE: + import datasets import soundfile as sf from datasets import Dataset, load_dataset from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2Processor From dc2e72c366e03b3322605eaa4dd0cd4d65e8a149 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 15:51:01 +0100 Subject: [PATCH 22/46] topic thing --- .github/workflows/ci-testing.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index d26d8ecee2..e0eaa8fdf6 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -61,6 +61,10 @@ jobs: python-version: 3.8 requires: 'latest' topic: ['graph'] + - os: ubuntu-20.04 + python-version: 3.8 + requires: 'latest' + topic: ['speech'] # Timeout: https://stackoverflow.com/a/59076067/4521646 timeout-minutes: 35 From 8eccdf97720c3d8babda751d75d2474ff7c641d1 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 16:11:43 +0100 Subject: [PATCH 23/46] Doc fix --- docs/source/api/audio.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/api/audio.rst b/docs/source/api/audio.rst index fcff953691..fbe72f29d2 100644 --- a/docs/source/api/audio.rst +++ b/docs/source/api/audio.rst @@ -1,6 +1,6 @@ -########## +########### flash.audio -########## +########### .. 
contents:: :depth: 1 From dcfa9130774fe599a030bb62e53b43062604d60f Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 19:57:11 +0100 Subject: [PATCH 24/46] test --- .github/workflows/ci-testing.yml | 4 --- docs/source/reference/speech_recognition.rst | 34 ++++++++----------- requirements/datatype_audio.txt | 3 ++ requirements/datatype_speech.txt | 3 -- tests/audio/speech_recognition/test_data.py | 10 +++--- .../test_data_model_integration.py | 6 ++-- tests/audio/speech_recognition/test_model.py | 8 ++--- tests/helpers/utils.py | 6 ++-- 8 files changed, 33 insertions(+), 41 deletions(-) delete mode 100644 requirements/datatype_speech.txt diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index e0eaa8fdf6..d26d8ecee2 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -61,10 +61,6 @@ jobs: python-version: 3.8 requires: 'latest' topic: ['graph'] - - os: ubuntu-20.04 - python-version: 3.8 - requires: 'latest' - topic: ['speech'] # Timeout: https://stackoverflow.com/a/59076067/4521646 timeout-minutes: 35 diff --git a/docs/source/reference/speech_recognition.rst b/docs/source/reference/speech_recognition.rst index f9220d65ad..4672c6a1af 100644 --- a/docs/source/reference/speech_recognition.rst +++ b/docs/source/reference/speech_recognition.rst @@ -8,8 +8,7 @@ Speech Recognition The Task ******** -Text classification is the task of assigning a piece of text (word, sentence or document) an appropriate class, or category. -The categories depend on the chosen data set and can range from topics. +Speech recognition is the task of classifying audio into a text transcription. We rely on `Wav2Vec `_ as our backbone, fine-tuned on labeled transcriptions for speech to text. ----- @@ -17,33 +16,30 @@ The categories depend on the chosen data set and can range from topics. Example ******* -Let's train a model to classify text as expressing either positive or negative sentiment. -We will be using the IMDB data set, that contains a ``train.csv`` and ``valid.csv``. -Here's the structure: +Let's fine-tune the model onto our own labeled audio transcription data: + +Here's the structure our CSV file: .. code-block:: - review,sentiment - "Japanese indie film with humor ... ",positive - "Isaac Florentine has made some ...",negative - "After seeing the low-budget ...",negative - "I've seen the original English version ...",positive - "Hunters chase what they think is a man through ...",negative + file,text + "/path/to/file_1.wav ... ","what was said in file 1." + "/path/to/file_2.wav ... ","what was said in file 2." + "/path/to/file_3.wav ... ","what was said in file 3." ... -Once we've downloaded the data using :func:`~flash.core.data.download_data`, we create the :class:`~flash.text.classification.data.TextClassificationData`. -We select a pre-trained backbone to use for our :class:`~flash.text.classification.model.TextClassifier` and finetune on the IMDB data. -The backbone can be any BERT classification model from `HuggingFace/transformers `_. +Once we've downloaded the data using :func:`~flash.core.data.download_data`, we create the :class:`~flash.audio.speech_recognition.data.SpeechRecognitionData`. +We select a pre-trained Wav2Vec backbone to use for our :class:`~flash.audio.speech_recognition.model.SpeechRecognition` and finetune on a subset of the `TIMIT corpus `__. +The backbone can be any Wav2Vec model from `HuggingFace transformers `__. .. 
note:: - When changing the backbone, make sure you pass in the same backbone to the :class:`~flash.text.classification.model.TextClassifier` and the :class:`~flash.text.classification.data.TextClassificationData`! + When changing the backbone, make sure you pass in the same backbone to the :class:`~flash.audio.speech_recognition.model.SpeechRecognition` and the :class:`~flash.audio.speech_recognition.data.SpeechRecognitionData`! -Next, we use the trained :class:`~flash.text.classification.model.TextClassifier` for inference. -Finally, we save the model. +Next, we use the trained :class:`~flash.audio.speech_recognition.model.SpeechRecognition` for inference and save the model. Here's the full example: -.. literalinclude:: ../../../flash_examples/text_classification.py +.. literalinclude:: ../../../flash_examples/speech_recognition.py :language: python :lines: 14- @@ -53,7 +49,7 @@ Here's the full example: Serving ******* -The :class:`~flash.text.classification.model.TextClassifier` is servable. +The :class:`~flash.audio.speech_recognition.model.SpeechRecognition` is servable. This means you can call ``.serve`` to serve your :class:`~flash.core.model.Task`. Here's an example: diff --git a/requirements/datatype_audio.txt b/requirements/datatype_audio.txt index 03c90d99ec..2941e503a4 100644 --- a/requirements/datatype_audio.txt +++ b/requirements/datatype_audio.txt @@ -1 +1,4 @@ asteroid>=0.5.1 +soundfile>=0.10.2 +transformers>=4.5 +datasets>=1.8 diff --git a/requirements/datatype_speech.txt b/requirements/datatype_speech.txt deleted file mode 100644 index 00b7271824..0000000000 --- a/requirements/datatype_speech.txt +++ /dev/null @@ -1,3 +0,0 @@ -soundfile>=0.10.2 -transformers>=4.5 -datasets>=1.8 diff --git a/tests/audio/speech_recognition/test_data.py b/tests/audio/speech_recognition/test_data.py index 918c27355d..9574d226f4 100644 --- a/tests/audio/speech_recognition/test_data.py +++ b/tests/audio/speech_recognition/test_data.py @@ -19,7 +19,7 @@ import flash from flash.audio import SpeechRecognitionData -from tests.helpers.utils import _SPEECH_TESTING +from tests.helpers.utils import _AUDIO_TESTING TEST_BACKBONE = "patrickvonplaten/wav2vec2_tiny_random_robust" # super small model for testing @@ -49,7 +49,7 @@ def json_data(tmpdir, n_samples=5): @pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") -@pytest.mark.skipif(not _SPEECH_TESTING, reason="speech libraries aren't installed.") +@pytest.mark.skipif(not _AUDIO_TESTING, reason="speech libraries aren't installed.") def test_from_csv(tmpdir): csv_path = csv_data(tmpdir) dm = SpeechRecognitionData.from_csv( @@ -61,7 +61,7 @@ def test_from_csv(tmpdir): @pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") -@pytest.mark.skipif(not _SPEECH_TESTING, reason="speech libraries aren't installed.") +@pytest.mark.skipif(not _AUDIO_TESTING, reason="speech libraries aren't installed.") def test_stage_test_and_valid(tmpdir): csv_path = csv_data(tmpdir) dm = SpeechRecognitionData.from_csv( @@ -84,7 +84,7 @@ def test_stage_test_and_valid(tmpdir): @pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") -@pytest.mark.skipif(not _SPEECH_TESTING, reason="speech libraries aren't installed.") +@pytest.mark.skipif(not _AUDIO_TESTING, reason="speech libraries aren't installed.") def test_from_json(tmpdir): json_path = json_data(tmpdir) dm = SpeechRecognitionData.from_json( @@ -95,7 +95,7 @@ def test_from_json(tmpdir): assert "input_values" in batch 
-@pytest.mark.skipif(_SPEECH_TESTING, reason="speech libraries are installed.") +@pytest.mark.skipif(_AUDIO_TESTING, reason="speech libraries are installed.") def test_text_module_not_found_error(): with pytest.raises(ModuleNotFoundError, match="[text]"): SpeechRecognitionData.from_json( diff --git a/tests/audio/speech_recognition/test_data_model_integration.py b/tests/audio/speech_recognition/test_data_model_integration.py index 069c7a1879..8fb611ba0d 100644 --- a/tests/audio/speech_recognition/test_data_model_integration.py +++ b/tests/audio/speech_recognition/test_data_model_integration.py @@ -20,7 +20,7 @@ import flash from flash.audio import SpeechRecognition, SpeechRecognitionData -from tests.helpers.utils import _SPEECH_TESTING +from tests.helpers.utils import _AUDIO_TESTING TEST_BACKBONE = "patrickvonplaten/wav2vec2_tiny_random_robust" # super small model for testing @@ -50,7 +50,7 @@ def json_data(tmpdir, n_samples=5): @pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") -@pytest.mark.skipif(not _SPEECH_TESTING, reason="audio libraries aren't installed.") +@pytest.mark.skipif(not _AUDIO_TESTING, reason="audio libraries aren't installed.") def test_classification_csv(tmpdir): csv_path = csv_data(tmpdir) @@ -68,7 +68,7 @@ def test_classification_csv(tmpdir): @pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") -@pytest.mark.skipif(not _SPEECH_TESTING, reason="audio libraries aren't installed.") +@pytest.mark.skipif(not _AUDIO_TESTING, reason="audio libraries aren't installed.") def test_classification_json(tmpdir): json_path = json_data(tmpdir) diff --git a/tests/audio/speech_recognition/test_model.py b/tests/audio/speech_recognition/test_model.py index 3124defb11..d22ddcfae1 100644 --- a/tests/audio/speech_recognition/test_model.py +++ b/tests/audio/speech_recognition/test_model.py @@ -21,7 +21,7 @@ from flash import Trainer from flash.audio import SpeechRecognition from flash.audio.speech_recognition.data import SpeechRecognitionPostprocess, SpeechRecognitionPreprocess -from tests.helpers.utils import _SERVE_TESTING, _SPEECH_TESTING +from tests.helpers.utils import _AUDIO_TESTING, _SERVE_TESTING # ======== Mock functions ======== @@ -44,7 +44,7 @@ def __len__(self) -> int: @pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") -@pytest.mark.skipif(not _SPEECH_TESTING, reason="speech libraries aren't installed.") +@pytest.mark.skipif(not _AUDIO_TESTING, reason="speech libraries aren't installed.") def test_init_train(tmpdir): model = SpeechRecognition(backbone=TEST_BACKBONE) train_dl = torch.utils.data.DataLoader(DummyDataset()) @@ -52,7 +52,7 @@ def test_init_train(tmpdir): trainer.fit(model, train_dl) -@pytest.mark.skipif(not _SPEECH_TESTING, reason="speech libraries aren't installed.") +@pytest.mark.skipif(not _AUDIO_TESTING, reason="speech libraries aren't installed.") def test_jit(tmpdir): sample_input = {"input_values": torch.randn(size=torch.Size([1, 86631])).float()} path = os.path.join(tmpdir, "test.pt") @@ -82,7 +82,7 @@ def test_serve(): model.serve() -@pytest.mark.skipif(_SPEECH_TESTING, reason="speech libraries are installed.") +@pytest.mark.skipif(_AUDIO_TESTING, reason="speech libraries are installed.") def test_load_from_checkpoint_dependency_error(): with pytest.raises(ModuleNotFoundError, match=re.escape("'lightning-flash[speech]'")): SpeechRecognition.load_from_checkpoint("not_a_real_checkpoint.pt") diff --git a/tests/helpers/utils.py b/tests/helpers/utils.py index 
0625fc1ac5..d41c4d175c 100644 --- a/tests/helpers/utils.py +++ b/tests/helpers/utils.py @@ -14,11 +14,11 @@ import os from flash.core.utilities.imports import ( + _AUDIO_AVAILABLE, _GRAPH_AVAILABLE, _IMAGE_AVAILABLE, _POINTCLOUD_AVAILABLE, _SERVE_AVAILABLE, - _SPEECH_RECOGNITION_AVAILABLE, _TABULAR_AVAILABLE, _TEXT_AVAILABLE, _VIDEO_AVAILABLE, @@ -29,7 +29,7 @@ _TABULAR_TESTING = _TABULAR_AVAILABLE _TEXT_TESTING = _TEXT_AVAILABLE _SERVE_TESTING = _SERVE_AVAILABLE -_SPEECH_TESTING = _SPEECH_RECOGNITION_AVAILABLE +_AUDIO_TESTING = _AUDIO_AVAILABLE _POINTCLOUD_TESTING = _POINTCLOUD_AVAILABLE _GRAPH_TESTING = _GRAPH_AVAILABLE @@ -40,6 +40,6 @@ _TABULAR_TESTING = topic == "tabular" _TEXT_TESTING = topic == "text" _SERVE_TESTING = topic == "serve" - _SPEECH_TESTING = topic == "speech" + _AUDIO_TESTING = topic == "audio" _POINTCLOUD_TESTING = topic == "pointcloud" _GRAPH_TESTING = topic == "graph" From e4f0a69dbaa8625303a0ae05555d431f5e610da2 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 22:07:42 +0100 Subject: [PATCH 25/46] Add serve --- .../serve/speech_recognition/client.py | 25 +++++++++++++++++++ .../speech_recognition/inference_server.py | 19 ++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 flash_examples/serve/speech_recognition/client.py create mode 100644 flash_examples/serve/speech_recognition/inference_server.py diff --git a/flash_examples/serve/speech_recognition/client.py b/flash_examples/serve/speech_recognition/client.py new file mode 100644 index 0000000000..a7ade326ce --- /dev/null +++ b/flash_examples/serve/speech_recognition/client.py @@ -0,0 +1,25 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from pathlib import Path + +import requests + +import flash + +audio_path = str(Path(flash.ASSETS_ROOT) / "example.wav") + +body = {"session": "UUID", "payload": {"inputs": {"data": audio_path}}} +resp = requests.post("http://127.0.0.1:8000/predict", json=body) + +print(resp.json()) diff --git a/flash_examples/serve/speech_recognition/inference_server.py b/flash_examples/serve/speech_recognition/inference_server.py new file mode 100644 index 0000000000..076aa3a053 --- /dev/null +++ b/flash_examples/serve/speech_recognition/inference_server.py @@ -0,0 +1,19 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from flash.audio import SpeechRecognition + +# model = SpeechRecognition.load_from_checkpoint("https://flash-weights.s3.amazonaws.com/speech_recognition_model.pt") +model = SpeechRecognition.load_from_checkpoint("/home/sean/lightning-flash/speech_recognition_model.pt") + +model.serve() From 14795f3561f9c72e6501e33b07979b1e1a27f36b Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Sun, 18 Jul 2021 09:02:19 +0100 Subject: [PATCH 26/46] Fix path --- flash_examples/serve/speech_recognition/inference_server.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flash_examples/serve/speech_recognition/inference_server.py b/flash_examples/serve/speech_recognition/inference_server.py index 076aa3a053..7a4762e23a 100644 --- a/flash_examples/serve/speech_recognition/inference_server.py +++ b/flash_examples/serve/speech_recognition/inference_server.py @@ -13,7 +13,6 @@ # limitations under the License. from flash.audio import SpeechRecognition -# model = SpeechRecognition.load_from_checkpoint("https://flash-weights.s3.amazonaws.com/speech_recognition_model.pt") -model = SpeechRecognition.load_from_checkpoint("/home/sean/lightning-flash/speech_recognition_model.pt") +model = SpeechRecognition.load_from_checkpoint("https://flash-weights.s3.amazonaws.com/speech_recognition_model.pt") model.serve() From 1b8eb08fa8f226b240b1e010c4e5ad392bc22268 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Sun, 18 Jul 2021 09:05:26 +0100 Subject: [PATCH 27/46] Swap to audio available --- flash/audio/speech_recognition/backbone.py | 4 ++-- flash/audio/speech_recognition/collate.py | 4 ++-- flash/audio/speech_recognition/data.py | 4 ++-- flash/audio/speech_recognition/model.py | 4 ++-- flash/core/utilities/imports.py | 7 +------ tests/helpers/utils.py | 2 -- 6 files changed, 9 insertions(+), 16 deletions(-) diff --git a/flash/audio/speech_recognition/backbone.py b/flash/audio/speech_recognition/backbone.py index 1aef8419cd..9661ee0c17 100644 --- a/flash/audio/speech_recognition/backbone.py +++ b/flash/audio/speech_recognition/backbone.py @@ -14,9 +14,9 @@ from functools import partial from flash.core.registry import FlashRegistry -from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE +from flash.core.utilities.imports import _AUDIO_AVAILABLE -if _SPEECH_RECOGNITION_AVAILABLE: +if _AUDIO_AVAILABLE: from transformers import Wav2Vec2ForCTC SPEECH_RECOGNITION_BACKBONES = FlashRegistry("backbones") diff --git a/flash/audio/speech_recognition/collate.py b/flash/audio/speech_recognition/collate.py index cdb30bea24..208bcd5402 100644 --- a/flash/audio/speech_recognition/collate.py +++ b/flash/audio/speech_recognition/collate.py @@ -16,9 +16,9 @@ import torch -from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE +from flash.core.utilities.imports import _AUDIO_AVAILABLE -if _SPEECH_RECOGNITION_AVAILABLE: +if _AUDIO_AVAILABLE: from transformers import Wav2Vec2Processor diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index 0b61f51208..e8c3a5cfec 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -28,9 +28,9 @@ from flash.core.data.data_module import DataModule from flash.core.data.data_source import DataSource, DefaultDataSources from flash.core.data.process import Deserializer, Postprocess, Preprocess -from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE, requires_extras +from flash.core.utilities.imports import _AUDIO_AVAILABLE, requires_extras -if 
_SPEECH_RECOGNITION_AVAILABLE: +if _AUDIO_AVAILABLE: import datasets import soundfile as sf from datasets import Dataset, load_dataset diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py index a7b364c74b..3d1188e76f 100644 --- a/flash/audio/speech_recognition/model.py +++ b/flash/audio/speech_recognition/model.py @@ -22,9 +22,9 @@ from flash.audio.speech_recognition.backbone import SPEECH_RECOGNITION_BACKBONES from flash.core.data.process import Serializer from flash.core.registry import FlashRegistry -from flash.core.utilities.imports import _SPEECH_RECOGNITION_AVAILABLE +from flash.core.utilities.imports import _AUDIO_AVAILABLE -if _SPEECH_RECOGNITION_AVAILABLE: +if _AUDIO_AVAILABLE: from transformers import Wav2Vec2ForCTC diff --git a/flash/core/utilities/imports.py b/flash/core/utilities/imports.py index 511b122eee..578ffe60b9 100644 --- a/flash/core/utilities/imports.py +++ b/flash/core/utilities/imports.py @@ -97,7 +97,6 @@ def _compare_version(package: str, op, version) -> bool: _TORCHVISION_GREATER_EQUAL_0_9 = _compare_version("torchvision", operator.ge, "0.9.0") _TEXT_AVAILABLE = _TRANSFORMERS_AVAILABLE -_SPEECH_RECOGNITION_AVAILABLE = _TRANSFORMERS_AVAILABLE and _SOUNDFILE_AVAILABLE _TABULAR_AVAILABLE = _TABNET_AVAILABLE and _PANDAS_AVAILABLE _VIDEO_AVAILABLE = _PYTORCHVIDEO_AVAILABLE _IMAGE_AVAILABLE = all([ @@ -110,10 +109,7 @@ def _compare_version(package: str, op, version) -> bool: ]) _SERVE_AVAILABLE = _FASTAPI_AVAILABLE and _PYDANTIC_AVAILABLE and _CYTOOLZ_AVAILABLE and _UVICORN_AVAILABLE _POINTCLOUD_AVAILABLE = _OPEN3D_AVAILABLE -_AUDIO_AVAILABLE = all([ - _ASTEROID_AVAILABLE, - _TORCHAUDIO_AVAILABLE, -]) +_AUDIO_AVAILABLE = all([_ASTEROID_AVAILABLE, _TORCHAUDIO_AVAILABLE, _SOUNDFILE_AVAILABLE, _TRANSFORMERS_AVAILABLE]) _GRAPH_AVAILABLE = _TORCH_SCATTER_AVAILABLE and _TORCH_SPARSE_AVAILABLE and _TORCH_GEOMETRIC_AVAILABLE _EXTRAS_AVAILABLE = { @@ -123,7 +119,6 @@ def _compare_version(package: str, op, version) -> bool: 'video': _VIDEO_AVAILABLE, 'pointcloud': _POINTCLOUD_AVAILABLE, 'serve': _SERVE_AVAILABLE, - 'speech': _SPEECH_RECOGNITION_AVAILABLE, 'audio': _AUDIO_AVAILABLE, 'graph': _GRAPH_AVAILABLE, } diff --git a/tests/helpers/utils.py b/tests/helpers/utils.py index 27f25fc1df..bd57cf570d 100644 --- a/tests/helpers/utils.py +++ b/tests/helpers/utils.py @@ -29,7 +29,6 @@ _TABULAR_TESTING = _TABULAR_AVAILABLE _TEXT_TESTING = _TEXT_AVAILABLE _SERVE_TESTING = _SERVE_AVAILABLE -_AUDIO_TESTING = _AUDIO_AVAILABLE _POINTCLOUD_TESTING = _POINTCLOUD_AVAILABLE _GRAPH_TESTING = _GRAPH_AVAILABLE _AUDIO_TESTING = _AUDIO_AVAILABLE @@ -41,7 +40,6 @@ _TABULAR_TESTING = topic == "tabular" _TEXT_TESTING = topic == "text" _SERVE_TESTING = topic == "serve" - _AUDIO_TESTING = topic == "audio" _POINTCLOUD_TESTING = topic == "pointcloud" _GRAPH_TESTING = topic == "graph" _AUDIO_TESTING = topic == "audio" From ab3a437e8dcef8e00135a621ad7e4d8f17fa6f55 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 10:26:16 +0100 Subject: [PATCH 28/46] Small fix --- flash/audio/speech_recognition/collate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/flash/audio/speech_recognition/collate.py b/flash/audio/speech_recognition/collate.py index 208bcd5402..0641719228 100644 --- a/flash/audio/speech_recognition/collate.py +++ b/flash/audio/speech_recognition/collate.py @@ -20,6 +20,8 @@ if _AUDIO_AVAILABLE: from transformers import Wav2Vec2Processor +else: + Wav2Vec2Processor = object @dataclass From 13eb84f378104e821827416cb752925001d876d5 Mon 
Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 10:32:14 +0100 Subject: [PATCH 29/46] Some fixes --- flash/audio/speech_recognition/data.py | 4 ++-- tests/audio/speech_recognition/test_data.py | 4 ++-- tests/audio/speech_recognition/test_model.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index e8c3a5cfec..ca3889acc0 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -124,7 +124,7 @@ def load_data( class SpeechRecognitionPreprocess(Preprocess): - @requires_extras("speech") + @requires_extras("audio") def __init__( self, train_transform: Optional[Dict[str, Callable]] = None, @@ -205,7 +205,7 @@ def collate(self, samples: Any) -> Tensor: class SpeechRecognitionPostprocess(Postprocess): - @requires_extras("speech") + @requires_extras("audio") def __init__( self, save_path: Optional[str] = None, diff --git a/tests/audio/speech_recognition/test_data.py b/tests/audio/speech_recognition/test_data.py index 9574d226f4..7228cc57de 100644 --- a/tests/audio/speech_recognition/test_data.py +++ b/tests/audio/speech_recognition/test_data.py @@ -95,9 +95,9 @@ def test_from_json(tmpdir): assert "input_values" in batch -@pytest.mark.skipif(_AUDIO_TESTING, reason="speech libraries are installed.") +@pytest.mark.skipif(_AUDIO_TESTING, reason="audio libraries are installed.") def test_text_module_not_found_error(): - with pytest.raises(ModuleNotFoundError, match="[text]"): + with pytest.raises(ModuleNotFoundError, match="[audio]"): SpeechRecognitionData.from_json( "file", "text", backbone=TEST_BACKBONE, train_file="", batch_size=1, num_workers=0 ) diff --git a/tests/audio/speech_recognition/test_model.py b/tests/audio/speech_recognition/test_model.py index d22ddcfae1..e7209da061 100644 --- a/tests/audio/speech_recognition/test_model.py +++ b/tests/audio/speech_recognition/test_model.py @@ -44,7 +44,7 @@ def __len__(self) -> int: @pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") -@pytest.mark.skipif(not _AUDIO_TESTING, reason="speech libraries aren't installed.") +@pytest.mark.skipif(not _AUDIO_TESTING, reason="audio libraries aren't installed.") def test_init_train(tmpdir): model = SpeechRecognition(backbone=TEST_BACKBONE) train_dl = torch.utils.data.DataLoader(DummyDataset()) @@ -52,7 +52,7 @@ def test_init_train(tmpdir): trainer.fit(model, train_dl) -@pytest.mark.skipif(not _AUDIO_TESTING, reason="speech libraries aren't installed.") +@pytest.mark.skipif(not _AUDIO_TESTING, reason="audio libraries aren't installed.") def test_jit(tmpdir): sample_input = {"input_values": torch.randn(size=torch.Size([1, 86631])).float()} path = os.path.join(tmpdir, "test.pt") @@ -82,7 +82,7 @@ def test_serve(): model.serve() -@pytest.mark.skipif(_AUDIO_TESTING, reason="speech libraries are installed.") +@pytest.mark.skipif(_AUDIO_TESTING, reason="audio libraries are installed.") def test_load_from_checkpoint_dependency_error(): - with pytest.raises(ModuleNotFoundError, match=re.escape("'lightning-flash[speech]'")): + with pytest.raises(ModuleNotFoundError, match=re.escape("'lightning-flash[audio]'")): SpeechRecognition.load_from_checkpoint("not_a_real_checkpoint.pt") From af9e0c160cb8c99f04c1b0e32e492d5d284ce3c6 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 10:38:04 +0100 Subject: [PATCH 30/46] Small fix --- flash/audio/speech_recognition/data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index ca3889acc0..4ca0137e96 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -35,6 +35,8 @@ import soundfile as sf from datasets import Dataset, load_dataset from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2Processor +else: + Dataset = object INPUT_FIELD = "file" TARGET_FIELD = "text" From 4bbc31c8d7b34afec21b42db178dbb02d3eedecc Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 10:44:56 +0100 Subject: [PATCH 31/46] Small fix --- flash/audio/speech_recognition/model.py | 3 +++ tests/audio/speech_recognition/test_data.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py index 3d1188e76f..c0ed3ed421 100644 --- a/flash/audio/speech_recognition/model.py +++ b/flash/audio/speech_recognition/model.py @@ -29,8 +29,11 @@ class SpeechRecognition(Task): + backbones: FlashRegistry = SPEECH_RECOGNITION_BACKBONES + required_extras = "audio" + def __init__( self, backbone: str = "facebook/wav2vec2-base-960h", diff --git a/tests/audio/speech_recognition/test_data.py b/tests/audio/speech_recognition/test_data.py index 7228cc57de..6817b5205e 100644 --- a/tests/audio/speech_recognition/test_data.py +++ b/tests/audio/speech_recognition/test_data.py @@ -96,7 +96,7 @@ def test_from_json(tmpdir): @pytest.mark.skipif(_AUDIO_TESTING, reason="audio libraries are installed.") -def test_text_module_not_found_error(): +def test_audio_module_not_found_error(): with pytest.raises(ModuleNotFoundError, match="[audio]"): SpeechRecognitionData.from_json( "file", "text", backbone=TEST_BACKBONE, train_file="", batch_size=1, num_workers=0 From 4336f616e6c44d08d247c5a98088cb12f55b9eee Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 10:56:42 +0100 Subject: [PATCH 32/46] Fix --- tests/core/data/test_data_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/data/test_data_pipeline.py b/tests/core/data/test_data_pipeline.py index 2b593cdd9e..b5ec52dec1 100644 --- a/tests/core/data/test_data_pipeline.py +++ b/tests/core/data/test_data_pipeline.py @@ -691,7 +691,7 @@ def test_step(self, batch, batch_idx): assert len(batch) == 2 assert batch[0].shape == torch.Size([2, 1]) - def predict_step(self, batch, batch_idx, dataloader_idx): + def predict_step(self, batch, batch_idx, dataloader_idx=None): assert batch[0][0] == 'a' assert batch[0][1] == 'a' assert batch[1][0] == 'b' From 51c640aef640c5eca71616fc895737c7736b3413 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 11:06:31 +0100 Subject: [PATCH 33/46] Updates --- flash/core/utilities/imports.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/flash/core/utilities/imports.py b/flash/core/utilities/imports.py index 578ffe60b9..d1ba3388b6 100644 --- a/flash/core/utilities/imports.py +++ b/flash/core/utilities/imports.py @@ -92,11 +92,19 @@ def _compare_version(package: str, op, version) -> bool: _TORCH_SPARSE_AVAILABLE = _module_available("torch_sparse") _TORCH_GEOMETRIC_AVAILABLE = _module_available("torch_geometric") _TORCHAUDIO_AVAILABLE = _module_available("torchaudio") +_ROUGE_SCORE_AVAILABLE = _module_available("rouge_score") +_SENTENCEPIECE_AVAILABLE = _module_available("sentencepiece") +_DATASETS_AVAILABLE = _module_available("datasets") if Version: _TORCHVISION_GREATER_EQUAL_0_9 = _compare_version("torchvision", operator.ge, 
"0.9.0") -_TEXT_AVAILABLE = _TRANSFORMERS_AVAILABLE +_TEXT_AVAILABLE = all([ + _TRANSFORMERS_AVAILABLE, + _ROUGE_SCORE_AVAILABLE, + _SENTENCEPIECE_AVAILABLE, + _DATASETS_AVAILABLE, +]) _TABULAR_AVAILABLE = _TABNET_AVAILABLE and _PANDAS_AVAILABLE _VIDEO_AVAILABLE = _PYTORCHVIDEO_AVAILABLE _IMAGE_AVAILABLE = all([ From 801b75277cfe44780f2013d853ee8b852d448e30 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 11:11:37 +0100 Subject: [PATCH 34/46] Fix docs --- docs/source/index.rst | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 72e1cf3ebc..b4efd51d2b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -45,6 +45,7 @@ Lightning Flash :caption: Audio reference/audio_classification + reference/speech_recognition .. toctree:: :maxdepth: 1 @@ -61,12 +62,6 @@ Lightning Flash reference/summarization reference/translation -.. toctree:: - :maxdepth: 1 - :caption: Audio - - reference/speech_recognition - .. toctree:: :maxdepth: 1 :caption: Point Cloud From 683f67113b93436e39927b62a0ceec864319a303 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 19 Jul 2021 11:51:41 +0100 Subject: [PATCH 35/46] Remove duplicate --- docs/source/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index b4efd51d2b..8f56b56214 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -89,7 +89,6 @@ Lightning Flash api/core api/data api/serve - api/audio api/image api/audio api/pointcloud From 8590052474078dbaae4d6d57422c60c68ca8a435 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 19 Jul 2021 12:07:23 +0100 Subject: [PATCH 36/46] Add check for audio --- tests/audio/speech_recognition/test_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/audio/speech_recognition/test_model.py b/tests/audio/speech_recognition/test_model.py index e7209da061..a435c497cf 100644 --- a/tests/audio/speech_recognition/test_model.py +++ b/tests/audio/speech_recognition/test_model.py @@ -72,6 +72,7 @@ def test_jit(tmpdir): @pytest.mark.skipif(not _SERVE_TESTING, reason="serve libraries aren't installed.") +@pytest.mark.skipif(not _AUDIO_TESTING, reason="audio libraries aren't installed.") @mock.patch("flash._IS_TESTING", True) def test_serve(): model = SpeechRecognition(backbone=TEST_BACKBONE) From 1c98625d1b4c885bd162b34e02885fe167d44480 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 17:55:31 +0100 Subject: [PATCH 37/46] Updates --- flash/audio/speech_recognition/backbone.py | 21 +- flash/audio/speech_recognition/collate.py | 27 +- flash/audio/speech_recognition/data.py | 300 +++++++----------- flash/audio/speech_recognition/model.py | 11 +- flash/core/data/batch.py | 5 +- flash/core/data/process.py | 23 +- .../serve/speech_recognition/client.py | 6 +- .../speech_recognition/inference_server.py | 1 - flash_examples/speech_recognition.py | 11 +- 9 files changed, 179 insertions(+), 226 deletions(-) diff --git a/flash/audio/speech_recognition/backbone.py b/flash/audio/speech_recognition/backbone.py index 9661ee0c17..425ef2eb00 100644 --- a/flash/audio/speech_recognition/backbone.py +++ b/flash/audio/speech_recognition/backbone.py @@ -16,20 +16,15 @@ from flash.core.registry import FlashRegistry from flash.core.utilities.imports import _AUDIO_AVAILABLE -if _AUDIO_AVAILABLE: - from transformers import Wav2Vec2ForCTC - SPEECH_RECOGNITION_BACKBONES = FlashRegistry("backbones") -WAV2VEC_MODELS = ["facebook/wav2vec2-base-960h", 
"facebook/wav2vec2-large-960h-lv60"] - - -def _huggingface_from_pretrained(model_name): - return Wav2Vec2ForCTC.from_pretrained(model_name) +if _AUDIO_AVAILABLE: + from transformers import Wav2Vec2ForCTC + WAV2VEC_MODELS = ["facebook/wav2vec2-base-960h", "facebook/wav2vec2-large-960h-lv60"] -for model_name in WAV2VEC_MODELS: - SPEECH_RECOGNITION_BACKBONES( - fn=partial(_huggingface_from_pretrained, model_name=model_name), - name=model_name, - ) + for model_name in WAV2VEC_MODELS: + SPEECH_RECOGNITION_BACKBONES( + fn=partial(Wav2Vec2ForCTC.from_pretrained, model_name), + name=model_name, + ) diff --git a/flash/audio/speech_recognition/collate.py b/flash/audio/speech_recognition/collate.py index 0641719228..9ee53a4686 100644 --- a/flash/audio/speech_recognition/collate.py +++ b/flash/audio/speech_recognition/collate.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. from dataclasses import dataclass -from typing import Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union import torch +from flash.core.data.data_source import DefaultDataKeys from flash.core.utilities.imports import _AUDIO_AVAILABLE if _AUDIO_AVAILABLE: @@ -58,10 +59,19 @@ class DataCollatorCTCWithPadding: pad_to_multiple_of: Optional[int] = None pad_to_multiple_of_labels: Optional[int] = None - def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]: + def __call__(self, samples: List[Dict[str, Any]], metadata: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]: + inputs = [sample[DefaultDataKeys.INPUT] for sample in samples] + sampling_rates = [sample["sampling_rate"] for sample in metadata] + + assert ( + len(set(sampling_rates)) == 1 + ), f"Make sure all inputs have the same sampling rate of {self.processor.feature_extractor.sampling_rate}." 
+ + inputs = self.processor(inputs, sampling_rate=sampling_rates[0]).input_values + # split inputs and labels since they have to be of different lengths and need # different padding methods - input_features = [{"input_values": feature["input_values"]} for feature in features] + input_features = [{"input_values": input} for input in inputs] batch = self.processor.pad( input_features, @@ -71,11 +81,12 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> return_tensors="pt", ) - label_features = [{"input_ids": feature.get("labels")} for feature in features] + labels = [sample.get(DefaultDataKeys.TARGET, None) for sample in samples] # check to ensure labels exist to collate - labels_exist = not any(x['input_ids'] is None for x in label_features) - if labels_exist: + if None not in labels: with self.processor.as_target_processor(): + label_features = self.processor(labels).input_ids + label_features = [{"input_ids": feature} for feature in label_features] labels_batch = self.processor.pad( label_features, padding=self.padding, @@ -85,8 +96,6 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> ) # replace padding with -100 to ignore loss correctly - labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) - - batch["labels"] = labels + batch["labels"] = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) return batch diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index 4ca0137e96..97dfde0f26 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -11,53 +11,76 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import logging -from functools import partial +import base64 +import io +import os.path +from dataclasses import dataclass from pathlib import Path from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union -import pandas as pd import torch -from torch import Tensor -from torch.utils.data import Sampler +from torch.utils.data import Dataset import flash -from flash.audio.speech_recognition.collate import DataCollatorCTCWithPadding -from flash.core.data.auto_dataset import AutoDataset -from flash.core.data.callback import BaseDataFetcher from flash.core.data.data_module import DataModule -from flash.core.data.data_source import DataSource, DefaultDataSources +from flash.core.data.data_source import ( + DatasetDataSource, + DataSource, + DefaultDataKeys, + DefaultDataSources, + PathsDataSource, +) from flash.core.data.process import Deserializer, Postprocess, Preprocess +from flash.core.data.properties import ProcessState from flash.core.utilities.imports import _AUDIO_AVAILABLE, requires_extras if _AUDIO_AVAILABLE: - import datasets import soundfile as sf - from datasets import Dataset, load_dataset - from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2Processor + from datasets import Dataset as HFDataset + from datasets import load_dataset + from transformers import Wav2Vec2CTCTokenizer else: - Dataset = object - -INPUT_FIELD = "file" -TARGET_FIELD = "text" + HFDataset = object class SpeechRecognitionDeserializer(Deserializer): def deserialize(self, sample: Any) -> Dict: - return {INPUT_FIELD: sample} + encoded_with_padding = (sample + "===").encode("ascii") + audio = base64.b64decode(encoded_with_padding) + buffer = io.BytesIO(audio) + data, sampling_rate = sf.read(buffer) + return { + DefaultDataKeys.INPUT: data, + DefaultDataKeys.METADATA: { + "sampling_rate": sampling_rate + }, + } @property def example_input(self) -> str: - return str(Path(flash.ASSETS_ROOT) / "example.wav") + with (Path(flash.ASSETS_ROOT) / "example.wav").open("rb") as f: + return base64.b64encode(f.read()).decode("UTF-8") + + +class BaseSpeechRecognition: + def _load_sample(self, sample: Dict[str, Any]) -> Any: + path = sample[DefaultDataKeys.INPUT] + if not os.path.isabs(path) and DefaultDataKeys.METADATA in sample and "root" in sample[DefaultDataKeys.METADATA + ]: + path = os.path.join(sample[DefaultDataKeys.METADATA]["root"], path) + speech_array, sampling_rate = sf.read(path) + sample[DefaultDataKeys.INPUT] = speech_array + sample[DefaultDataKeys.METADATA] = {"sampling_rate": sampling_rate} + return sample -class SpeechRecognitionDataSource(DataSource): - def __init__(self, backbone: str, filetype: Optional[str] = None): +class SpeechRecognitionFileDataSource(DataSource, BaseSpeechRecognition): + + def __init__(self, filetype: Optional[str] = None): super().__init__() self.filetype = filetype - self.backbone = backbone def load_data( self, @@ -65,63 +88,54 @@ def load_data( dataset: Optional[Any] = None, ) -> Union[Sequence[Mapping[str, Any]]]: if self.filetype == 'json': - file, input, target, field = data + file, input_key, target_key, field = data else: - file, input, target = data + file, input_key, target_key = data stage = self.running_stage.value - dataset_dict = load_dataset(self.filetype, data_files={stage: str(file)}) - if input != INPUT_FIELD: - dataset_dict.rename_column_(input, INPUT_FIELD) - if target != TARGET_FIELD: - dataset_dict.rename_column_(target, TARGET_FIELD) - return dataset_dict[stage] + if self.filetype == 'json' and field is not None: + dataset_dict = 
load_dataset(self.filetype, data_files={stage: str(file)}, field=field) + else: + dataset_dict = load_dataset(self.filetype, data_files={stage: str(file)}) - def predict_load_data(self, data: Any, dataset: AutoDataset): - return self.load_data(data, dataset) + dataset = dataset_dict[stage] + meta = {"root": os.path.dirname(file)} + return [{ + DefaultDataKeys.INPUT: input_file, + DefaultDataKeys.TARGET: target, + DefaultDataKeys.METADATA: meta, + } for input_file, target in zip(dataset[input_key], dataset[target_key])] + def load_sample(self, sample: Dict[str, Any], dataset: Any = None) -> Any: + return self._load_sample(sample) -class TimitDataSource(SpeechRecognitionDataSource): - def load_data( - self, - data: Tuple[str, Union[str, List[str]], Union[str, List[str]]], - dataset: Optional[Any] = None, - ) -> Union[Sequence[Mapping[str, Any]]]: - stage = self.running_stage.value - dataset_dict = load_dataset("timit_asr") - return dataset_dict[stage] +class SpeechRecognitionCSVDataSource(SpeechRecognitionFileDataSource): - def predict_load_data(self, data: Any, dataset: AutoDataset): - return self.load_data(data, dataset) + def __init__(self): + super().__init__(filetype='csv') -class SpeechRecognitionCSVDataSource(SpeechRecognitionDataSource): +class SpeechRecognitionJSONDataSource(SpeechRecognitionFileDataSource): - def __init__(self, backbone: str): - super().__init__(backbone, filetype='csv') + def __init__(self): + super().__init__(filetype='json') -class SpeechRecognitionJSONDataSource(SpeechRecognitionDataSource): +class SpeechRecognitionDatasetDataSource(DatasetDataSource, BaseSpeechRecognition): - def __init__(self, backbone: str): - super().__init__(backbone, filetype='json') + def load_data(self, data: Dataset, dataset: Optional[Any] = None) -> Union[Sequence[Mapping[str, Any]]]: + if isinstance(data, HFDataset): + data = list(zip(data["file"], data["text"])) + return super().load_data(data, dataset) -class SpeechRecognitionFilesSource(DataSource): +class SpeechRecognitionPathsDataSource(PathsDataSource, BaseSpeechRecognition): - def __init__(self, backbone: str, filetype: Optional[str] = None): - super().__init__() - self.filetype = filetype - self.backbone = backbone + def __init__(self): + super().__init__(("wav", "ogg", "flac", "mat")) - def load_data( - self, - files: Tuple[str, Union[str, List[str]], Union[str, List[str]]], - dataset: Optional[Any] = None, - ) -> Union[Sequence[Mapping[str, Any]]]: - if isinstance(files, str): - files = [files] - return [{INPUT_FIELD: file} for file in files] + def load_sample(self, sample: Dict[str, Any], dataset: Any = None) -> Any: + return self._load_sample(sample) class SpeechRecognitionPreprocess(Preprocess): @@ -133,88 +147,60 @@ def __init__( val_transform: Optional[Dict[str, Callable]] = None, test_transform: Optional[Dict[str, Callable]] = None, predict_transform: Optional[Dict[str, Callable]] = None, - backbone: str = "facebook/wav2vec2-base-960h", ): - self.backbone = backbone - super().__init__( train_transform=train_transform, val_transform=val_transform, test_transform=test_transform, predict_transform=predict_transform, data_sources={ - DefaultDataSources.CSV: SpeechRecognitionCSVDataSource(self.backbone), - DefaultDataSources.JSON: SpeechRecognitionJSONDataSource(self.backbone), - "timit": TimitDataSource(self.backbone), - DefaultDataSources.FILES: SpeechRecognitionFilesSource(self.backbone) + DefaultDataSources.CSV: SpeechRecognitionCSVDataSource(), + DefaultDataSources.JSON: SpeechRecognitionJSONDataSource(), + 
DefaultDataSources.FILES: SpeechRecognitionPathsDataSource(), + DefaultDataSources.DATASET: SpeechRecognitionDatasetDataSource(), }, default_data_source=DefaultDataSources.FILES, deserializer=SpeechRecognitionDeserializer(), ) - self.processor = Wav2Vec2Processor.from_pretrained(backbone) - self.collator = DataCollatorCTCWithPadding(processor=self.processor, padding=True) def get_state_dict(self) -> Dict[str, Any]: - return { - **self.transforms, - "backbone": self.backbone, - } + return self.transforms @classmethod - def load_state_dict(cls, state_dict: Dict[str, Any], strict: bool): + def load_state_dict(cls, state_dict: Dict[str, Any], strict: bool = False): return cls(**state_dict) - def _prepare_dataset(self, batch: Any) -> Any: - # check that all files have the correct sampling rate - assert ( - len(set(batch["sampling_rate"])) == 1 - ), f"Make sure all inputs have the same sampling rate of {self.processor.feature_extractor.sampling_rate}." - - batch["input_values"] = self.processor(batch["speech"], sampling_rate=batch["sampling_rate"][0]).input_values - - if not self.predicting: - with self.processor.as_target_processor(): - batch["labels"] = self.processor(batch["target_text"]).input_ids - return batch - - def _speech_file_to_array_fn(self, batch: Any) -> Any: - speech_array, sampling_rate = sf.read(batch[INPUT_FIELD]) - batch["speech"] = speech_array - batch["sampling_rate"] = sampling_rate - if not self.predicting: - batch["target_text"] = batch[TARGET_FIELD] - return batch - - def _convert_to_batch(self, batch: Any) -> Dataset: - self._disable_tqdm_bars() - batch = Dataset.from_pandas(pd.DataFrame(batch)) - columns = ["input_values"] - if not self.predicting: - columns += ["labels"] - batch = batch.map(partial(self._speech_file_to_array_fn)) - batch = batch.map(partial(self._prepare_dataset), batched=True) - batch.set_format("torch", columns=columns) - return batch - - def _disable_tqdm_bars(self): - datasets.logging.get_verbosity = lambda: logging.NOTSET - - def collate(self, samples: Any) -> Tensor: - """Override to convert a set of samples to a batch""" - samples = self._convert_to_batch(samples) - return self.collator(samples) + +@dataclass(unsafe_hash=True, frozen=True) +class SpeechRecognitionBackboneState(ProcessState): + """The ``SpeechRecognitionBackboneState`` stores the backbone in use by the + :class:`~flash.audio.speech_recognition.data.SpeechRecognitionPostprocess` + """ + + backbone: str class SpeechRecognitionPostprocess(Postprocess): @requires_extras("audio") - def __init__( - self, - save_path: Optional[str] = None, - backbone: str = "facebook/wav2vec2-base-960h", - ): - super().__init__(save_path=save_path) - self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(backbone) + def __init__(self): + super().__init__() + + self._backbone = None + self._tokenizer = None + + @property + def backbone(self): + backbone_state = self.get_state(SpeechRecognitionBackboneState) + if backbone_state is not None: + return backbone_state.backbone + + @property + def tokenizer(self): + if self.backbone is not None and self.backbone != self._backbone: + self._tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(self.backbone) + self._backbone = self.backbone + return self._tokenizer def per_batch_transform(self, batch: Any) -> Any: # converts logits into greedy transcription @@ -222,72 +208,18 @@ def per_batch_transform(self, batch: Any) -> Any: transcriptions = self.tokenizer.batch_decode(pred_ids) return transcriptions + def __getstate__(self): # TODO: Find out why this is being 
pickled + state = self.__dict__.copy() + state.pop("_tokenizer") + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self._tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(self.backbone) + class SpeechRecognitionData(DataModule): """Data Module for text classification tasks""" preprocess_cls = SpeechRecognitionPreprocess postprocess_cls = SpeechRecognitionPostprocess - - @classmethod - def from_timit( - cls, - train_transform: Optional[Dict[str, Callable]] = None, - val_transform: Optional[Dict[str, Callable]] = None, - test_transform: Optional[Dict[str, Callable]] = None, - predict_transform: Optional[Dict[str, Callable]] = None, - data_fetcher: Optional[BaseDataFetcher] = None, - preprocess: Optional[Preprocess] = None, - val_split: Optional[float] = None, - batch_size: int = 4, - num_workers: Optional[int] = None, - sampler: Optional[Sampler] = None, - **preprocess_kwargs: Any, - ) -> 'DataModule': - """Creates a :class:`~flash.audio.speech_recognition.data.SpeechRecognitionData` object loading - the TIMIT labeled transcription corpus: https://catalog.ldc.upenn.edu/LDC93S1 - - Args: - train_transform: The dictionary of transforms to use during training which maps - :class:`~flash.core.data.process.Preprocess` hook names to callable transforms. - val_transform: The dictionary of transforms to use during validation which maps - :class:`~flash.core.data.process.Preprocess` hook names to callable transforms. - test_transform: The dictionary of transforms to use during testing which maps - :class:`~flash.core.data.process.Preprocess` hook names to callable transforms. - predict_transform: The dictionary of transforms to use during predicting which maps - :class:`~flash.core.data.process.Preprocess` hook names to callable transforms. - data_fetcher: The :class:`~flash.core.data.callback.BaseDataFetcher` to pass to the - :class:`~flash.core.data.data_module.DataModule`. - preprocess: The :class:`~flash.core.data.data.Preprocess` to pass to the - :class:`~flash.core.data.data_module.DataModule`. If ``None``, ``cls.preprocess_cls`` - will be constructed and used. - val_split: The ``val_split`` argument to pass to the :class:`~flash.core.data.data_module.DataModule`. - batch_size: The ``batch_size`` argument to pass to the :class:`~flash.core.data.data_module.DataModule`. - num_workers: The ``num_workers`` argument to pass to the :class:`~flash.core.data.data_module.DataModule`. - sampler: The ``sampler`` argument to pass to the :class:`~flash.core.data.data_module.DataModule`. - preprocess_kwargs: Additional keyword arguments to use when constructing the preprocess. Will only be used - if ``preprocess = None``. - - Returns: - The constructed TIMIT data module. 
- - Examples:: - - data_module = SpeechRecognitionData.from_timit() - """ - return cls.from_data_source( - "timit", - train_data="train", - test_data="test", - train_transform=train_transform, - val_transform=val_transform, - test_transform=test_transform, - predict_transform=predict_transform, - data_fetcher=data_fetcher, - preprocess=preprocess, - val_split=val_split, - batch_size=batch_size, - num_workers=num_workers, - sampler=sampler, - **preprocess_kwargs, - ) diff --git a/flash/audio/speech_recognition/model.py b/flash/audio/speech_recognition/model.py index c0ed3ed421..588f4f89b2 100644 --- a/flash/audio/speech_recognition/model.py +++ b/flash/audio/speech_recognition/model.py @@ -20,12 +20,15 @@ from flash import Task from flash.audio.speech_recognition.backbone import SPEECH_RECOGNITION_BACKBONES +from flash.audio.speech_recognition.collate import DataCollatorCTCWithPadding +from flash.audio.speech_recognition.data import SpeechRecognitionBackboneState from flash.core.data.process import Serializer +from flash.core.data.states import CollateFn from flash.core.registry import FlashRegistry from flash.core.utilities.imports import _AUDIO_AVAILABLE if _AUDIO_AVAILABLE: - from transformers import Wav2Vec2ForCTC + from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor class SpeechRecognition(Task): @@ -39,7 +42,7 @@ def __init__( backbone: str = "facebook/wav2vec2-base-960h", loss_fn: Optional[Callable] = None, optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam, - learning_rate: float = 1e-2, + learning_rate: float = 1e-5, serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None, ): os.environ["TOKENIZERS_PARALLELISM"] = "TRUE" @@ -47,6 +50,7 @@ def __init__( warnings.simplefilter("ignore") # set os environ variable for multiprocesses os.environ["PYTHONWARNINGS"] = "ignore" + model = self.backbones.get(backbone )() if backbone in self.backbones else Wav2Vec2ForCTC.from_pretrained(backbone) super().__init__( @@ -59,6 +63,9 @@ def __init__( self.save_hyperparameters() + self.set_state(SpeechRecognitionBackboneState(backbone)) + self.set_state(CollateFn(DataCollatorCTCWithPadding(Wav2Vec2Processor.from_pretrained(backbone)))) + def forward(self, batch: Dict[str, torch.Tensor]): return self.model(batch["input_values"]) diff --git a/flash/core/data/batch.py b/flash/core/data/batch.py index 51d28d2a22..e7e9a30635 100644 --- a/flash/core/data/batch.py +++ b/flash/core/data/batch.py @@ -229,7 +229,10 @@ def forward(self, samples: Sequence[Any]) -> Any: with self._collate_context: samples, metadata = self._extract_metadata(samples) - samples = self.collate_fn(samples) + try: + samples = self.collate_fn(samples, metadata) + except TypeError: + samples = self.collate_fn(samples) if metadata and isinstance(samples, dict): samples[DefaultDataKeys.METADATA] = metadata self.callback.on_collate(samples, self.stage) diff --git a/flash/core/data/process.py b/flash/core/data/process.py index 7020e32d36..a1d6e56085 100644 --- a/flash/core/data/process.py +++ b/flash/core/data/process.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import inspect import os from abc import ABC, abstractclassmethod, abstractmethod from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence @@ -24,7 +25,7 @@ import flash from flash.core.data.batch import default_uncollate from flash.core.data.callback import FlashCallback -from flash.core.data.data_source import DatasetDataSource, DataSource, DefaultDataSources +from flash.core.data.data_source import DatasetDataSource, DataSource, DefaultDataKeys, DefaultDataSources from flash.core.data.properties import Properties from flash.core.data.states import CollateFn from flash.core.data.utils import _PREPROCESS_FUNCS, _STAGES_PREFIX, convert_to_modules, CurrentRunningStageFuncContext @@ -360,18 +361,24 @@ def per_batch_transform(self, batch: Any) -> Any: """ return self.current_transform(batch) - def collate(self, samples: Sequence) -> Any: + def collate(self, samples: Sequence, metadata=None) -> Any: """ Transform to convert a sequence of samples to a collated batch. """ + current_transform = self.current_transform + if current_transform is self._identity: + current_transform = self._default_collate # the model can provide a custom ``collate_fn``. collate_fn = self.get_state(CollateFn) if collate_fn is not None: - return collate_fn.collate_fn(samples) - - current_transform = self.current_transform - if current_transform is self._identity: - return self._default_collate(samples) - return self.current_transform(samples) + collate_fn = collate_fn.collate_fn + else: + collate_fn = current_transform + # return collate_fn.collate_fn(samples) + + parameters = inspect.signature(collate_fn).parameters + if len(parameters) > 1 and DefaultDataKeys.METADATA in parameters: + return collate_fn(samples, metadata) + return collate_fn(samples) def per_sample_transform_on_device(self, sample: Any) -> Any: """Transforms to apply to the data before the collation (per-sample basis). diff --git a/flash_examples/serve/speech_recognition/client.py b/flash_examples/serve/speech_recognition/client.py index a7ade326ce..c855a37204 100644 --- a/flash_examples/serve/speech_recognition/client.py +++ b/flash_examples/serve/speech_recognition/client.py @@ -11,15 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import base64 from pathlib import Path import requests import flash -audio_path = str(Path(flash.ASSETS_ROOT) / "example.wav") +with (Path(flash.ASSETS_ROOT) / "example.wav").open("rb") as f: + audio_str = base64.b64encode(f.read()).decode("UTF-8") -body = {"session": "UUID", "payload": {"inputs": {"data": audio_path}}} +body = {"session": "UUID", "payload": {"inputs": {"data": audio_str}}} resp = requests.post("http://127.0.0.1:8000/predict", json=body) print(resp.json()) diff --git a/flash_examples/serve/speech_recognition/inference_server.py b/flash_examples/serve/speech_recognition/inference_server.py index 7a4762e23a..bbc4479624 100644 --- a/flash_examples/serve/speech_recognition/inference_server.py +++ b/flash_examples/serve/speech_recognition/inference_server.py @@ -14,5 +14,4 @@ from flash.audio import SpeechRecognition model = SpeechRecognition.load_from_checkpoint("https://flash-weights.s3.amazonaws.com/speech_recognition_model.pt") - model.serve() diff --git a/flash_examples/speech_recognition.py b/flash_examples/speech_recognition.py index 0bebbf3e4e..269148c60f 100644 --- a/flash_examples/speech_recognition.py +++ b/flash_examples/speech_recognition.py @@ -21,20 +21,19 @@ datamodule = SpeechRecognitionData.from_json( input_fields="file", target_fields="text", - train_file="data/train.json", - test_file="data/test.json", - num_workers=4, + train_file="data/timit/train.json", + test_file="data/timit/test.json", ) # 2. Build the task -model = SpeechRecognition(learning_rate=1e-5) +model = SpeechRecognition(backbone="facebook/wav2vec2-base-960h") # 3. Create the trainer and finetune the model -trainer = flash.Trainer(max_epochs=1, gpus=1) +trainer = flash.Trainer(max_epochs=1, limit_train_batches=1, limit_test_batches=1) trainer.finetune(model, datamodule=datamodule, strategy='no_freeze') # 4. Predict on audio files! -predictions = model.predict(["data/example.wav"]) +predictions = model.predict(["data/timit/example.wav"]) print(predictions) # 5. Save the model! From a208e171ff4d1b2de673517beb88e55e12f247a0 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 17:59:25 +0100 Subject: [PATCH 38/46] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb7c1cb3b8..1fa497852c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Added `AudioClassificationData` and an example for classifying audio spectrograms ([#594](https://github.com/PyTorchLightning/lightning-flash/pull/594)) +- Added a `SpeechRecognition` task for speech to text using Wav2Vec ([#586](https://github.com/PyTorchLightning/lightning-flash/pull/586)) + ### Changed - Changed how pretrained flag works for loading weights for ImageClassifier task ([#560](https://github.com/PyTorchLightning/lightning-flash/pull/560)) From d9d1a0aa1637e57c9c04e402c438931ae2e5d6a2 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 18:01:49 +0100 Subject: [PATCH 39/46] Updates --- docs/source/api/audio.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/source/api/audio.rst b/docs/source/api/audio.rst index a72839e5ee..858c4c8235 100644 --- a/docs/source/api/audio.rst +++ b/docs/source/api/audio.rst @@ -31,11 +31,12 @@ __________________ ~speech_recognition.model.SpeechRecognition ~speech_recognition.data.SpeechRecognitionData - speech_recognition.data.SpeechRecognitionPostprocess speech_recognition.data.SpeechRecognitionPreprocess + speech_recognition.data.SpeechRecognitionBackboneState + speech_recognition.data.SpeechRecognitionPostprocess speech_recognition.data.SpeechRecognitionCSVDataSource speech_recognition.data.SpeechRecognitionJSONDataSource speech_recognition.data.SpeechRecognitionDataSource - speech_recognition.data.SpeechRecognitionFilesSource - speech_recognition.data.TimitDataSource + speech_recognition.data.SpeechRecognitionPathsDataSource + speech_recognition.data.SpeechRecognitionDatasetDataSource speech_recognition.data.SpeechRecognitionDeserializer From 9259f44ed6430bb45533bfee4d146cb0aa01b725 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 18:06:13 +0100 Subject: [PATCH 40/46] Update docs --- docs/source/api/audio.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/api/audio.rst b/docs/source/api/audio.rst index 858c4c8235..706a364372 100644 --- a/docs/source/api/audio.rst +++ b/docs/source/api/audio.rst @@ -36,7 +36,8 @@ __________________ speech_recognition.data.SpeechRecognitionPostprocess speech_recognition.data.SpeechRecognitionCSVDataSource speech_recognition.data.SpeechRecognitionJSONDataSource - speech_recognition.data.SpeechRecognitionDataSource + speech_recognition.data.BaseSpeechRecognition + speech_recognition.data.SpeechRecognitionFileDataSource speech_recognition.data.SpeechRecognitionPathsDataSource speech_recognition.data.SpeechRecognitionDatasetDataSource speech_recognition.data.SpeechRecognitionDeserializer From 70607a27d2ed8f167ccf30ac2cff5eaba01adac1 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 18:06:49 +0100 Subject: [PATCH 41/46] Update docs --- docs/source/reference/speech_recognition.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/source/reference/speech_recognition.rst b/docs/source/reference/speech_recognition.rst index 4672c6a1af..4c225bef16 100644 --- a/docs/source/reference/speech_recognition.rst +++ b/docs/source/reference/speech_recognition.rst @@ -31,11 +31,6 @@ Here's the structure our CSV file: Once we've downloaded the data using :func:`~flash.core.data.download_data`, we create the :class:`~flash.audio.speech_recognition.data.SpeechRecognitionData`. We select a pre-trained Wav2Vec backbone to use for our :class:`~flash.audio.speech_recognition.model.SpeechRecognition` and finetune on a subset of the `TIMIT corpus `__. 
The backbone can be any Wav2Vec model from `HuggingFace transformers `__. - -.. note:: - - When changing the backbone, make sure you pass in the same backbone to the :class:`~flash.audio.speech_recognition.model.SpeechRecognition` and the :class:`~flash.audio.speech_recognition.data.SpeechRecognitionData`! - Next, we use the trained :class:`~flash.audio.speech_recognition.model.SpeechRecognition` for inference and save the model. Here's the full example: From 4e6bce7746b25f589f2f796b8f6a2fb98fc02b0e Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 18:09:48 +0100 Subject: [PATCH 42/46] Update docs --- docs/source/reference/speech_recognition.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/reference/speech_recognition.rst b/docs/source/reference/speech_recognition.rst index 4c225bef16..63816cba49 100644 --- a/docs/source/reference/speech_recognition.rst +++ b/docs/source/reference/speech_recognition.rst @@ -48,12 +48,12 @@ The :class:`~flash.audio.speech_recognition.model.SpeechRecognition` is servable This means you can call ``.serve`` to serve your :class:`~flash.core.model.Task`. Here's an example: -.. literalinclude:: ../../../flash_examples/serve/text_classification/inference_server.py +.. literalinclude:: ../../../flash_examples/serve/speech_recognition/inference_server.py :language: python :lines: 14- You can now perform inference from your client like this: -.. literalinclude:: ../../../flash_examples/serve/text_classification/client.py +.. literalinclude:: ../../../flash_examples/serve/speech_recognition/client.py :language: python :lines: 14- From 2d08f2106ba9f9e1ca3e41c569197778e9516e38 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 18:10:51 +0100 Subject: [PATCH 43/46] Add example to CI --- tests/examples/test_scripts.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/examples/test_scripts.py b/tests/examples/test_scripts.py index 56b729e36e..bc3260b1a8 100644 --- a/tests/examples/test_scripts.py +++ b/tests/examples/test_scripts.py @@ -42,6 +42,10 @@ "audio_classification.py", marks=pytest.mark.skipif(not _AUDIO_TESTING, reason="audio libraries aren't installed") ), + pytest.param( + "speech_recognition.py", + marks=pytest.mark.skipif(not _AUDIO_TESTING, reason="audio libraries aren't installed") + ), pytest.param( "image_classification.py", marks=pytest.mark.skipif(not _IMAGE_TESTING, reason="image libraries aren't installed") From 0052f1f3316cf2e73222c203d70812af7dd87a67 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 18:24:48 +0100 Subject: [PATCH 44/46] Fix some tests --- tests/audio/speech_recognition/test_data.py | 40 +++++++-------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/tests/audio/speech_recognition/test_data.py b/tests/audio/speech_recognition/test_data.py index 6817b5205e..2b87129210 100644 --- a/tests/audio/speech_recognition/test_data.py +++ b/tests/audio/speech_recognition/test_data.py @@ -19,10 +19,9 @@ import flash from flash.audio import SpeechRecognitionData +from flash.core.data.data_source import DefaultDataKeys from tests.helpers.utils import _AUDIO_TESTING -TEST_BACKBONE = "patrickvonplaten/wav2vec2_tiny_random_robust" # super small model for testing - path = str(Path(flash.ASSETS_ROOT) / "example.wav") sample = {'file': path, 'text': 'example input.'} @@ -52,12 +51,10 @@ def json_data(tmpdir, n_samples=5): @pytest.mark.skipif(not _AUDIO_TESTING, reason="speech libraries aren't installed.") def test_from_csv(tmpdir): 
csv_path = csv_data(tmpdir) - dm = SpeechRecognitionData.from_csv( - "file", "text", backbone=TEST_BACKBONE, train_file=csv_path, batch_size=1, num_workers=0 - ) + dm = SpeechRecognitionData.from_csv("file", "text", train_file=csv_path, batch_size=1, num_workers=0) batch = next(iter(dm.train_dataloader())) - assert "labels" in batch - assert "input_values" in batch + assert DefaultDataKeys.INPUT in batch + assert DefaultDataKeys.TARGET in batch @pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") @@ -65,39 +62,28 @@ def test_from_csv(tmpdir): def test_stage_test_and_valid(tmpdir): csv_path = csv_data(tmpdir) dm = SpeechRecognitionData.from_csv( - "file", - "text", - backbone=TEST_BACKBONE, - train_file=csv_path, - val_file=csv_path, - test_file=csv_path, - batch_size=1, - num_workers=0 + "file", "text", train_file=csv_path, val_file=csv_path, test_file=csv_path, batch_size=1, num_workers=0 ) batch = next(iter(dm.val_dataloader())) - assert "labels" in batch - assert "input_values" in batch + assert DefaultDataKeys.INPUT in batch + assert DefaultDataKeys.TARGET in batch batch = next(iter(dm.test_dataloader())) - assert "labels" in batch - assert "input_values" in batch + assert DefaultDataKeys.INPUT in batch + assert DefaultDataKeys.TARGET in batch @pytest.mark.skipif(os.name == "nt", reason="Huggingface timing out on Windows") @pytest.mark.skipif(not _AUDIO_TESTING, reason="speech libraries aren't installed.") def test_from_json(tmpdir): json_path = json_data(tmpdir) - dm = SpeechRecognitionData.from_json( - "file", "text", backbone=TEST_BACKBONE, train_file=json_path, batch_size=1, num_workers=0 - ) + dm = SpeechRecognitionData.from_json("file", "text", train_file=json_path, batch_size=1, num_workers=0) batch = next(iter(dm.train_dataloader())) - assert "labels" in batch - assert "input_values" in batch + assert DefaultDataKeys.INPUT in batch + assert DefaultDataKeys.TARGET in batch @pytest.mark.skipif(_AUDIO_TESTING, reason="audio libraries are installed.") def test_audio_module_not_found_error(): with pytest.raises(ModuleNotFoundError, match="[audio]"): - SpeechRecognitionData.from_json( - "file", "text", backbone=TEST_BACKBONE, train_file="", batch_size=1, num_workers=0 - ) + SpeechRecognitionData.from_json("file", "text", train_file="", batch_size=1, num_workers=0) From 0c87f04483a10a27d6dbe5c875f4ab99ad78c989 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 18:43:38 +0100 Subject: [PATCH 45/46] Fix some broken tests --- tests/audio/speech_recognition/test_model.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/audio/speech_recognition/test_model.py b/tests/audio/speech_recognition/test_model.py index a435c497cf..69cf6a7aa3 100644 --- a/tests/audio/speech_recognition/test_model.py +++ b/tests/audio/speech_recognition/test_model.py @@ -15,12 +15,14 @@ import re from unittest import mock +import numpy as np import pytest import torch from flash import Trainer from flash.audio import SpeechRecognition from flash.audio.speech_recognition.data import SpeechRecognitionPostprocess, SpeechRecognitionPreprocess +from flash.core.data.data_source import DefaultDataKeys from tests.helpers.utils import _AUDIO_TESTING, _SERVE_TESTING # ======== Mock functions ======== @@ -30,8 +32,11 @@ class DummyDataset(torch.utils.data.Dataset): def __getitem__(self, index): return { - "input_values": torch.randn(size=torch.Size([86631])).float(), - "labels": torch.randn(size=(1, 77)).long(), + DefaultDataKeys.INPUT: 
np.random.randn(86631), + DefaultDataKeys.TARGET: "some target text", + DefaultDataKeys.METADATA: { + "sampling_rate": 16000 + }, } def __len__(self) -> int: @@ -77,8 +82,8 @@ def test_jit(tmpdir): def test_serve(): model = SpeechRecognition(backbone=TEST_BACKBONE) # TODO: Currently only servable once a preprocess and postprocess have been attached - model._preprocess = SpeechRecognitionPreprocess(backbone=TEST_BACKBONE) - model._postprocess = SpeechRecognitionPostprocess(backbone=TEST_BACKBONE) + model._preprocess = SpeechRecognitionPreprocess() + model._postprocess = SpeechRecognitionPostprocess() model.eval() model.serve() From bfe8ea6402745d28e97b3a0010a9ac0a6f91e569 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 19 Jul 2021 18:51:57 +0100 Subject: [PATCH 46/46] Fixes --- tests/audio/speech_recognition/test_data_model_integration.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/audio/speech_recognition/test_data_model_integration.py b/tests/audio/speech_recognition/test_data_model_integration.py index 8fb611ba0d..0c9773022d 100644 --- a/tests/audio/speech_recognition/test_data_model_integration.py +++ b/tests/audio/speech_recognition/test_data_model_integration.py @@ -57,7 +57,6 @@ def test_classification_csv(tmpdir): data = SpeechRecognitionData.from_csv( "file", "text", - backbone=TEST_BACKBONE, train_file=csv_path, num_workers=0, batch_size=2, @@ -75,7 +74,6 @@ def test_classification_json(tmpdir): data = SpeechRecognitionData.from_json( "file", "text", - backbone=TEST_BACKBONE, train_file=json_path, num_workers=0, batch_size=2,